Files
Patriot_Hacks-24/AI Training/ml.py
2025-10-24 02:07:59 -04:00

40 lines
1.2 KiB
Python
Executable File

import dask.dataframe as dd
import os
# Export every row of the TrashNet-enhanced training split to disk as
# images/<class name>/<class name>_<n>.jpg, where <n> is a single running
# counter shared by all classes (so numbering within a class has gaps).
df = dd.read_parquet("hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet")

# Integer label -> class name. The name doubles as the output sub-directory
# and as the file-name prefix.
LABEL_NAMES = {
    0: "biodegradable",
    1: "cardboard",
    2: "glass",
    3: "metal",
    4: "paper",
    5: "plastic",
}

# open(..., "wb") does not create missing parent directories, so make sure
# every class folder exists before the first write.
for class_name in LABEL_NAMES.values():
    os.makedirs(os.path.join("images", class_name), exist_ok=True)

count = 0
for index, row in df.iterrows():
    label = row["label"]
    image = row["image"]["bytes"]  # raw encoded image bytes, written out verbatim

    class_name = LABEL_NAMES.get(label)
    if class_name is None:
        # An unknown label aborts the whole export rather than skipping the row.
        print("Label not found")
        break

    with open(os.path.join("images", class_name, f"{class_name}_{count}.jpg"), "wb") as f:
        f.write(image)

    print(f"Saved image {count}")
    count += 1
print("Done!")