40 lines
1.2 KiB
Python
Executable File
40 lines
1.2 KiB
Python
Executable File
import dask.dataframe as dd
|
|
import os
|
|
|
|
# Parquet shards of the training split, read through the Hugging Face filesystem.
DATASET_GLOB = "hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet"

# Root directory that receives one sub-folder per class.
OUTPUT_ROOT = "images"

# Integer label -> class name. The name is used both as the sub-folder and as
# the file-name prefix of every exported image.
LABEL_NAMES = {
    0: "biodegradable",
    1: "cardboard",
    2: "glass",
    3: "metal",
    4: "paper",
    5: "plastic",
}


def save_image(label, image_bytes, count, root=OUTPUT_ROOT):
    """Write one image to ``<root>/<class>/<class>_<count>.jpg``.

    Args:
        label: Integer class label; must be a key of ``LABEL_NAMES``.
        image_bytes: Raw bytes written to the file unchanged.
        count: Running index used in the file name.
        root: Directory under which the per-class folders live.

    Returns:
        The path that was written, or ``None`` when ``label`` is not a known
        class (nothing is written in that case).
    """
    class_name = LABEL_NAMES.get(label)
    if class_name is None:
        return None

    class_dir = os.path.join(root, class_name)
    # The target folder may not exist yet (e.g. on a fresh checkout); without
    # this, open() below fails with FileNotFoundError.
    os.makedirs(class_dir, exist_ok=True)

    path = os.path.join(class_dir, f"{class_name}_{count}.jpg")
    with open(path, "wb") as f:
        f.write(image_bytes)
    return path


def main():
    """Export every row of the dataset as an image file sorted by class.

    Each row is expected to provide ``row["label"]`` and
    ``row["image"]["bytes"]``. Processing stops at the first row whose label
    is not in ``LABEL_NAMES``.
    """
    df = dd.read_parquet(DATASET_GLOB)

    count = 0
    for _, row in df.iterrows():
        if save_image(row["label"], row["image"]["bytes"], count) is None:
            print("Label not found")
            break

        print(f"Saved image {count}")
        count += 1

    print("Done!")


if __name__ == "__main__":
    main()
|
|
|
|
|