"""Export images from the trashnet_enhanced training parquet files to disk.

Each row's image bytes are written to ``images/<class>/<class>_<n>.jpg``,
where ``<class>`` is looked up from the row's integer label and ``<n>`` is a
running counter shared across all classes.
"""

import os

DATASET_URL = "hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet"
OUTPUT_ROOT = "images"

# Integer label stored in the dataset -> class name used for folder and file.
LABEL_NAMES = {
    0: "biodegradable",
    1: "cardboard",
    2: "glass",
    3: "metal",
    4: "paper",
    5: "plastic",
}


def save_image(label, image_bytes, count, root=OUTPUT_ROOT):
    """Write one image into its class folder.

    Args:
        label: Integer class label; must be a key of ``LABEL_NAMES``.
        image_bytes: Raw bytes to write to the file, unmodified.
        count: Number embedded in the file name.
        root: Directory under which the per-class folders live.

    Returns:
        The path of the written file, or ``None`` (nothing written) when
        ``label`` is not a known class.
    """
    name = LABEL_NAMES.get(label)
    if name is None:
        return None

    class_dir = os.path.join(root, name)
    # The class folder may not exist yet; without this, open() below fails
    # with FileNotFoundError on a fresh checkout.
    os.makedirs(class_dir, exist_ok=True)

    path = os.path.join(class_dir, f"{name}_{count}.jpg")
    with open(path, "wb") as f:
        f.write(image_bytes)
    return path


def export_images(rows, root=OUTPUT_ROOT):
    """Save every row's image, stopping at the first unknown label.

    Args:
        rows: Iterable of ``(index, row)`` pairs as produced by
            ``DataFrame.iterrows()``. Each ``row`` must support
            ``row["label"]`` and ``row["image"]["bytes"]``.
        root: Directory under which the per-class folders live.

    Returns:
        The number of images saved.
    """
    count = 0
    # The frame index is ignored; file names use a separate running counter.
    for _, row in rows:
        label = row["label"]
        image = row["image"]["bytes"]
        if save_image(label, image, count, root) is None:
            print("Label not found")
            break
        print(f"Saved image {count}")
        count += 1
    return count


def main():
    """Read the training parquet files and export all images."""
    # Imported here, where it is used, so the helpers above stay importable
    # without dask installed.
    import dask.dataframe as dd

    df = dd.read_parquet(DATASET_URL)
    export_images(df.iterrows())
    print("Done!")


if __name__ == "__main__":
    main()