import dask.dataframe as dd
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials
import os, time, uuid
ENDPOINT = "https://trashvision.cognitiveservices.azure.com/"
training_key = "611e786a785648e38f346f18e7f7e7ed"
prediction_key = "611e786a785648e38f346f18e7f7e7ed"
project_id = "a67f7d7b-c980-49bd-b57d-0bd1367b29d0"
credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
trainer = CustomVisionTrainingClient(ENDPOINT, credentials)
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})
predictor = CustomVisionPredictionClient(ENDPOINT, prediction_credentials)
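# The prediction client above isn't exercised in this script. A rough, commented-out
# sketch of how it could be used once an iteration has been trained and published;
# the "Iteration1" publish name and test_image.jpg path are placeholders, not values
# from this project.
# with open("test_image.jpg", "rb") as test_image:
#     results = predictor.classify_image(project_id, "Iteration1", test_image.read())
#     for prediction in results.predictions:
#         print(f"{prediction.tag_name}: {prediction.probability:.2%}")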
df = dd.read_parquet("hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet")
# Optionally, iterate over the first 5 rows of the dataframe, decode the image bytes, and save them to files to spot-check the data (see the sketch below).
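# A minimal, commented-out sketch of that spot check. It assumes Pillow is installed
# and that each row stores encoded image bytes under row["image"]["bytes"], as used
# in the upload loop below.
# from io import BytesIO
# from PIL import Image
# for i, (_, row) in enumerate(df.head(5).iterrows()):
#     Image.open(BytesIO(row["image"]["bytes"])).save(f"sample_{i}.png")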
tags = trainer.get_tags(project_id)
biodegradable_tag = None
cardboard_tag = None
glass_tag = None
metal_tag = None
paper_tag = None
plastic_tag = None
for tag in tags:
    if tag.name == "biodegradable":
        biodegradable_tag = tag
    elif tag.name == "cardboard":
        cardboard_tag = tag
    elif tag.name == "glass":
        glass_tag = tag
    elif tag.name == "metal":
        metal_tag = tag
    elif tag.name == "paper":
        paper_tag = tag
    elif tag.name == "plastic":
        plastic_tag = tag
print(biodegradable_tag)
print(cardboard_tag)
print(glass_tag)
print(metal_tag)
print(paper_tag)
print(plastic_tag)
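# The same lookup could be written more compactly as a dict keyed by tag name
# (not used below; shown only as a possible simplification of the chain above):
# tag_by_name = {t.name: t for t in tags}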
# Get all images in a local directory and upload them to the Custom Vision project.
# base_image_location = os.path.join(os.path.dirname(__file__), "images")
# tagged_images_with_regions = []
# for image in os.listdir(base_image_location):
#     print(image)
#     with open(os.path.join(base_image_location, image), "rb") as image_contents:
#         trainer.create_images_from_data(project_id, image_contents.read(), [biodegradable_tag.id])
#     print("Uploaded image: ", image)
#     time.sleep(5)
# Skip the first `skip` rows (e.g. ones uploaded in an earlier run), then upload the rest one at a time.
skip = 10031
count = 0
for index, row in df.iterrows():
    if count < skip:
        count += 1
        continue
    else:
        count += 1
    image = row["image"]["bytes"]
    label = row["label"]
    # Map the dataset's integer label to the matching Custom Vision tag.
    if label == 0:
        trainer.create_images_from_data(project_id, image, [biodegradable_tag.id])
    elif label == 1:
        trainer.create_images_from_data(project_id, image, [cardboard_tag.id])
    elif label == 2:
        trainer.create_images_from_data(project_id, image, [glass_tag.id])
    elif label == 3:
        trainer.create_images_from_data(project_id, image, [metal_tag.id])
    elif label == 4:
        trainer.create_images_from_data(project_id, image, [paper_tag.id])
    elif label == 5:
        trainer.create_images_from_data(project_id, image, [plastic_tag.id])
    print(f"C: {count}, I: {index}, L: {label}, Uploaded image")
    time.sleep(1)  # brief pause between uploads
print("Done uploading images")