"""Upload labeled trash images from the trashnet_enhanced dataset to an
Azure Custom Vision project.

Reads the training split as a dask DataFrame, maps each row's integer
label to the matching Custom Vision tag, and uploads the raw image bytes
one at a time. Skips the first SKIP rows (already uploaded in a previous
run) and sleeps between uploads to stay under the service rate limit.
"""
import dask.dataframe as dd
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials
import os, time, uuid

ENDPOINT = "https://trashvision.cognitiveservices.azure.com/"
# NOTE(security): keys were hard-coded in source. They now come from the
# environment when set; the original literals remain as fallbacks so the
# script keeps working unchanged. Rotate these keys and drop the fallbacks.
training_key = os.environ.get("CUSTOMVISION_TRAINING_KEY", "611e786a785648e38f346f18e7f7e7ed")
prediction_key = os.environ.get("CUSTOMVISION_PREDICTION_KEY", "611e786a785648e38f346f18e7f7e7ed")
project_id = "a67f7d7b-c980-49bd-b57d-0bd1367b29d0"

credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
trainer = CustomVisionTrainingClient(ENDPOINT, credentials)
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})
predictor = CustomVisionPredictionClient(ENDPOINT, prediction_credentials)

df = dd.read_parquet("hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet")

# Dataset label index -> Custom Vision tag name (order matters: index i
# is the integer stored in the parquet "label" column).
LABEL_NAMES = ["biodegradable", "cardboard", "glass", "metal", "paper", "plastic"]

# Build {label index: Tag or None} from the tags already defined on the
# project, replacing the original six-way if/elif chain.
tags_by_name = {tag.name: tag for tag in trainer.get_tags(project_id)}
label_tags = {i: tags_by_name.get(name) for i, name in enumerate(LABEL_NAMES)}

# Echo the resolved tags so a missing one (printed as None) is visible
# before the long upload loop starts.
for name in LABEL_NAMES:
    print(tags_by_name.get(name))

SKIP = 10031  # rows uploaded by a previous run; resume after them
count = 0
for index, row in df.iterrows():
    count += 1
    if count <= SKIP:
        continue
    image_bytes = row["image"]["bytes"]
    label = row["label"]
    tag = label_tags.get(label)
    if tag is None:
        # Unknown label value or tag absent from the project: skip this
        # row instead of crashing on tag.id, and say so in the log
        # (previously the script printed "Uploaded image" regardless).
        print(f"C: {count}, I: {index}, L: {label}, no matching tag - skipped")
    else:
        trainer.create_images_from_data(project_id, image_bytes, [tag.id])
        print(f"C: {count}, I: {index}, L: {label}, Uploaded image")
    time.sleep(1)  # throttle to respect the Custom Vision upload rate limit
print("Done uploading images")