97 lines
3.3 KiB
Python
Executable File
97 lines
3.3 KiB
Python
Executable File
import dask.dataframe as dd
|
|
|
|
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
|
|
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
|
|
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
|
|
from msrest.authentication import ApiKeyCredentials
|
|
import os, time, uuid
|
|
|
|
# Azure Custom Vision connection settings.
#
# NOTE(review): the key literals below are secrets committed to source control.
# They are kept only as fallbacks so existing runs keep working; supply the
# keys through the CUSTOM_VISION_TRAINING_KEY / CUSTOM_VISION_PREDICTION_KEY
# environment variables instead, and rotate the committed values.
ENDPOINT = "https://trashvision.cognitiveservices.azure.com/"
training_key = os.environ.get(
    "CUSTOM_VISION_TRAINING_KEY", "611e786a785648e38f346f18e7f7e7ed"
)
prediction_key = os.environ.get(
    "CUSTOM_VISION_PREDICTION_KEY", "611e786a785648e38f346f18e7f7e7ed"
)
project_id = "a67f7d7b-c980-49bd-b57d-0bd1367b29d0"

# Training client: authenticated with the training key sent in the
# "Training-key" header.
credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
trainer = CustomVisionTrainingClient(ENDPOINT, credentials)

# Prediction client: authenticated with the prediction key sent in the
# "Prediction-key" header.
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})
predictor = CustomVisionPredictionClient(ENDPOINT, prediction_credentials)
|
|
|
|
# Parquet shards of the training split, addressed through the "hf://" scheme;
# the glob matches every "train-*.parquet" file under the dataset's data dir.
_TRAIN_PARQUET_GLOB = "hf://datasets/edwinpalegre/trashnet_enhanced/data/train-*.parquet"

# Dask dataframe over the training shards; the upload loop further down reads
# the "image" and "label" columns from each row.
df = dd.read_parquet(_TRAIN_PARQUET_GLOB)
|
|
|
|
# ## Iterate over the first 5 rows of the dataframe, decode the image bytes to an image, and save it to a file
|
|
|
|
# Fetch every tag defined on the Custom Vision project and index them by name
# so each class tag can be picked out directly. If two tags share a name the
# later one wins, matching a sequential scan over the list.
tags = trainer.get_tags(project_id)
_tags_by_name = {project_tag.name: project_tag for project_tag in tags}

# One variable per waste class; a class whose tag does not exist on the
# project stays None.
biodegradable_tag = _tags_by_name.get("biodegradable")
cardboard_tag = _tags_by_name.get("cardboard")
glass_tag = _tags_by_name.get("glass")
metal_tag = _tags_by_name.get("metal")
paper_tag = _tags_by_name.get("paper")
plastic_tag = _tags_by_name.get("plastic")

# Dump the resolved tags (or None) so a missing tag is visible before uploading.
for _resolved_tag in (
    biodegradable_tag,
    cardboard_tag,
    glass_tag,
    metal_tag,
    paper_tag,
    plastic_tag,
):
    print(_resolved_tag)
|
|
|
|
# get all images from the current dir and upload them to the custom vision project
|
|
|
|
# base_image_location = os.path.join (os.path.dirname(__file__), "images")
|
|
|
|
# tagged_images_with_regions = []
|
|
|
|
# for image in os.listdir(base_image_location):
|
|
# print(image)
|
|
# with open(os.path.join(base_image_location, image), "rb") as image_contents:
|
|
# trainer.create_images_from_data(project_id, image_contents.read(), [biodegradable_tag.id])
|
|
# print("Uploaded image: ", image)
|
|
# time.sleep(5)
|
|
|
|
# Number of leading rows to pass over without uploading.
# NOTE(review): presumably these rows were already uploaded by an earlier,
# interrupted run -- confirm before changing the value.
skip = 10031

# 1-based position of the row currently being processed; it ends up equal to
# the total number of rows seen (0 if the dataframe yields no rows).
count = 0

# Dataset label index -> Custom Vision tag for that class.
tag_for_label = {
    0: biodegradable_tag,
    1: cardboard_tag,
    2: glass_tag,
    3: metal_tag,
    4: paper_tag,
    5: plastic_tag,
}

for count, (index, row) in enumerate(df.iterrows(), start=1):
    # The first `skip` rows are only counted, never uploaded.
    if count <= skip:
        continue

    image = row["image"]["bytes"]
    label = row["label"]

    # A label outside the known classes has no tag to upload under; report it
    # instead of claiming the image was uploaded.
    if label not in tag_for_label:
        print(f"C: {count}, I: {index}, L: {label}, Skipped image (unknown label)")
        continue

    tag = tag_for_label[label]
    if tag is None:
        # The tag lookup did not find this class on the project; fail with a
        # message that names the label rather than an AttributeError on None.
        raise RuntimeError(
            f"No Custom Vision tag found on project {project_id} for label {label}"
        )

    trainer.create_images_from_data(project_id, image, [tag.id])
    print(f"C: {count}, I: {index}, L: {label}, Uploaded image")

    # Pause between uploads.
    # NOTE(review): presumably to stay under the service's rate limit -- confirm.
    time.sleep(1)

print("Done uploading images")
|