"""Helpers for reading and writing dataset and category metadata records.

All functions obtain a client from ``get_mongo_client`` and operate on the
``datasets`` and ``categories`` collections of the ``DB_NAME`` database.
"""

from datetime import datetime, timezone

from .connection import get_mongo_client

DB_NAME = "hoya_metadata"


def _get_collection(name):
    """Return the collection called *name* from the ``DB_NAME`` database."""
    return get_mongo_client().get_database(DB_NAME)[name]


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime.

    Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12
    and returns a naive value.
    NOTE(review): PyMongo converts aware datetimes to UTC on write, so the
    stored instant should be unchanged -- assumes ``get_mongo_client``
    returns a PyMongo-compatible client; confirm.
    """
    return datetime.now(timezone.utc)


def get_datasets_collection():
    """Return the ``datasets`` collection."""
    return _get_collection("datasets")


def get_categories_collection():
    """Return the ``categories`` collection."""
    return _get_collection("categories")


def is_file_processed(filename):
    """Return True if a dataset record with this *filename* exists."""
    record = get_datasets_collection().find_one({"filename": filename})
    return record is not None


def log_processed_file(filename, category=None, chunk_count=0):
    """Insert a dataset record marking *filename* as processed.

    Args:
        filename: Value stored under the ``filename`` key.
        category: Value stored under the ``category`` key (default ``None``).
        chunk_count: Value stored under the ``chunk_count`` key (default 0).

    The record also carries a ``processed_at`` UTC timestamp and a
    ``status`` of ``"processed"``. No check for an existing record with the
    same filename is made here.
    """
    doc = {
        "filename": filename,
        "category": category,
        "chunk_count": chunk_count,
        "processed_at": _utc_now(),
        "status": "processed",
    }
    get_datasets_collection().insert_one(doc)


def get_all_datasets():
    """Return every dataset record as a list of dicts, without ``_id``."""
    return list(get_datasets_collection().find({}, {"_id": 0}))


def get_datasets_by_category(category):
    """Return the dataset records whose ``category`` equals *category*.

    The ``_id`` field is excluded from each returned dict.
    """
    cursor = get_datasets_collection().find({"category": category}, {"_id": 0})
    return list(cursor)


def delete_dataset_record(filename):
    """Delete one dataset record matching *filename*.

    Returns:
        True if a record was deleted, False otherwise.
    """
    result = get_datasets_collection().delete_one({"filename": filename})
    return result.deleted_count > 0


def create_category(name, description=""):
    """Create a category record unless one named *name* already exists.

    Args:
        name: Value stored under the ``name`` key.
        description: Value stored under the ``description`` key.

    Returns:
        True if the category was inserted, False if a category with this
        name was already present.
    """
    collection = get_categories_collection()
    # NOTE: the existence check and the insert are two separate operations,
    # so this is not atomic; concurrent callers could both insert.
    if collection.find_one({"name": name}):
        return False
    collection.insert_one(
        {
            "name": name,
            "description": description,
            "created_at": _utc_now(),
        }
    )
    return True


def get_all_categories():
    """Return every category record as a list of dicts, without ``_id``."""
    return list(get_categories_collection().find({}, {"_id": 0}))


def delete_category(name):
    """Delete one category record matching *name*.

    Returns:
        True if a record was deleted, False otherwise.
    """
    result = get_categories_collection().delete_one({"name": name})
    return result.deleted_count > 0