Restore code and save recent updates

This commit is contained in:
2026-01-25 03:31:01 +00:00
parent bae861c71f
commit 5ce0b4d278
54 changed files with 2963 additions and 2899 deletions

View File

@@ -1,62 +1,62 @@
from .connection import get_mongo_client
from datetime import datetime
from .connection import get_mongo_client
from datetime import datetime
# Database name passed to ``client.get_database()`` by the collection helpers.
DB_NAME = "hoya_metadata"
def get_datasets_collection():
    """Return the ``"datasets"`` collection of the ``DB_NAME`` database.

    A client is obtained from ``get_mongo_client()`` on every call.
    """
    database = get_mongo_client().get_database(DB_NAME)
    return database["datasets"]
def get_categories_collection():
    """Return the ``"categories"`` collection of the ``DB_NAME`` database.

    A client is obtained from ``get_mongo_client()`` on every call.
    """
    database = get_mongo_client().get_database(DB_NAME)
    return database["categories"]
def is_file_processed(filename):
    """Report whether the datasets collection holds a record for ``filename``.

    Returns:
        ``True`` when ``find_one({"filename": filename})`` yields a document,
        ``False`` when it yields ``None``.
    """
    existing = get_datasets_collection().find_one({"filename": filename})
    return existing is not None
def log_processed_file(filename, category=None, chunk_count=0):
    """Insert a record into the datasets collection marking a file as processed.

    Args:
        filename: Value stored under the ``"filename"`` key.
        category: Value stored under the ``"category"`` key (default ``None``).
        chunk_count: Value stored under the ``"chunk_count"`` key (default ``0``).

    The record also carries a ``"processed_at"`` UTC timestamp taken at call
    time and a ``"status"`` of ``"processed"``. Nothing is returned.
    """
    # Local import: the module header only brings ``datetime`` into scope.
    from datetime import timezone

    record = {
        "filename": filename,
        "category": category,
        "chunk_count": chunk_count,
        # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware UTC
        # timestamp denotes the same instant.
        # NOTE(review): assumes the driver normalizes aware datetimes to UTC
        # on write (PyMongo does) -- confirm for the client in use.
        "processed_at": datetime.now(timezone.utc),
        "status": "processed",
    }
    get_datasets_collection().insert_one(record)
def get_all_datasets():
    """Return every document in the datasets collection as a list.

    The ``{"_id": 0}`` projection excludes the ``_id`` field from each result.
    """
    cursor = get_datasets_collection().find({}, {"_id": 0})
    return list(cursor)
def get_datasets_by_category(category):
    """Return the dataset documents whose ``"category"`` equals ``category``.

    The ``{"_id": 0}`` projection excludes the ``_id`` field from each result.
    """
    query = {"category": category}
    without_id = {"_id": 0}
    return list(get_datasets_collection().find(query, without_id))
def delete_dataset_record(filename):
    """Delete at most one dataset record matching ``filename``.

    Returns:
        ``True`` if the delete reported a ``deleted_count`` above zero,
        otherwise ``False``.
    """
    outcome = get_datasets_collection().delete_one({"filename": filename})
    return outcome.deleted_count > 0
def create_category(name, description=""):
    """Insert a category document unless one with the same name already exists.

    Args:
        name: Value stored under, and looked up by, the ``"name"`` key.
        description: Value stored under the ``"description"`` key
            (default ``""``).

    Returns:
        ``False`` without inserting when ``find_one({"name": name})`` finds a
        document; ``True`` after inserting a new one with a ``"created_at"``
        UTC timestamp.
    """
    # Local import: the module header only brings ``datetime`` into scope.
    from datetime import timezone

    categories = get_categories_collection()

    # NOTE(review): this check-then-insert is not atomic; two concurrent
    # callers could both pass the check. A unique index on "name" would be
    # needed to rule that out -- confirm whether one exists.
    if categories.find_one({"name": name}):
        return False

    categories.insert_one(
        {
            "name": name,
            "description": description,
            # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware
            # UTC timestamp denotes the same instant.
            # NOTE(review): assumes the driver normalizes aware datetimes to
            # UTC on write (PyMongo does) -- confirm for the client in use.
            "created_at": datetime.now(timezone.utc),
        }
    )
    return True
def get_all_categories():
    """Return every document in the categories collection as a list.

    The ``{"_id": 0}`` projection excludes the ``_id`` field from each result.
    """
    cursor = get_categories_collection().find({}, {"_id": 0})
    return list(cursor)
def delete_category(name):
    """Delete at most one category document matching ``name``.

    Returns:
        ``True`` if the delete reported a ``deleted_count`` above zero,
        otherwise ``False``.
    """
    outcome = get_categories_collection().delete_one({"name": name})
    return outcome.deleted_count > 0