Populate DB Chromadb

This commit is contained in:
2026-01-24 07:52:48 +00:00
parent d145f7e94c
commit 4298368b63
10 changed files with 279 additions and 48 deletions

View File

@@ -0,0 +1,62 @@
from datetime import datetime, timezone

from .connection import get_mongo_client
# Name of the database (looked up on the client from get_mongo_client())
# that holds the "datasets" and "categories" collections used below.
DB_NAME = "hoya_metadata"
def get_datasets_collection():
    """Return the ``datasets`` collection of the metadata database.

    A client is obtained from ``get_mongo_client()`` and the database named
    by ``DB_NAME`` is looked up on every call.
    """
    database = get_mongo_client().get_database(DB_NAME)
    return database["datasets"]
def get_categories_collection():
    """Return the ``categories`` collection of the metadata database.

    A client is obtained from ``get_mongo_client()`` and the database named
    by ``DB_NAME`` is looked up on every call.
    """
    database = get_mongo_client().get_database(DB_NAME)
    return database["categories"]
def is_file_processed(filename):
    """Return True if a dataset record with this ``filename`` exists.

    Args:
        filename: Value matched against the ``filename`` field.

    Returns:
        bool: True when ``find_one`` returns a document, False otherwise.
    """
    match = get_datasets_collection().find_one({"filename": filename})
    return match is not None
def log_processed_file(filename, category=None, chunk_count=0):
    """Insert a record marking ``filename`` as processed.

    One document is inserted into the ``datasets`` collection with the
    given values, the current UTC time, and a ``"processed"`` status.

    Args:
        filename: Stored under ``filename``; the lookup and delete helpers
            in this module match records on this field.
        category: Stored under ``category`` (default ``None``).
        chunk_count: Stored under ``chunk_count`` (default ``0``).

    Returns:
        None.
    """
    record = {
        "filename": filename,
        "category": category,
        "chunk_count": chunk_count,
        # datetime.utcnow() is deprecated since Python 3.12 and yields a
        # naive value; an explicit timezone-aware UTC timestamp is
        # unambiguous and is stored as the same UTC instant.
        "processed_at": datetime.now(timezone.utc),
        "status": "processed",
    }
    # NOTE(review): this always inserts, so logging the same filename twice
    # creates two records -- confirm callers check is_file_processed first.
    get_datasets_collection().insert_one(record)
def get_all_datasets():
    """Return all dataset records as a list, with ``_id`` projected out."""
    without_id = {"_id": 0}
    cursor = get_datasets_collection().find({}, without_id)
    return list(cursor)
def get_datasets_by_category(category):
    """Return the dataset records whose ``category`` equals the argument.

    Args:
        category: Value matched against the ``category`` field.

    Returns:
        list: Matching documents with ``_id`` projected out.
    """
    query = {"category": category}
    without_id = {"_id": 0}
    cursor = get_datasets_collection().find(query, without_id)
    return list(cursor)
def delete_dataset_record(filename):
    """Delete one dataset record matching ``filename``.

    Args:
        filename: Value matched against the ``filename`` field.

    Returns:
        bool: True if a document was deleted, False if none matched.
    """
    outcome = get_datasets_collection().delete_one({"filename": filename})
    return outcome.deleted_count > 0
def create_category(name, description=""):
    """Create a category unless one with the same name already exists.

    Args:
        name: Category name; used as the lookup key.
        description: Text stored under ``description`` (default ``""``).

    Returns:
        bool: False if a category named ``name`` was already present (no
        write is performed), True after a new document has been inserted.
    """
    categories = get_categories_collection()
    if categories.find_one({"name": name}) is not None:
        return False

    categories.insert_one(
        {
            "name": name,
            "description": description,
            # datetime.utcnow() is deprecated since Python 3.12 and yields
            # a naive value; use an explicit timezone-aware UTC timestamp,
            # which is stored as the same UTC instant.
            "created_at": datetime.now(timezone.utc),
        }
    )
    return True
def get_all_categories():
    """Return all category records as a list, with ``_id`` projected out."""
    without_id = {"_id": 0}
    cursor = get_categories_collection().find({}, without_id)
    return list(cursor)
def delete_category(name):
    """Delete one category record matching ``name``.

    Args:
        name: Value matched against the ``name`` field.

    Returns:
        bool: True if a document was deleted, False if none matched.
    """
    outcome = get_categories_collection().delete_one({"name": name})
    return outcome.deleted_count > 0