This commit is contained in:
KasaNick
2026-01-24 04:26:02 -05:00
parent 4298368b63
commit 9ac637cb41
9 changed files with 581 additions and 23 deletions

View File

@@ -17,9 +17,7 @@ def get_collection(collection_name=COLLECTION_NAME):
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
collection = get_collection(collection_name)
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
if metadata_list:
collection.add(
ids=ids,
@@ -33,22 +31,17 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
embeddings=embeddings,
documents=texts
)
return len(texts)
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
collection = get_collection(collection_name)
query_params = {
"query_embeddings": [query_embedding],
"n_results": num_results
}
if filter_metadata:
query_params["where"] = filter_metadata
results = collection.query(**query_params)
output = []
if results and results["documents"]:
for i, doc in enumerate(results["documents"][0]):
@@ -57,7 +50,6 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
"text": doc,
"score": score
})
return output
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):