Files
Hoya26/backend/src/rag/store.py
2026-01-24 04:01:05 +00:00

59 lines
1.6 KiB
Python

import os
from pymongo import MongoClient
from .embeddings import get_embeddings_batch
def get_mongo_client():
    """Create a MongoClient from the ``MONGO_URI`` environment variable.

    A new client object is constructed on every call.

    Returns:
        A ``MongoClient`` built from the configured connection string.

    Raises:
        ValueError: If ``MONGO_URI`` is unset or empty.
    """
    connection_string = os.getenv("MONGO_URI")
    if connection_string:
        return MongoClient(connection_string)
    raise ValueError("MONGO_URI environment variable not set")
def ingest_documents(text_chunks, collection_name="rag_documents"):
    """Embed text chunks and insert them into the ``vectors_db`` database.

    Each stored document has the shape ``{"text": ..., "embedding": ...}``.

    Args:
        text_chunks: Iterable of texts to embed and store. It is materialized
            into a list first, so one-shot iterables such as generators are
            not exhausted by the embedding call before being paired up.
        collection_name: Name of the target collection inside ``vectors_db``.

    Returns:
        The number of documents inserted, or 0 when there is nothing to
        ingest.

    Raises:
        ValueError: If ``MONGO_URI`` is not configured (raised by
            ``get_mongo_client``), or if the number of embeddings returned
            does not match the number of chunks.
    """
    chunks = list(text_chunks)
    if not chunks:
        # Nothing to store: skip both the embedding call and the connection.
        return 0

    client = get_mongo_client()
    try:
        embeddings = list(get_embeddings_batch(chunks))
        if len(embeddings) != len(chunks):
            # zip() would silently drop the unmatched tail; fail loudly
            # instead of storing only part of the input.
            raise ValueError(
                f"Expected {len(chunks)} embeddings, got {len(embeddings)}"
            )

        documents = [
            {"text": text, "embedding": embedding}
            for text, embedding in zip(chunks, embeddings)
        ]
        collection = client.get_database("vectors_db")[collection_name]
        collection.insert_many(documents)
        return len(documents)
    finally:
        # Every call creates its own client, so release its sockets here.
        client.close()
def vector_search(query_text, collection_name="rag_documents", num_results=5):
    """Return the stored texts most similar to ``query_text``.

    Embeds the query and runs a ``$vectorSearch`` aggregation against the
    ``vector_index`` index on the ``embedding`` field of the chosen
    collection in ``vectors_db``.

    Args:
        query_text: Text to embed and search for.
        collection_name: Name of the collection inside ``vectors_db``.
        num_results: Maximum number of matches to return.

    Returns:
        A list of ``{"text": ..., "score": ...}`` dicts as produced by the
        aggregation; an empty list when ``num_results`` is less than 1.

    Raises:
        ValueError: If ``MONGO_URI`` is not configured (raised by
            ``get_mongo_client``).
    """
    if num_results < 1:
        # A non-positive limit is rejected by the server; asking for no
        # results simply yields none.
        return []

    # Imported at call time rather than at module import time.
    from .embeddings import get_embedding

    query_embedding = get_embedding(query_text)

    # Oversample 10x for better recall, but keep numCandidates within the
    # documented bounds: at least `limit`, at most 10000.
    max_candidates = 10000
    num_candidates = max(num_results, min(num_results * 10, max_candidates))

    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": num_results,
            }
        },
        {
            "$project": {
                "_id": 0,
                "text": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]

    client = get_mongo_client()
    try:
        collection = client.get_database("vectors_db")[collection_name]
        # Materialize the cursor before the client is closed.
        return list(collection.aggregate(pipeline))
    finally:
        # Every call creates its own client, so release its sockets here.
        client.close()