"""Store text chunks with their embeddings in MongoDB and query them.

Documents are written as ``{"text": ..., "embedding": ...}`` and searched
with a ``$vectorSearch`` aggregation stage.
"""

import os

from pymongo import MongoClient

from .embeddings import get_embeddings_batch

# Database that holds every collection this module reads or writes.
_DATABASE_NAME = "vectors_db"
# Name of the vector search index queried by vector_search().
_VECTOR_INDEX_NAME = "vector_index"
# $vectorSearch rejects numCandidates values above this bound.
_MAX_NUM_CANDIDATES = 10000


def get_mongo_client():
    """Return a new ``MongoClient`` built from the ``MONGO_URI`` variable.

    The caller owns the returned client and is responsible for closing it.

    Raises:
        ValueError: If ``MONGO_URI`` is unset or empty.
    """
    uri = os.environ.get("MONGO_URI")
    if not uri:
        raise ValueError("MONGO_URI environment variable not set")
    return MongoClient(uri)


def ingest_documents(text_chunks, collection_name="rag_documents"):
    """Embed ``text_chunks`` and insert them into ``collection_name``.

    Each chunk is stored as ``{"text": chunk, "embedding": vector}``.

    Args:
        text_chunks: Iterable of texts to embed and store.
        collection_name: Target collection inside the ``vectors_db`` database.

    Returns:
        The number of documents inserted; ``0`` when ``text_chunks`` is empty
        (in which case neither the embedding backend nor MongoDB is touched).

    Raises:
        ValueError: If ``MONGO_URI`` is not set, or if the number of
            embeddings returned does not match the number of chunks.
    """
    # Materialize once so a one-shot iterable (e.g. a generator) can be both
    # embedded and paired with its vectors afterwards.
    chunks = list(text_chunks)
    if not chunks:
        return 0

    client = get_mongo_client()
    try:
        collection = client.get_database(_DATABASE_NAME)[collection_name]

        embeddings = list(get_embeddings_batch(chunks))
        # zip() would silently drop the unmatched tail; fail loudly instead
        # of storing only part of the input.
        if len(embeddings) != len(chunks):
            raise ValueError(
                f"Expected {len(chunks)} embeddings, got {len(embeddings)}"
            )

        documents = [
            {"text": text, "embedding": embedding}
            for text, embedding in zip(chunks, embeddings)
        ]
        collection.insert_many(documents)
        return len(documents)
    finally:
        # A fresh client is created per call, so release its connections.
        client.close()


def vector_search(query_text, collection_name="rag_documents", num_results=5):
    """Return the stored texts most similar to ``query_text``.

    Args:
        query_text: Text to embed and use as the query vector.
        collection_name: Collection inside ``vectors_db`` to search.
        num_results: Maximum number of matches to return; must be >= 1.

    Returns:
        A list of dicts with the keys ``"text"`` and ``"score"`` (the
        ``vectorSearchScore`` metadata of each match).

    Raises:
        ValueError: If ``num_results`` is less than 1 or ``MONGO_URI`` is
            not set.
    """
    from .embeddings import get_embedding

    # Reject an unusable limit before paying for an embedding call.
    if num_results < 1:
        raise ValueError("num_results must be a positive integer")

    query_embedding = get_embedding(query_text)

    pipeline = [
        {
            "$vectorSearch": {
                "index": _VECTOR_INDEX_NAME,
                "path": "embedding",
                "queryVector": query_embedding,
                # Oversample 10x the requested results, but stay within the
                # server-side maximum so large requests are not rejected.
                "numCandidates": min(num_results * 10, _MAX_NUM_CANDIDATES),
                "limit": num_results,
            }
        },
        {
            "$project": {
                "_id": 0,
                "text": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]

    client = get_mongo_client()
    try:
        collection = client.get_database(_DATABASE_NAME)[collection_name]
        # Drain the cursor before the client is closed in ``finally``.
        return list(collection.aggregate(pipeline))
    finally:
        client.close()