# Hoya26/backend/src/rag/store.py

import os
from pymongo import MongoClient
from .embeddings import get_embeddings_batch

def get_mongo_client():
    """Build a MongoDB client from the ``MONGO_URI`` environment variable.

    A new ``MongoClient`` is constructed on every call; nothing is cached
    here, so the caller owns the returned client.

    Returns:
        A ``MongoClient`` created from the configured connection string.

    Raises:
        ValueError: If ``MONGO_URI`` is unset or empty.
    """
    connection_uri = os.getenv("MONGO_URI")
    if connection_uri:
        return MongoClient(connection_uri)
    raise ValueError("MONGO_URI environment variable not set")

def ingest_documents(text_chunks, collection_name="rag_documents"):
    """Embed text chunks and insert them into a MongoDB collection.

    Each chunk is stored in the ``vectors_db`` database as a document with
    two fields: ``text`` (the chunk) and ``embedding`` (the value returned
    for it by ``get_embeddings_batch``).

    Args:
        text_chunks: Iterable of text chunks to embed and store.
        collection_name: Name of the target collection inside ``vectors_db``.

    Returns:
        The number of documents inserted; ``0`` when ``text_chunks`` is empty.

    Raises:
        ValueError: If ``MONGO_URI`` is not configured (raised by
            ``get_mongo_client``), or if the number of embeddings returned
            does not match the number of chunks.
    """
    # Materialize once so a one-shot iterable is not exhausted by the
    # embedding call before it gets paired with the results below.
    chunks = list(text_chunks)
    if not chunks:
        # Nothing to store: skip both the embedding call and the DB connection.
        return 0

    # The context manager closes the client (and its connection pool) on exit,
    # including when embedding or insertion raises.
    with get_mongo_client() as client:
        collection = client.get_database("vectors_db")[collection_name]

        # Expected to yield one embedding per chunk, in input order.
        embeddings = list(get_embeddings_batch(chunks))
        if len(embeddings) != len(chunks):
            # zip() would silently drop the unmatched tail; fail loudly instead
            # of storing an incomplete batch.
            raise ValueError(
                f"Expected {len(chunks)} embeddings, got {len(embeddings)}"
            )

        documents = [
            {"text": text, "embedding": embedding}
            for text, embedding in zip(chunks, embeddings)
        ]
        collection.insert_many(documents)

    return len(documents)

def vector_search(query_text, collection_name="rag_documents", num_results=5):
    """Run a ``$vectorSearch`` aggregation for the documents nearest a query.

    The query text is embedded with ``get_embedding`` and searched against
    the ``embedding`` field using the search index named ``vector_index`` in
    the ``vectors_db`` database.

    Args:
        query_text: Text to embed and search for.
        collection_name: Name of the collection inside ``vectors_db``.
        num_results: Maximum number of matches to return.

    Returns:
        A list of dicts, each with ``text`` and ``score`` (the
        ``vectorSearchScore`` metadata). Empty when ``num_results`` is not
        positive.

    Raises:
        ValueError: If ``MONGO_URI`` is not configured (raised by
            ``get_mongo_client``).
    """
    if num_results <= 0:
        # A non-positive limit cannot match anything; avoid paying for an
        # embedding call and a round trip that the server would reject.
        return []

    from .embeddings import get_embedding

    query_embedding = get_embedding(query_text)

    # Over-fetch 10x candidates for recall, but stay within the documented
    # numCandidates ceiling (10000) and never drop below the requested limit.
    max_candidates = 10_000
    num_candidates = max(num_results, min(num_results * 10, max_candidates))

    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": num_results,
            }
        },
        {
            "$project": {
                "_id": 0,
                "text": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]

    # Close the client when done; the cursor is fully materialized inside the
    # block so no results are read after the connection is released.
    with get_mongo_client() as client:
        collection = client.get_database("vectors_db")[collection_name]
        return list(collection.aggregate(pipeline))