Code Update

2026-02-03 19:24:34 -05:00 · 2026-01-24 04:01:05 +00:00
parent c87279ed52
commit aad7e6e08d
13 changed files with 18 additions and 469 deletions
--- a/backend/src/init.py
+++ b/backend/src/init.py
@@ -5,9 +5,8 @@ from .routes.rag import rag_bp

 def create_app():
    app = Flask(__name__)
-    CORS(app) # Enable CORS for all routes
+    CORS(app)
    
-    # Register Blueprints
    app.register_blueprint(main_bp)
    app.register_blueprint(rag_bp, url_prefix='/api/rag')
    
--- a/backend/src/gemini/client.py
+++ b/backend/src/gemini/client.py
@@ -2,10 +2,6 @@ from google import genai
 import os

 def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
-    """
-    Generates content using the Google GenAI SDK.
-    Defaults to gemini-2.0-flash-exp as per request (or similar).
-    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error: GOOGLE_API_KEY not found."
--- a/backend/src/rag/embeddings.py
+++ b/backend/src/rag/embeddings.py
@@ -2,9 +2,6 @@ from google import genai
 import os

 def get_embedding(text, model="gemini-embedding-001"):
-    """
-    Generates an embedding for the given text using the Gemini API.
-    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY environment variable not set")
@@ -17,9 +14,6 @@ def get_embedding(text, model="gemini-embedding-001"):
    return result.embeddings[0].values

 def get_embeddings_batch(texts, model="gemini-embedding-001"):
-    """
-    Generates embeddings for a list of texts.
-    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY environment variable not set")
@@ -29,5 +23,4 @@ def get_embeddings_batch(texts, model="gemini-embedding-001"):
        model=model,
        contents=texts
    )
-    # The SDK returns a list of embedding objects
    return [emb.values for emb in result.embeddings]
--- a/backend/src/rag/ingest.py
+++ b/backend/src/rag/ingest.py
@@ -4,18 +4,10 @@ import io
 import os

 def load_csv(file_path):
-    """
-    Loads a CSV file and returns a list of strings (one per row).
-    This is a simplistic implementation - in production you might want specific columns.
-    """
    df = pd.read_csv(file_path)
-    # Convert each row to a string representation
    return df.apply(lambda x: ' | '.join(x.astype(str)), axis=1).tolist()

 def load_pdf(file_path):
-    """
-    Loads a PDF file and returns a list of strings (one per page).
-    """
    reader = PdfReader(file_path)
    text_chunks = []
    for page in reader.pages:
@@ -25,9 +17,6 @@ def load_pdf(file_path):
    return text_chunks

 def process_file(file_path):
-    """
-    Determines file type and returns text chunks.
-    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == '.csv':
        return load_csv(file_path)
--- a/backend/src/rag/store.py
+++ b/backend/src/rag/store.py
@@ -9,14 +9,10 @@ def get_mongo_client():
    return MongoClient(uri)

 def ingest_documents(text_chunks, collection_name="rag_documents"):
-    """
-    Generates embeddings for text chunks and stores them in MongoDB.
-    """
    client = get_mongo_client()
-    db = client.get_database("vectors_db") # Default DB name
+    db = client.get_database("vectors_db")
    collection = db[collection_name]
    
-    # Generate embeddings in batches (handling API limits might be needed for large sets)
    embeddings = get_embeddings_batch(text_chunks)
    
    documents = []
@@ -32,10 +28,6 @@ def ingest_documents(text_chunks, collection_name="rag_documents"):
    return 0

 def vector_search(query_text, collection_name="rag_documents", num_results=5):
-    """
-    Performs a vector search in MongoDB.
-    """
-    # 1. Get embedding for the query
    from .embeddings import get_embedding
    query_embedding = get_embedding(query_text)
    
@@ -43,7 +35,6 @@ def vector_search(query_text, collection_name="rag_documents", num_results=5):
    db = client.get_database("vectors_db")
    collection = db[collection_name]
    
-    # Note: You must have a vector search index defined in MongoDB Atlas for this to work.
    pipeline = [
        {
            "$vectorSearch": {