From c46ec76027ccdfa608585974024d2e188bafebca Mon Sep 17 00:00:00 2001
From: KasaNick
Date: Sat, 24 Jan 2026 06:33:29 -0500
Subject: [PATCH] gemini endpoints

---
 backend/app.py                     | 44 +--------------------
 backend/scripts/populate_db.py     |  4 +-
 backend/src/__init__.py            |  2 +
 backend/src/chroma/vector_store.py |  8 ++++
 backend/src/gemini/__init__.py     | 57 +++++++++++++++++++++++++++
 backend/src/gemini/client.py       |  1 +
 backend/src/routes/gemini.py       | 62 ++++++++++++++++++++++++++++++
 7 files changed, 134 insertions(+), 44 deletions(-)
 create mode 100644 backend/src/routes/gemini.py

diff --git a/backend/app.py b/backend/app.py
index 30abf10..6524bdb 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,47 +1,7 @@
 import os
-from flask import Flask, request, jsonify
-from flask_cors import CORS
+from src import create_app
 
-from src.rag.gemeni import GeminiClient
-from src.mongo import get_database
-
-app = Flask(__name__)
-CORS(app)
-
-try:
-    brain = GeminiClient()
-    db = get_database()
-    print("--- Backend Components Initialized Successfully ---")
-except Exception as e:
-    print(f"CRITICAL ERROR during initialization: {e}")
-
-@app.route('/')
-def health_check():
-    return {
-        "status": "online",
-        "message": "The Waiter is ready at the counter!"
-    }
-
-@app.route('/chat', methods=['POST'])
-def chat():
-    data = request.json
-    user_query = data.get("message")
-
-    if not user_query:
-        return jsonify({"error": "You didn't say anything!"}), 400
-
-    try:
-        context = ""
-        ai_reply = brain.ask(user_query, context)
-        return jsonify({
-            "status": "success",
-            "reply": ai_reply
-        })
-    except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": str(e)
-        }), 500
+app = create_app()
 
 if __name__ == "__main__":
     app.run(debug=True, port=5000)
\ No newline at end of file
diff --git a/backend/scripts/populate_db.py b/backend/scripts/populate_db.py
index 1408974..3865dcd 100644
--- a/backend/scripts/populate_db.py
+++ b/backend/scripts/populate_db.py
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
-    parser.add_argument("--category", "-c", type=str)
-    parser.add_argument("--dir", "-d", type=str, default=None)
+    parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
+    parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
     args = parser.parse_args()
 
     if args.dir:
diff --git a/backend/src/__init__.py b/backend/src/__init__.py
index b8a9680..63adc7f 100644
--- a/backend/src/__init__.py
+++ b/backend/src/__init__.py
@@ -2,6 +2,7 @@ from flask import Flask
 from flask_cors import CORS
 from .routes.main import main_bp
 from .routes.rag import rag_bp
+from .routes.gemini import gemini_bp
 
 def create_app():
     app = Flask(__name__)
@@ -9,5 +10,6 @@ def create_app():
 
     app.register_blueprint(main_bp)
     app.register_blueprint(rag_bp, url_prefix='/api/rag')
+    app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
 
     return app
diff --git a/backend/src/chroma/vector_store.py b/backend/src/chroma/vector_store.py
index 799f584..bcb259a 100644
--- a/backend/src/chroma/vector_store.py
+++ b/backend/src/chroma/vector_store.py
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):
 
 def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
     collection = get_collection(collection_name)
+
     ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
+
     if metadata_list:
         collection.add(
             ids=ids,
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
             embeddings=embeddings,
             documents=texts
         )
+
     return len(texts)
 
 def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
     collection = get_collection(collection_name)
+
     query_params = {
         "query_embeddings": [query_embedding],
         "n_results": num_results
     }
+
     if filter_metadata:
         query_params["where"] = filter_metadata
+
     results = collection.query(**query_params)
+
     output = []
     if results and results["documents"]:
         for i, doc in enumerate(results["documents"][0]):
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
             "text": doc,
             "score": score
         })
+
     return output
 
 def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
diff --git a/backend/src/gemini/__init__.py b/backend/src/gemini/__init__.py
index e69de29..aa52ae2 100644
--- a/backend/src/gemini/__init__.py
+++ b/backend/src/gemini/__init__.py
@@ -0,0 +1,57 @@
+from src.rag.embeddings import get_embedding
+from src.chroma.vector_store import search_documents
+from .client import generate_content
+
+GREENWASHING_ANALYSIS_PROMPT = """
+You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
+Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
+
+Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
+
+Please evaluate the following:
+1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
+2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
+3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
+4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
+
+Based on the context provided, give a final verdict:
+- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
+- REASONING: [Explain your findings clearly]
+- EVIDENCE: [Quote specific parts of the context if possible]
+- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
+"""
+
+def analyze_greenwashing(query, category=None, num_results=10):
+    try:
+        query_embedding = get_embedding(query)
+
+        filter_metadata = None
+        if category:
+            filter_metadata = {"category": category}
+
+        search_results = search_documents(
+            query_embedding,
+            num_results=num_results,
+            filter_metadata=filter_metadata
+        )
+
+        context = ""
+        if search_results:
+            context = "--- START OF REPORT CONTEXT ---\n"
+            for res in search_results:
+                context += f"RELEVANT DATA CHUNK: {res['text']}\n\n"
+            context += "--- END OF REPORT CONTEXT ---\n"
+
+        if context:
+            full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
+        else:
+            return "No data found in the report to analyze. Please upload a report first."
+
+        response = generate_content(full_prompt)
+        return response
+
+    except Exception as e:
+        return f"Error in Analysis process: {str(e)}"
+
+def ask_gemini_with_rag(query, category=None, num_results=5):
+    return analyze_greenwashing(query, category, num_results)
diff --git a/backend/src/gemini/client.py b/backend/src/gemini/client.py
index 5fb5095..fef0733 100644
--- a/backend/src/gemini/client.py
+++ b/backend/src/gemini/client.py
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
     api_key = os.environ.get("GOOGLE_API_KEY")
     if not api_key:
         return "Error: GOOGLE_API_KEY not found."
+
     try:
         client = genai.Client(api_key=api_key)
         response = client.models.generate_content(
diff --git a/backend/src/routes/gemini.py b/backend/src/routes/gemini.py
new file mode 100644
index 0000000..c662ec8
--- /dev/null
+++ b/backend/src/routes/gemini.py
@@ -0,0 +1,62 @@
+from flask import Blueprint, request, jsonify
+from src.rag.gemeni import GeminiClient
+from src.gemini import ask_gemini_with_rag
+
+gemini_bp = Blueprint('gemini', __name__)
+brain = None
+
+def get_brain():
+    global brain
+    if brain is None:
+        brain = GeminiClient()
+    return brain
+
+@gemini_bp.route('/ask', methods=['POST'])
+def ask():
+    data = request.json
+    prompt = data.get("prompt")
+    context = data.get("context", "")
+
+    if not prompt:
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        client = get_brain()
+        response = client.ask(prompt, context)
+        return jsonify({
+            "status": "success",
+            "reply": response
+        })
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/rag', methods=['POST'])
+def rag():
+    data = request.json
+    prompt = data.get("prompt")
+    category = data.get("category")
+
+    if not prompt:
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        response = ask_gemini_with_rag(prompt, category=category)
+        return jsonify({
+            "status": "success",
+            "reply": response
+        })
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/vision', methods=['POST'])
+def vision():
+    return jsonify({
+        "status": "error",
+        "message": "Vision endpoint not yet implemented"
+    }), 501
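
Usage note (illustrative; not part of the diff above): assuming the Flask dev server from backend/app.py is running locally on port 5000, GOOGLE_API_KEY is exported for the backend process, and the Chroma collection has been populated with backend/scripts/populate_db.py, the new /api/gemini endpoints could be exercised with a short requests-based sketch like the one below. The endpoint paths, JSON field names ("prompt", "context", "category") and the "status"/"reply" response shape come from the patch; the base URL, example prompts, the "energy" category value, and the timeouts are assumptions for illustration only.

    # Illustrative client sketch; assumes the backend is reachable at localhost:5000
    # and that `requests` is installed in the calling environment.
    import requests

    BASE_URL = "http://localhost:5000/api/gemini"  # assumed local dev address

    # /ask: plain Gemini call; the body carries "prompt" plus an optional "context" string.
    ask_resp = requests.post(
        f"{BASE_URL}/ask",
        json={"prompt": "Summarize the key ESG claims in two sentences.", "context": ""},
        timeout=60,
    )
    print(ask_resp.status_code, ask_resp.json().get("reply"))

    # /rag: greenwashing analysis grounded in the vector store; the optional "category"
    # becomes a metadata filter on the Chroma query ("energy" is a made-up example value).
    rag_resp = requests.post(
        f"{BASE_URL}/rag",
        json={"prompt": "Acme Corp sustainability report", "category": "energy"},
        timeout=120,
    )
    print(rag_resp.status_code, rag_resp.json().get("reply"))

A POST to /api/gemini/vision currently returns 501, matching the placeholder route in the new blueprint.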