Merge branch 'main' of https://github.com/SirBlobby/Hoya26

2026-02-03 19:24:34 -05:00 · 2026-01-24 10:01:06 -05:00
parent 824395d6b2 c46ec76027
commit 4183d7f122
7 changed files with 134 additions and 44 deletions
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,47 +1,7 @@
 import os
-from flask import Flask, request, jsonify
-from flask_cors import CORS
+from src import create_app

-from src.rag.gemeni import GeminiClient
-from src.mongo import get_database
-
-app = Flask(__name__)
-CORS(app)  
-
-try:
-    brain = GeminiClient()
-    db = get_database()
-    print("--- Backend Components Initialized Successfully ---")
-except Exception as e:
-    print(f"CRITICAL ERROR during initialization: {e}")
-
-@app.route('/')
-def health_check():
-    return {
-        "status": "online",
-        "message": "The Waiter is ready at the counter!"
-    }
-
-@app.route('/chat', methods=['POST'])
-def chat():
-    data = request.json
-    user_query = data.get("message")
-
-    if not user_query:
-        return jsonify({"error": "You didn't say anything!"}), 400
-
-    try:
-        context = ""
-        ai_reply = brain.ask(user_query, context)
-        return jsonify({
-            "status": "success",
-            "reply": ai_reply
-        })
-    except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": str(e)
-        }), 500
+app = create_app()

 if __name__ == "__main__":
    app.run(debug=True, port=5000)
--- a/backend/scripts/populate_db.py
+++ b/backend/scripts/populate_db.py
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
-    parser.add_argument("--category", "-c", type=str)
-    parser.add_argument("--dir", "-d", type=str, default=None)
+    parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
+    parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
    args = parser.parse_args()
    
    if args.dir:
--- a/backend/src/init.py
+++ b/backend/src/init.py
@@ -2,6 +2,7 @@ from flask import Flask
 from flask_cors import CORS
 from .routes.main import main_bp
 from .routes.rag import rag_bp
+from .routes.gemini import gemini_bp

 def create_app():
    app = Flask(__name__)
@@ -9,5 +10,6 @@ def create_app():
    
    app.register_blueprint(main_bp)
    app.register_blueprint(rag_bp, url_prefix='/api/rag')
+    app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
    
    return app
--- a/backend/src/chroma/vector_store.py
+++ b/backend/src/chroma/vector_store.py
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):

 def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
    collection = get_collection(collection_name)
+    
    ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
+    
    if metadata_list:
        collection.add(
            ids=ids,
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
            embeddings=embeddings,
            documents=texts
        )
+    
    return len(texts)

 def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
    collection = get_collection(collection_name)
+    
    query_params = {
        "query_embeddings": [query_embedding],
        "n_results": num_results
    }
+    
    if filter_metadata:
        query_params["where"] = filter_metadata
+    
    results = collection.query(**query_params)
+    
    output = []
    if results and results["documents"]:
        for i, doc in enumerate(results["documents"][0]):
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
                "text": doc,
                "score": score
            })
+    
    return output

 def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
--- a/backend/src/gemini/init.py
+++ b/backend/src/gemini/init.py
@@ -0,0 +1,57 @@
+from src.rag.embeddings import get_embedding
+from src.chroma.vector_store import search_documents
+from .client import generate_content
+
+GREENWASHING_ANALYSIS_PROMPT = """
+You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
+Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
+
+Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
+
+Please evaluate the following:
+1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
+2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
+3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
+4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
+
+Based on the context provided, give a final verdict:
+- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
+- REASONING: [Explain your findings clearly]
+- EVIDENCE: [Quote specific parts of the context if possible]
+- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
+"""  # Instruction preamble; analyze_greenwashing places it ahead of the retrieved context and the user query.
+
+def analyze_greenwashing(query, category=None, num_results=10):
+    """Run a retrieval-augmented greenwashing analysis for ``query``.
+
+    Embeds the query, requests ``num_results`` stored chunks (filtered by
+    ``category`` metadata when one is given) and sends the assembled prompt on.
+
+    Returns the value produced by ``generate_content``, a fixed notice when no
+    chunks are found, or an "Error in Analysis process" string on any exception.
+    """
+    try:
+        where = {"category": category} if category else None
+
+        hits = search_documents(
+            get_embedding(query),
+            num_results=num_results,
+            filter_metadata=where,
+        )
+        # Guard clause: with nothing retrieved there is no context to analyze.
+        if not hits:
+            return "No data found in the report to analyze. Please upload a report first."
+
+        chunks = "".join(f"RELEVANT DATA CHUNK: {hit['text']}\n\n" for hit in hits)
+        context = (
+            "--- START OF REPORT CONTEXT ---\n"
+            f"{chunks}"
+            "--- END OF REPORT CONTEXT ---\n"
+        )
+        full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
+        return generate_content(full_prompt)
+    except Exception as e:
+        return f"Error in Analysis process: {str(e)}"
+
+def ask_gemini_with_rag(query, category=None, num_results=5):  # RAG entry point; delegates to analyze_greenwashing
+    return analyze_greenwashing(query, category=category, num_results=num_results)
--- a/backend/src/gemini/client.py
+++ b/backend/src/gemini/client.py
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error: GOOGLE_API_KEY not found."
+
    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
--- a/backend/src/routes/gemini.py
+++ b/backend/src/routes/gemini.py
@@ -0,0 +1,62 @@
+from flask import Blueprint, request, jsonify
+from src.rag.gemeni import GeminiClient
+from src.gemini import ask_gemini_with_rag
+
+gemini_bp = Blueprint('gemini', __name__)  # registered by create_app under the /api/gemini prefix
+brain = None  # module-level GeminiClient slot; filled on first use by get_brain()
+
+def get_brain():
+    """Return the module-level GeminiClient, constructing it on first call."""
+    global brain
+    brain = GeminiClient() if brain is None else brain
+    return brain
+
+@gemini_bp.route('/ask', methods=['POST'])
+def ask():
+    """Answer a prompt via GeminiClient; body is JSON with "prompt" and optional "context".
+
+    Responds 400 when the body is missing, is not a JSON object, or has no
+    prompt; 500 with the exception text when the client call raises.
+    """
+    # silent=True yields None instead of aborting on a missing/invalid JSON
+    # body, so such requests get the same 400 as an empty prompt.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or not data.get("prompt"):
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        reply = get_brain().ask(data["prompt"], data.get("context", ""))
+        return jsonify({"status": "success", "reply": reply})
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/rag', methods=['POST'])
+def rag():
+    """Run the RAG analysis; body is JSON with "prompt" and optional "category".
+
+    Responds 400 when the body is missing, is not a JSON object, or has no
+    prompt; 500 with the exception text when the analysis call raises.
+    """
+    # silent=True: a missing or malformed JSON body becomes None rather than an abort.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or not data.get("prompt"):
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        reply = ask_gemini_with_rag(data["prompt"], category=data.get("category"))
+        return jsonify({"status": "success", "reply": reply})
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/vision', methods=['POST'])
+def vision():
+    """Placeholder for the vision endpoint; always responds with HTTP 501."""
+    # No request data is read: the route only reports that it is unimplemented.
+    payload = {"status": "error", "message": "Vision endpoint not yet implemented"}
+    return jsonify(payload), 501