mirror of
https://github.com/SirBlobby/Hoya26.git
synced 2026-02-04 03:34:34 -05:00
Merge branch 'main' of https://github.com/SirBlobby/Hoya26
This commit is contained in:
@@ -1,47 +1,7 @@
|
|||||||
# Application entry point.
#
# Reconstructed post-merge version: the app is now assembled by the
# create_app() factory in the src package (routes live in blueprints),
# replacing the earlier module-level Flask app defined in this file.
import os

from src import create_app

app = create_app()

if __name__ == "__main__":
    # Development server only; front this with a WSGI server in production.
    app.run(debug=True, port=5000)
|
||||||
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
|
parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
|
||||||
parser.add_argument("--category", "-c", type=str)
|
parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
|
||||||
parser.add_argument("--dir", "-d", type=str, default=None)
|
parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.dir:
|
if args.dir:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from flask import Flask
|
|||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from .routes.main import main_bp
|
from .routes.main import main_bp
|
||||||
from .routes.rag import rag_bp
|
from .routes.rag import rag_bp
|
||||||
|
from .routes.gemini import gemini_bp
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
@@ -9,5 +10,6 @@ def create_app():
|
|||||||
|
|
||||||
app.register_blueprint(main_bp)
|
app.register_blueprint(main_bp)
|
||||||
app.register_blueprint(rag_bp, url_prefix='/api/rag')
|
app.register_blueprint(rag_bp, url_prefix='/api/rag')
|
||||||
|
app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):
|
|||||||
|
|
||||||
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
|
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
|
||||||
collection = get_collection(collection_name)
|
collection = get_collection(collection_name)
|
||||||
|
|
||||||
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
|
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
|
||||||
|
|
||||||
if metadata_list:
|
if metadata_list:
|
||||||
collection.add(
|
collection.add(
|
||||||
ids=ids,
|
ids=ids,
|
||||||
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
|
|||||||
embeddings=embeddings,
|
embeddings=embeddings,
|
||||||
documents=texts
|
documents=texts
|
||||||
)
|
)
|
||||||
|
|
||||||
return len(texts)
|
return len(texts)
|
||||||
|
|
||||||
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
|
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
|
||||||
collection = get_collection(collection_name)
|
collection = get_collection(collection_name)
|
||||||
|
|
||||||
query_params = {
|
query_params = {
|
||||||
"query_embeddings": [query_embedding],
|
"query_embeddings": [query_embedding],
|
||||||
"n_results": num_results
|
"n_results": num_results
|
||||||
}
|
}
|
||||||
|
|
||||||
if filter_metadata:
|
if filter_metadata:
|
||||||
query_params["where"] = filter_metadata
|
query_params["where"] = filter_metadata
|
||||||
|
|
||||||
results = collection.query(**query_params)
|
results = collection.query(**query_params)
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
if results and results["documents"]:
|
if results and results["documents"]:
|
||||||
for i, doc in enumerate(results["documents"][0]):
|
for i, doc in enumerate(results["documents"][0]):
|
||||||
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
|
|||||||
"text": doc,
|
"text": doc,
|
||||||
"score": score
|
"score": score
|
||||||
})
|
})
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
|
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
from src.rag.embeddings import get_embedding
|
||||||
|
from src.chroma.vector_store import search_documents
|
||||||
|
from .client import generate_content
|
||||||
|
|
||||||
|
GREENWASHING_ANALYSIS_PROMPT = """
|
||||||
|
You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
|
||||||
|
Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
|
||||||
|
|
||||||
|
Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
|
||||||
|
|
||||||
|
Please evaluate the following:
|
||||||
|
1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
|
||||||
|
2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
|
||||||
|
3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
|
||||||
|
4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
|
||||||
|
|
||||||
|
Based on the context provided, give a final verdict:
|
||||||
|
- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
|
||||||
|
- REASONING: [Explain your findings clearly]
|
||||||
|
- EVIDENCE: [Quote specific parts of the context if possible]
|
||||||
|
- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def analyze_greenwashing(query, category=None, num_results=10):
    """Run a RAG-backed greenwashing analysis for *query*.

    Embeds the query, retrieves up to ``num_results`` matching report chunks
    (optionally filtered to a ``category``), wraps them in the analysis
    prompt, and asks Gemini for a verdict.

    Returns the model's reply as a string; on any failure (or when no
    report data is found) a human-readable message string is returned
    instead of raising.
    """
    try:
        embedding = get_embedding(query)

        # Only constrain the vector search when a category was requested.
        where = {"category": category} if category else None

        hits = search_documents(
            embedding,
            num_results=num_results,
            filter_metadata=where,
        )

        # Guard clause: nothing retrieved means nothing to analyze.
        if not hits:
            return "No data found in the report to analyze. Please upload a report first."

        chunks = "".join(f"RELEVANT DATA CHUNK: {hit['text']}\n\n" for hit in hits)
        context = (
            "--- START OF REPORT CONTEXT ---\n"
            + chunks
            + "--- END OF REPORT CONTEXT ---\n"
        )

        full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
        return generate_content(full_prompt)
    except Exception as e:
        # Deliberate best-effort boundary: callers get a message, not a crash.
        return f"Error in Analysis process: {str(e)}"
|
||||||
|
|
||||||
|
def ask_gemini_with_rag(query, category=None, num_results=5):
|
||||||
|
return analyze_greenwashing(query, category, num_results)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
|
|||||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
return "Error: GOOGLE_API_KEY not found."
|
return "Error: GOOGLE_API_KEY not found."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = genai.Client(api_key=api_key)
|
client = genai.Client(api_key=api_key)
|
||||||
response = client.models.generate_content(
|
response = client.models.generate_content(
|
||||||
|
|||||||
62
backend/src/routes/gemini.py
Normal file
62
backend/src/routes/gemini.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
from flask import Blueprint, request, jsonify
|
||||||
|
from src.rag.gemeni import GeminiClient
|
||||||
|
from src.gemini import ask_gemini_with_rag
|
||||||
|
|
||||||
|
# Blueprint mounted by create_app() (under the /api/gemini prefix there).
gemini_bp = Blueprint('gemini', __name__)
# Lazily-initialized shared GeminiClient; stays None until first use so
# importing this module never triggers client construction.
brain = None


def get_brain():
    """Return the module-level GeminiClient, creating it on first call.

    NOTE(review): no locking — under a multi-threaded server two first
    requests could each build a client; presumably harmless, confirm.
    """
    global brain
    if brain is None:
        brain = GeminiClient()
    return brain
|
||||||
|
|
||||||
|
@gemini_bp.route('/ask', methods=['POST'])
def ask():
    """POST /ask — forward a prompt (plus optional context) to Gemini.

    Expects a JSON body: {"prompt": str, "context": str (optional)}.
    Returns 200 with {"status", "reply"} on success, 400 when the body is
    missing/invalid or has no prompt, 500 if the Gemini client raises.
    """
    # Fix: request.json raises on a missing/non-JSON body and a null body
    # would make data.get() blow up with AttributeError — both bypassed the
    # intended 400 path.  silent=True yields None instead of raising.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt")
    context = data.get("context", "")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        client = get_brain()
        response = client.ask(prompt, context)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        # Surface failures as JSON rather than an HTML error page.
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
|
||||||
|
|
||||||
|
@gemini_bp.route('/rag', methods=['POST'])
def rag():
    """POST /rag — answer a prompt via the retrieval-augmented pipeline.

    Expects a JSON body: {"prompt": str, "category": str (optional)}.
    Returns 200 with {"status", "reply"} on success, 400 when the body is
    missing/invalid or has no prompt, 500 if the RAG pipeline raises.
    """
    # Fix: request.json raises on a missing/non-JSON body and a null body
    # would make data.get() raise AttributeError, bypassing the 400 path.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt")
    category = data.get("category")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        response = ask_gemini_with_rag(prompt, category=category)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        # Surface failures as JSON rather than an HTML error page.
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
|
||||||
|
|
||||||
|
@gemini_bp.route('/vision', methods=['POST'])
def vision():
    """POST /vision — stub endpoint; always answers 501 Not Implemented."""
    payload = {
        "status": "error",
        "message": "Vision endpoint not yet implemented"
    }
    return jsonify(payload), 501
|
||||||
Reference in New Issue
Block a user