From c46ec76027ccdfa608585974024d2e188bafebca Mon Sep 17 00:00:00 2001
From: KasaNick
Date: Sat, 24 Jan 2026 06:33:29 -0500
Subject: [PATCH] gemini endpoints

---
 backend/app.py                     | 44 +--------------------
 backend/scripts/populate_db.py     |  4 +-
 backend/src/__init__.py            |  2 +
 backend/src/chroma/vector_store.py |  8 ++++
 backend/src/gemini/__init__.py     | 57 +++++++++++++++++++++++++++
 backend/src/gemini/client.py       |  1 +
 backend/src/routes/gemini.py       | 62 ++++++++++++++++++++++++++++++
 7 files changed, 134 insertions(+), 44 deletions(-)
 create mode 100644 backend/src/routes/gemini.py

diff --git a/backend/app.py b/backend/app.py
index 30abf10..6524bdb 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,47 +1,7 @@
 import os
-from flask import Flask, request, jsonify
-from flask_cors import CORS
+from src import create_app
 
-from src.rag.gemeni import GeminiClient
-from src.mongo import get_database
-
-app = Flask(__name__)
-CORS(app)
-
-try:
-    brain = GeminiClient()
-    db = get_database()
-    print("--- Backend Components Initialized Successfully ---")
-except Exception as e:
-    print(f"CRITICAL ERROR during initialization: {e}")
-
-@app.route('/')
-def health_check():
-    return {
-        "status": "online",
-        "message": "The Waiter is ready at the counter!"
-    }
-
-@app.route('/chat', methods=['POST'])
-def chat():
-    data = request.json
-    user_query = data.get("message")
-
-    if not user_query:
-        return jsonify({"error": "You didn't say anything!"}), 400
-
-    try:
-        context = ""
-        ai_reply = brain.ask(user_query, context)
-        return jsonify({
-            "status": "success",
-            "reply": ai_reply
-        })
-    except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": str(e)
-        }), 500
+app = create_app()
 
 if __name__ == "__main__":
     app.run(debug=True, port=5000)
\ No newline at end of file
diff --git a/backend/scripts/populate_db.py b/backend/scripts/populate_db.py
index 1408974..3865dcd 100644
--- a/backend/scripts/populate_db.py
+++ b/backend/scripts/populate_db.py
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
-    parser.add_argument("--category", "-c", type=str)
-    parser.add_argument("--dir", "-d", type=str, default=None)
+    parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
+    parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
     args = parser.parse_args()
 
     if args.dir:
diff --git a/backend/src/__init__.py b/backend/src/__init__.py
index b8a9680..63adc7f 100644
--- a/backend/src/__init__.py
+++ b/backend/src/__init__.py
@@ -2,6 +2,7 @@ from flask import Flask
 from flask_cors import CORS
 from .routes.main import main_bp
 from .routes.rag import rag_bp
+from .routes.gemini import gemini_bp
 
 def create_app():
     app = Flask(__name__)
@@ -9,5 +10,6 @@ def create_app():
 
     app.register_blueprint(main_bp)
     app.register_blueprint(rag_bp, url_prefix='/api/rag')
+    app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
 
     return app
diff --git a/backend/src/chroma/vector_store.py b/backend/src/chroma/vector_store.py
index 799f584..bcb259a 100644
--- a/backend/src/chroma/vector_store.py
+++ b/backend/src/chroma/vector_store.py
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):
 
 def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
     collection = get_collection(collection_name)
+
     ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
+
     if metadata_list:
         collection.add(
             ids=ids,
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
             embeddings=embeddings,
             documents=texts
         )
+
     return len(texts)
 
 def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
     collection = get_collection(collection_name)
+
     query_params = {
         "query_embeddings": [query_embedding],
         "n_results": num_results
     }
+
     if filter_metadata:
         query_params["where"] = filter_metadata
+
     results = collection.query(**query_params)
+
     output = []
     if results and results["documents"]:
         for i, doc in enumerate(results["documents"][0]):
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
             "text": doc,
             "score": score
         })
+
     return output
 
 def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
diff --git a/backend/src/gemini/__init__.py b/backend/src/gemini/__init__.py
index e69de29..aa52ae2 100644
--- a/backend/src/gemini/__init__.py
+++ b/backend/src/gemini/__init__.py
@@ -0,0 +1,57 @@
+from src.rag.embeddings import get_embedding
+from src.chroma.vector_store import search_documents
+from .client import generate_content
+
+GREENWASHING_ANALYSIS_PROMPT = """
+You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
+Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
+
+Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
+
+Please evaluate the following:
+1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
+2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
+3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
+4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
+
+Based on the context provided, give a final verdict:
+- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
+- REASONING: [Explain your findings clearly]
+- EVIDENCE: [Quote specific parts of the context if possible]
+- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
+"""
+
+def analyze_greenwashing(query, category=None, num_results=10):
+    try:
+        query_embedding = get_embedding(query)
+
+        filter_metadata = None
+        if category:
+            filter_metadata = {"category": category}
+
+        search_results = search_documents(
+            query_embedding,
+            num_results=num_results,
+            filter_metadata=filter_metadata
+        )
+
+        context = ""
+        if search_results:
+            context = "--- START OF REPORT CONTEXT ---\n"
+            for res in search_results:
+                context += f"RELEVANT DATA CHUNK: {res['text']}\n\n"
+            context += "--- END OF REPORT CONTEXT ---\n"
+
+        if context:
+            full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
+        else:
+            return "No data found in the report to analyze. Please upload a report first."
+
+        response = generate_content(full_prompt)
+        return response
+
+    except Exception as e:
+        return f"Error in Analysis process: {str(e)}"
+
+def ask_gemini_with_rag(query, category=None, num_results=5):
+    return analyze_greenwashing(query, category, num_results)
diff --git a/backend/src/gemini/client.py b/backend/src/gemini/client.py
index 5fb5095..fef0733 100644
--- a/backend/src/gemini/client.py
+++ b/backend/src/gemini/client.py
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
     api_key = os.environ.get("GOOGLE_API_KEY")
     if not api_key:
         return "Error: GOOGLE_API_KEY not found."
+
     try:
         client = genai.Client(api_key=api_key)
         response = client.models.generate_content(
diff --git a/backend/src/routes/gemini.py b/backend/src/routes/gemini.py
new file mode 100644
index 0000000..c662ec8
--- /dev/null
+++ b/backend/src/routes/gemini.py
@@ -0,0 +1,62 @@
+from flask import Blueprint, request, jsonify
+from src.rag.gemeni import GeminiClient
+from src.gemini import ask_gemini_with_rag
+
+gemini_bp = Blueprint('gemini', __name__)
+brain = None
+
+def get_brain():
+    global brain
+    if brain is None:
+        brain = GeminiClient()
+    return brain
+
+@gemini_bp.route('/ask', methods=['POST'])
+def ask():
+    data = request.json
+    prompt = data.get("prompt")
+    context = data.get("context", "")
+
+    if not prompt:
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        client = get_brain()
+        response = client.ask(prompt, context)
+        return jsonify({
+            "status": "success",
+            "reply": response
+        })
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/rag', methods=['POST'])
+def rag():
+    data = request.json
+    prompt = data.get("prompt")
+    category = data.get("category")
+
+    if not prompt:
+        return jsonify({"error": "No prompt provided"}), 400
+
+    try:
+        response = ask_gemini_with_rag(prompt, category=category)
+        return jsonify({
+            "status": "success",
+            "reply": response
+        })
+    except Exception as e:
+        return jsonify({
+            "status": "error",
+            "message": str(e)
+        }), 500
+
+@gemini_bp.route('/vision', methods=['POST'])
+def vision():
+    return jsonify({
+        "status": "error",
+        "message": "Vision endpoint not yet implemented"
+    }), 501
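
Usage note (illustrative; not part of the diff above): assuming the Flask dev server from backend/app.py is running locally on port 5000, GOOGLE_API_KEY is exported for the backend process, and the Chroma collection has been populated with backend/scripts/populate_db.py, the new /api/gemini endpoints could be exercised with a short requests-based sketch like the one below. The endpoint paths, JSON field names ("prompt", "context", "category") and the "status"/"reply" response shape come from the patch; the base URL, example prompts, the "energy" category value, and the timeouts are assumptions for illustration only.

    # Illustrative client sketch; assumes the backend is reachable at localhost:5000
    # and that `requests` is installed in the calling environment.
    import requests

    BASE_URL = "http://localhost:5000/api/gemini"  # assumed local dev address

    # /ask: plain Gemini call; the body carries "prompt" plus an optional "context" string.
    ask_resp = requests.post(
        f"{BASE_URL}/ask",
        json={"prompt": "Summarize the key ESG claims in two sentences.", "context": ""},
        timeout=60,
    )
    print(ask_resp.status_code, ask_resp.json().get("reply"))

    # /rag: greenwashing analysis grounded in the vector store; the optional "category"
    # becomes a metadata filter on the Chroma query ("energy" is a made-up example value).
    rag_resp = requests.post(
        f"{BASE_URL}/rag",
        json={"prompt": "Acme Corp sustainability report", "category": "energy"},
        timeout=120,
    )
    print(rag_resp.status_code, rag_resp.json().get("reply"))

A POST to /api/gemini/vision currently returns 501, matching the placeholder route in the new blueprint.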