This commit is contained in:
r-III03
2026-01-24 10:01:06 -05:00
7 changed files with 134 additions and 44 deletions

View File

@@ -1,47 +1,7 @@
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
from src import create_app
from src.rag.gemeni import GeminiClient
from src.mongo import get_database
# Create the Flask application and enable CORS on it.
app = Flask(__name__)
CORS(app)

# Build the shared backend components once, at import time.  A failure is
# reported on stdout and deliberately not re-raised, so the module still
# imports; in that case `brain` and/or `db` are simply left unbound.
try:
    brain = GeminiClient()
    db = get_database()
    print("--- Backend Components Initialized Successfully ---")
except Exception as init_error:
    print(f"CRITICAL ERROR during initialization: {init_error}")
@app.route('/')
def health_check():
    """Report that the service is reachable.

    Returns:
        A dict with a fixed ``status`` and ``message``.
    """
    payload = dict(
        status="online",
        message="The Waiter is ready at the counter!",
    )
    return payload
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message posted as JSON.

    Expects a JSON object with a ``message`` field and passes it, together
    with an empty context string, to ``brain.ask``.

    Returns:
        200 with ``{"status": "success", "reply": ...}`` on success;
        400 with ``{"error": ...}`` when no message is supplied (including a
        missing, malformed or non-object body);
        500 with ``{"status": "error", "message": ...}`` if answering raises.
    """
    # silent=True yields None instead of raising for a missing, non-JSON or
    # malformed body.  Anything that is not a JSON object (null, list,
    # string, ...) is treated as "no message" so that .get() is never called
    # on a non-dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    user_query = data.get("message")
    if not user_query:
        return jsonify({"error": "You didn't say anything!"}), 400

    try:
        # No retrieval step here: the model is asked with an empty context.
        context = ""
        ai_reply = brain.ask(user_query, context)
        return jsonify({
            "status": "success",
            "reply": ai_reply,
        })
    except Exception as e:
        # Boundary handler: turn any failure into a JSON 500 for the client.
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500
# Module-level application object, built by the package's factory.
app = create_app()

if __name__ == "__main__":
    # Direct execution starts Flask's built-in server in debug mode.
    app.run(port=5000, debug=True)

View File

@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
parser.add_argument("--category", "-c", type=str)
parser.add_argument("--dir", "-d", type=str, default=None)
parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
args = parser.parse_args()
if args.dir:

View File

@@ -2,6 +2,7 @@ from flask import Flask
from flask_cors import CORS
from .routes.main import main_bp
from .routes.rag import rag_bp
from .routes.gemini import gemini_bp
def create_app():
app = Flask(__name__)
@@ -9,5 +10,6 @@ def create_app():
app.register_blueprint(main_bp)
app.register_blueprint(rag_bp, url_prefix='/api/rag')
app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
return app

View File

@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
collection = get_collection(collection_name)
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
if metadata_list:
collection.add(
ids=ids,
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
embeddings=embeddings,
documents=texts
)
return len(texts)
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
collection = get_collection(collection_name)
query_params = {
"query_embeddings": [query_embedding],
"n_results": num_results
}
if filter_metadata:
query_params["where"] = filter_metadata
results = collection.query(**query_params)
output = []
if results and results["documents"]:
for i, doc in enumerate(results["documents"][0]):
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
"text": doc,
"score": score
})
return output
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):

View File

@@ -0,0 +1,57 @@
from src.rag.embeddings import get_embedding
from src.chroma.vector_store import search_documents
from .client import generate_content
# Instruction text that analyze_greenwashing places ahead of the retrieved
# report context and the user query.  It defines greenwashing, lists four
# evaluation criteria, and fixes the VERDICT / REASONING / EVIDENCE /
# BETTER ALTERNATIVES layout the reply is asked to follow.
GREENWASHING_ANALYSIS_PROMPT = """
You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
Please evaluate the following:
1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
Based on the context provided, give a final verdict:
- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
- REASONING: [Explain your findings clearly]
- EVIDENCE: [Quote specific parts of the context if possible]
- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
"""
def analyze_greenwashing(query, category=None, num_results=10):
    """Run a retrieval-augmented greenwashing analysis for *query*.

    The query is embedded, the vector store is searched (restricted by a
    ``category`` metadata filter when *category* is truthy), and the matching
    chunks are wrapped in report-context markers and sent to the model
    together with ``GREENWASHING_ANALYSIS_PROMPT``.

    Args:
        query: User question or company focus.
        category: Optional value for the ``category`` metadata filter.
        num_results: Passed through to ``search_documents``.

    Returns:
        The result of ``generate_content`` for the assembled prompt, a fixed
        notice when the search returns nothing, or an
        ``"Error in Analysis process: ..."`` string if any step raises.
    """
    try:
        query_embedding = get_embedding(query)
        where = {"category": category} if category else None
        hits = search_documents(
            query_embedding,
            num_results=num_results,
            filter_metadata=where,
        )

        # Nothing retrieved: there is no context to analyse.
        if not hits:
            return "No data found in the report to analyze. Please upload a report first."

        chunks = "".join(
            f"RELEVANT DATA CHUNK: {hit['text']}\n\n" for hit in hits
        )
        context = (
            "--- START OF REPORT CONTEXT ---\n"
            + chunks
            + "--- END OF REPORT CONTEXT ---\n"
        )
        full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
        return generate_content(full_prompt)
    except Exception as exc:
        # Failures are reported to the caller as a message string, not raised.
        return f"Error in Analysis process: {str(exc)}"
def ask_gemini_with_rag(query, category=None, num_results=5):
    """Delegate to ``analyze_greenwashing`` with a default of five results.

    Args and return value are those of ``analyze_greenwashing``.
    """
    answer = analyze_greenwashing(
        query,
        category=category,
        num_results=num_results,
    )
    return answer

View File

@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
return "Error: GOOGLE_API_KEY not found."
try:
client = genai.Client(api_key=api_key)
response = client.models.generate_content(

View File

@@ -0,0 +1,62 @@
from flask import Blueprint, request, jsonify
from src.rag.gemeni import GeminiClient
from src.gemini import ask_gemini_with_rag
gemini_bp = Blueprint('gemini', __name__)
# Shared GeminiClient instance; stays None until get_brain() is first called.
brain = None


def get_brain():
    """Return the module-wide ``GeminiClient``, creating it on first use."""
    global brain
    if brain is not None:
        return brain
    brain = GeminiClient()
    return brain
@gemini_bp.route('/ask', methods=['POST'])
def ask():
    """Send a prompt (and optional context) straight to the Gemini client.

    Expects a JSON object with ``prompt`` and, optionally, ``context``
    (defaults to an empty string).

    Returns:
        200 with ``{"status": "success", "reply": ...}`` on success;
        400 with ``{"error": ...}`` when no prompt is supplied (including a
        missing, malformed or non-object body);
        500 with ``{"status": "error", "message": ...}`` if the client raises.
    """
    # silent=True yields None instead of raising for a missing, non-JSON or
    # malformed body.  Any payload that is not a JSON object is treated as
    # "no prompt" so that .get() is never called on a non-dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    prompt = data.get("prompt")
    context = data.get("context", "")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        # The client is created lazily, so construction errors also land in
        # the JSON 500 branch below.
        client = get_brain()
        response = client.ask(prompt, context)
        return jsonify({
            "status": "success",
            "reply": response,
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500
@gemini_bp.route('/rag', methods=['POST'])
def rag():
    """Answer a prompt through the retrieval-augmented analysis pipeline.

    Expects a JSON object with ``prompt`` and, optionally, ``category``,
    both of which are handed to ``ask_gemini_with_rag``.

    Returns:
        200 with ``{"status": "success", "reply": ...}`` when the call
        returns;
        400 with ``{"error": ...}`` when no prompt is supplied (including a
        missing, malformed or non-object body);
        500 with ``{"status": "error", "message": ...}`` if the call raises.
    """
    # silent=True yields None instead of raising for a missing, non-JSON or
    # malformed body.  Any payload that is not a JSON object is treated as
    # "no prompt" so that .get() is never called on a non-dict.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    prompt = data.get("prompt")
    category = data.get("category")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        # NOTE(review): ask_gemini_with_rag appears to report some failures
        # by returning a message string rather than raising; those would
        # still be sent back with status "success" - confirm whether that is
        # intended.
        response = ask_gemini_with_rag(prompt, category=category)
        return jsonify({
            "status": "success",
            "reply": response,
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500
@gemini_bp.route('/vision', methods=['POST'])
def vision():
    """Placeholder endpoint: always answers 501 with a JSON error body."""
    body = {
        "status": "error",
        "message": "Vision endpoint not yet implemented",
    }
    return jsonify(body), 501