Mirror of https://github.com/SirBlobby/Hoya26.git
synced 2026-02-03 19:24:34 -05:00
Merge branch 'main' of https://github.com/SirBlobby/Hoya26
@@ -1,47 +1,7 @@
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
from src import create_app

from src.rag.gemeni import GeminiClient
from src.mongo import get_database

app = Flask(__name__)
CORS(app)

try:
    brain = GeminiClient()
    db = get_database()
    print("--- Backend Components Initialized Successfully ---")
except Exception as e:
    print(f"CRITICAL ERROR during initialization: {e}")

@app.route('/')
def health_check():
    return {
        "status": "online",
        "message": "The Waiter is ready at the counter!"
    }

@app.route('/chat', methods=['POST'])
def chat():
    data = request.json
    user_query = data.get("message")

    if not user_query:
        return jsonify({"error": "You didn't say anything!"}), 400

    try:
        context = ""
        ai_reply = brain.ask(user_query, context)
        return jsonify({
            "status": "success",
            "reply": ai_reply
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
app = create_app()

if __name__ == "__main__":
    app.run(debug=True, port=5000)
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
    parser.add_argument("--category", "-c", type=str)
    parser.add_argument("--dir", "-d", type=str, default=None)
    parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
    parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
    args = parser.parse_args()

    if args.dir:
@@ -2,6 +2,7 @@ from flask import Flask
from flask_cors import CORS
from .routes.main import main_bp
from .routes.rag import rag_bp
from .routes.gemini import gemini_bp

def create_app():
    app = Flask(__name__)
@@ -9,5 +10,6 @@ def create_app():

    app.register_blueprint(main_bp)
    app.register_blueprint(rag_bp, url_prefix='/api/rag')
    app.register_blueprint(gemini_bp, url_prefix='/api/gemini')

    return app
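For reference, a minimal sketch (not part of this commit) of how the factory above is consumed; the comment lists the URL prefixes implied by the blueprint registrations, and the port mirrors the app.run() call in the first hunk.

from src import create_app

app = create_app()

# Routes implied by the registrations above:
#   main_bp   -> /            (no prefix)
#   rag_bp    -> /api/rag/...
#   gemini_bp -> /api/gemini/...
if __name__ == "__main__":
    app.run(debug=True, port=5000)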
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):

def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
    collection = get_collection(collection_name)

    ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]

    if metadata_list:
        collection.add(
            ids=ids,
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
            embeddings=embeddings,
            documents=texts
        )

    return len(texts)

def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
    collection = get_collection(collection_name)

    query_params = {
        "query_embeddings": [query_embedding],
        "n_results": num_results
    }

    if filter_metadata:
        query_params["where"] = filter_metadata

    results = collection.query(**query_params)

    output = []
    if results and results["documents"]:
        for i, doc in enumerate(results["documents"][0]):
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
            "text": doc,
            "score": score
        })

    return output

def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
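For orientation, a minimal sketch (not part of this commit) of how the Chroma helpers above might be wired together. The sample text and the "packaging" category value are made up for illustration; the import paths follow the imports used elsewhere in this diff.

from src.rag.embeddings import get_embedding
from src.chroma.vector_store import insert_documents, search_documents

# Hypothetical sample data; embeddings come from the project's own helper.
texts = ["Our packaging is 100% eco-friendly and fully recyclable."]
embeddings = [get_embedding(t) for t in texts]

# Store the chunks with category metadata so they can be filtered later.
insert_documents(texts, embeddings, metadata_list=[{"category": "packaging"}])

# Query with a metadata filter; each hit exposes "text" and "score".
hits = search_documents(
    get_embedding("vague eco-friendly packaging claims"),
    num_results=3,
    filter_metadata={"category": "packaging"},
)
for hit in hits:
    print(hit["score"], hit["text"])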
@@ -0,0 +1,57 @@
from src.rag.embeddings import get_embedding
from src.chroma.vector_store import search_documents
from .client import generate_content

GREENWASHING_ANALYSIS_PROMPT = """
You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.

Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.

Please evaluate the following:
1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?

Based on the context provided, give a final verdict:
- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
- REASONING: [Explain your findings clearly]
- EVIDENCE: [Quote specific parts of the context if possible]
- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
"""

def analyze_greenwashing(query, category=None, num_results=10):
    try:
        query_embedding = get_embedding(query)

        filter_metadata = None
        if category:
            filter_metadata = {"category": category}

        search_results = search_documents(
            query_embedding,
            num_results=num_results,
            filter_metadata=filter_metadata
        )

        context = ""
        if search_results:
            context = "--- START OF REPORT CONTEXT ---\n"
            for res in search_results:
                context += f"RELEVANT DATA CHUNK: {res['text']}\n\n"
            context += "--- END OF REPORT CONTEXT ---\n"

        if context:
            full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
        else:
            return "No data found in the report to analyze. Please upload a report first."

        response = generate_content(full_prompt)
        return response

    except Exception as e:
        return f"Error in Analysis process: {str(e)}"

def ask_gemini_with_rag(query, category=None, num_results=5):
    return analyze_greenwashing(query, category, num_results)
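As a usage sketch (not part of this commit), the RAG wrapper can be called directly once documents have been ingested; the company name and category value below are hypothetical, and the import path follows the one used in the new routes file.

from src.gemini import ask_gemini_with_rag

# "Acme Corp" and the "energy" category are made-up examples.
verdict = ask_gemini_with_rag(
    "Does Acme Corp's 2024 sustainability report overstate its climate impact?",
    category="energy",
)
print(verdict)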
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error: GOOGLE_API_KEY not found."

    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
backend/src/routes/gemini.py (new file, 62 lines)
@@ -0,0 +1,62 @@
from flask import Blueprint, request, jsonify
from src.rag.gemeni import GeminiClient
from src.gemini import ask_gemini_with_rag

gemini_bp = Blueprint('gemini', __name__)
brain = None

def get_brain():
    global brain
    if brain is None:
        brain = GeminiClient()
    return brain

@gemini_bp.route('/ask', methods=['POST'])
def ask():
    data = request.json
    prompt = data.get("prompt")
    context = data.get("context", "")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        client = get_brain()
        response = client.ask(prompt, context)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500

@gemini_bp.route('/rag', methods=['POST'])
def rag():
    data = request.json
    prompt = data.get("prompt")
    category = data.get("category")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        response = ask_gemini_with_rag(prompt, category=category)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500

@gemini_bp.route('/vision', methods=['POST'])
def vision():
    return jsonify({
        "status": "error",
        "message": "Vision endpoint not yet implemented"
    }), 501
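A minimal sketch (not part of this commit) of exercising the new blueprint over HTTP with the requests library, assuming the app is served locally on port 5000 as in the app.run() call above; the prompt and category values are made up.

import requests

resp = requests.post(
    "http://localhost:5000/api/gemini/rag",
    json={"prompt": "Is Acme Corp greenwashing its packaging claims?", "category": "packaging"},
)
data = resp.json()
# Successful responses carry "reply"; failures carry "message".
print(data["status"], data.get("reply") or data.get("message"))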