mirror of
https://github.com/SirBlobby/Hoya26.git
synced 2026-02-04 03:34:34 -05:00
Merge branch 'main' of https://github.com/SirBlobby/Hoya26
This commit is contained in:
@@ -1,47 +1,7 @@
|
|||||||
# Application entry point.
#
# Reconstructed post-merge version: the app is now assembled by the
# create_app() factory in the src package (routes live in blueprints),
# replacing the earlier module-level Flask app defined in this file.
import os

from src import create_app

app = create_app()

if __name__ == "__main__":
    # Development server only; front this with a WSGI server in production.
    app.run(debug=True, port=5000)
|
||||||
@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
|
parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
|
||||||
parser.add_argument("--category", "-c", type=str)
|
parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
|
||||||
parser.add_argument("--dir", "-d", type=str, default=None)
|
parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.dir:
|
if args.dir:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from flask import Flask
|
|||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from .routes.main import main_bp
|
from .routes.main import main_bp
|
||||||
from .routes.rag import rag_bp
|
from .routes.rag import rag_bp
|
||||||
|
from .routes.gemini import gemini_bp
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
@@ -9,5 +10,6 @@ def create_app():
|
|||||||
|
|
||||||
app.register_blueprint(main_bp)
|
app.register_blueprint(main_bp)
|
||||||
app.register_blueprint(rag_bp, url_prefix='/api/rag')
|
app.register_blueprint(rag_bp, url_prefix='/api/rag')
|
||||||
|
app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -17,7 +17,9 @@ def get_collection(collection_name=COLLECTION_NAME):
|
|||||||
|
|
||||||
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
|
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
|
||||||
collection = get_collection(collection_name)
|
collection = get_collection(collection_name)
|
||||||
|
|
||||||
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
|
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
|
||||||
|
|
||||||
if metadata_list:
|
if metadata_list:
|
||||||
collection.add(
|
collection.add(
|
||||||
ids=ids,
|
ids=ids,
|
||||||
@@ -31,17 +33,22 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
|
|||||||
embeddings=embeddings,
|
embeddings=embeddings,
|
||||||
documents=texts
|
documents=texts
|
||||||
)
|
)
|
||||||
|
|
||||||
return len(texts)
|
return len(texts)
|
||||||
|
|
||||||
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
|
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
|
||||||
collection = get_collection(collection_name)
|
collection = get_collection(collection_name)
|
||||||
|
|
||||||
query_params = {
|
query_params = {
|
||||||
"query_embeddings": [query_embedding],
|
"query_embeddings": [query_embedding],
|
||||||
"n_results": num_results
|
"n_results": num_results
|
||||||
}
|
}
|
||||||
|
|
||||||
if filter_metadata:
|
if filter_metadata:
|
||||||
query_params["where"] = filter_metadata
|
query_params["where"] = filter_metadata
|
||||||
|
|
||||||
results = collection.query(**query_params)
|
results = collection.query(**query_params)
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
if results and results["documents"]:
|
if results and results["documents"]:
|
||||||
for i, doc in enumerate(results["documents"][0]):
|
for i, doc in enumerate(results["documents"][0]):
|
||||||
@@ -50,6 +57,7 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
|
|||||||
"text": doc,
|
"text": doc,
|
||||||
"score": score
|
"score": score
|
||||||
})
|
})
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
|
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
from src.rag.embeddings import get_embedding
|
||||||
|
from src.chroma.vector_store import search_documents
|
||||||
|
from .client import generate_content
|
||||||
|
|
||||||
|
GREENWASHING_ANALYSIS_PROMPT = """
|
||||||
|
You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
|
||||||
|
Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
|
||||||
|
|
||||||
|
Greenwashing is defined as making misleading or unsubstantiated claims about the environmental benefits of a product, service, or company practice.
|
||||||
|
|
||||||
|
Please evaluate the following:
|
||||||
|
1. Vague Claims: Are they using broad terms like 'eco-friendly' without specific details?
|
||||||
|
2. Lack of Proof: Are claims backed by data, third-party certifications, or specific metrics?
|
||||||
|
3. Hidden Trade-offs: Do they highlight one green act while ignoring a much larger environmental harm?
|
||||||
|
4. Symbolic Actions: Are they focusing on minor changes while their core business remains highly polluting?
|
||||||
|
|
||||||
|
Based on the context provided, give a final verdict:
|
||||||
|
- VERDICT: [Clear/Suspect/High Risk of Greenwashing]
|
||||||
|
- REASONING: [Explain your findings clearly]
|
||||||
|
- EVIDENCE: [Quote specific parts of the context if possible]
|
||||||
|
- BETTER ALTERNATIVES: [Suggest 2-3 similar companies or products that have verified sustainability records or higher transparency in this specific area]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def analyze_greenwashing(query, category=None, num_results=10):
    """Run a RAG-backed greenwashing analysis for *query*.

    Embeds the query, retrieves up to ``num_results`` matching report chunks
    (optionally filtered to a ``category``), wraps them in the analysis
    prompt, and asks Gemini for a verdict.

    Returns the model's reply as a string; on any failure (or when no
    report data is found) a human-readable message string is returned
    instead of raising.
    """
    try:
        embedding = get_embedding(query)

        # Only constrain the vector search when a category was requested.
        where = {"category": category} if category else None

        hits = search_documents(
            embedding,
            num_results=num_results,
            filter_metadata=where,
        )

        # Guard clause: nothing retrieved means nothing to analyze.
        if not hits:
            return "No data found in the report to analyze. Please upload a report first."

        chunks = "".join(f"RELEVANT DATA CHUNK: {hit['text']}\n\n" for hit in hits)
        context = (
            "--- START OF REPORT CONTEXT ---\n"
            + chunks
            + "--- END OF REPORT CONTEXT ---\n"
        )

        full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
        return generate_content(full_prompt)
    except Exception as e:
        # Deliberate best-effort boundary: callers get a message, not a crash.
        return f"Error in Analysis process: {str(e)}"
|
||||||
|
|
||||||
|
def ask_gemini_with_rag(query, category=None, num_results=5):
|
||||||
|
return analyze_greenwashing(query, category, num_results)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
|
|||||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
return "Error: GOOGLE_API_KEY not found."
|
return "Error: GOOGLE_API_KEY not found."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = genai.Client(api_key=api_key)
|
client = genai.Client(api_key=api_key)
|
||||||
response = client.models.generate_content(
|
response = client.models.generate_content(
|
||||||
|
|||||||
62
backend/src/routes/gemini.py
Normal file
62
backend/src/routes/gemini.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
from flask import Blueprint, request, jsonify
|
||||||
|
from src.rag.gemeni import GeminiClient
|
||||||
|
from src.gemini import ask_gemini_with_rag
|
||||||
|
|
||||||
|
# Blueprint mounted by create_app() (under the /api/gemini prefix there).
gemini_bp = Blueprint('gemini', __name__)
# Lazily-initialized shared GeminiClient; stays None until first use so
# importing this module never triggers client construction.
brain = None


def get_brain():
    """Return the module-level GeminiClient, creating it on first call.

    NOTE(review): no locking — under a multi-threaded server two first
    requests could each build a client; presumably harmless, confirm.
    """
    global brain
    if brain is None:
        brain = GeminiClient()
    return brain
|
||||||
|
|
||||||
|
@gemini_bp.route('/ask', methods=['POST'])
def ask():
    """POST /ask — forward a prompt (plus optional context) to Gemini.

    Expects a JSON body: {"prompt": str, "context": str (optional)}.
    Returns 200 with {"status", "reply"} on success, 400 when the body is
    missing/invalid or has no prompt, 500 if the Gemini client raises.
    """
    # Fix: request.json raises on a missing/non-JSON body and a null body
    # would make data.get() blow up with AttributeError — both bypassed the
    # intended 400 path.  silent=True yields None instead of raising.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt")
    context = data.get("context", "")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        client = get_brain()
        response = client.ask(prompt, context)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        # Surface failures as JSON rather than an HTML error page.
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
|
||||||
|
|
||||||
|
@gemini_bp.route('/rag', methods=['POST'])
def rag():
    """POST /rag — answer a prompt via the retrieval-augmented pipeline.

    Expects a JSON body: {"prompt": str, "category": str (optional)}.
    Returns 200 with {"status", "reply"} on success, 400 when the body is
    missing/invalid or has no prompt, 500 if the RAG pipeline raises.
    """
    # Fix: request.json raises on a missing/non-JSON body and a null body
    # would make data.get() raise AttributeError, bypassing the 400 path.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt")
    category = data.get("category")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        response = ask_gemini_with_rag(prompt, category=category)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        # Surface failures as JSON rather than an HTML error page.
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
|
||||||
|
|
||||||
|
@gemini_bp.route('/vision', methods=['POST'])
def vision():
    """POST /vision — stub endpoint; always answers 501 Not Implemented."""
    payload = {
        "status": "error",
        "message": "Vision endpoint not yet implemented"
    }
    return jsonify(payload), 501
|
||||||
Reference in New Issue
Block a user