mirror of
https://github.com/SirBlobby/Hoya26.git
synced 2026-02-04 11:44:34 -05:00
Database Viewer Update
This commit is contained in:
405
backend/src/routes/incidents.py
Normal file
405
backend/src/routes/incidents.py
Normal file
@@ -0,0 +1,405 @@
|
||||
"""
|
||||
Incident Report API - Handles greenwashing report submissions
|
||||
Uses structured outputs with Pydantic for reliable JSON responses
|
||||
"""
|
||||
import base64
|
||||
import os
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, request, jsonify
|
||||
from google import genai
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Literal
|
||||
|
||||
from src.ollama.detector import OllamaLogoDetector
|
||||
from src.chroma.vector_store import search_documents, insert_documents
|
||||
from src.rag.embeddings import get_embedding
|
||||
from src.mongo.connection import get_mongo_client
|
||||
|
||||
# Blueprint for the incident-report endpoints; registered by the app factory.
incidents_bp = Blueprint('incidents', __name__)

# Initialize detector lazily — left as None so importing this module never
# constructs the Ollama-backed detector; see get_detector().
_detector = None
|
||||
|
||||
def get_detector():
    """Return the module-wide OllamaLogoDetector, creating it on first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = OllamaLogoDetector()
    return _detector
|
||||
|
||||
|
||||
# ============= Pydantic Models for Structured Outputs =============
|
||||
|
||||
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis.

    The JSON schema generated from this model is sent to Gemini as the
    response format, so the Field descriptions double as instructions to
    the model — edit them with care.
    """
    # Core verdict and how sure the model is about it.
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    # Supporting evidence extracted from the report and retrieved context.
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
|
||||
|
||||
|
||||
class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama.

    One entry per logo/brand the vision model finds in the image; the
    schema is passed to the model, so descriptions guide its output.
    """
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")
|
||||
|
||||
|
||||
class ImageAnalysis(BaseModel):
    """Structured output for full image analysis.

    Top-level result returned by analyze_image_with_ollama(); aggregates
    per-logo detections plus free-text observations about the image.
    """
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
|
||||
|
||||
|
||||
# ============= Analysis Functions =============
|
||||
|
||||
GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.
|
||||
|
||||
Analyze the following user-submitted report about a potential greenwashing incident:
|
||||
|
||||
PRODUCT/COMPANY: {product_name}
|
||||
USER COMPLAINT: {user_description}
|
||||
DETECTED BRAND FROM IMAGE: {detected_brand}
|
||||
IMAGE DESCRIPTION: {image_description}
|
||||
|
||||
RELEVANT CONTEXT FROM OUR DATABASE:
|
||||
{context}
|
||||
|
||||
Based on this information, determine if this is a valid case of greenwashing. Consider:
|
||||
1. Does the company have a history of misleading environmental claims?
|
||||
2. Are their eco-friendly claims vague or unsubstantiated?
|
||||
3. Is there a disconnect between their marketing and actual practices?
|
||||
4. Are they using green imagery or terms without substance?
|
||||
|
||||
Provide your analysis in the structured format requested."""
|
||||
|
||||
|
||||
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Ask Gemini to judge the report, constrained to the GreenwashingAnalysis schema.

    Raises:
        ValueError: if the GOOGLE_API_KEY environment variable is not set.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    client = genai.Client(api_key=api_key)

    filled_prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context
    )

    # Constrain the model to JSON matching our Pydantic schema.
    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=filled_prompt,
        config={
            "response_mime_type": "application/json",
            "response_json_schema": GreenwashingAnalysis.model_json_schema(),
        }
    )

    # Parse and validate in one step; raises if the model emitted bad JSON.
    return GreenwashingAnalysis.model_validate_json(response.text)
|
||||
|
||||
|
||||
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Run structured vision analysis on an uploaded image.

    First tries the remote Ollama vision model with a schema-constrained
    response; on any failure it degrades to the basic logo detector and
    adapts that plain-dict result into an ImageAnalysis.
    """
    try:
        import ollama

        ollama_client = ollama.Client(host="https://ollama.sirblob.co")

        encoded = base64.b64encode(image_bytes).decode('utf-8')

        prompt = """Analyze this image for a greenwashing detection system.

Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements

Respond with structured JSON matching the schema provided."""

        reply = ollama_client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [encoded],
            }],
            # The Pydantic JSON schema forces the model's output shape.
            format=ImageAnalysis.model_json_schema(),
            # Low temperature keeps the extraction near-deterministic.
            options={'temperature': 0.1}
        )

        return ImageAnalysis.model_validate_json(reply['message']['content'])

    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")
        # Best-effort fallback: basic detector, then adapt its dict result.
        fallback = get_detector().detect_from_bytes(image_bytes)

        detections = [
            LogoDetection(
                brand=item.get('brand', 'Unknown'),
                confidence=item.get('confidence', 'low'),
                location=item.get('location', 'unknown'),
                category=item.get('category', 'unknown')
            )
            for item in fallback.get('logos_detected', [])
        ]

        return ImageAnalysis(
            logos_detected=detections,
            total_count=fallback.get('total_count', 0),
            description=fallback.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )
|
||||
|
||||
|
||||
def save_to_mongodb(incident_data: dict) -> str:
    """Persist the incident document and return its stringified ObjectId."""
    collection = get_mongo_client()["ethix"]["incidents"]
    inserted = collection.insert_one(incident_data)
    return str(inserted.inserted_id)
|
||||
|
||||
|
||||
def save_to_chromadb(incident_data: dict, incident_id: str):
    """Index a confirmed incident in ChromaDB so the chatbot can cite it."""
    analysis = incident_data['analysis']

    # Bullet lists for the free-text document body.
    claims_block = "\n".join("- " + claim for claim in analysis.get('key_claims', []))
    flags_block = "\n".join("- " + flag for flag in analysis.get('red_flags', []))

    # Rich text representation of the incident, used as the embedded document.
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']} ({incident_data.get('detected_brand', 'Unknown brand')})

USER REPORT: {incident_data['user_description']}

ANALYSIS VERDICT: {analysis['verdict']}
Confidence: {analysis['confidence']}
Severity: {analysis['severity']}

DETAILED REASONING:
{analysis['reasoning']}

KEY ENVIRONMENTAL CLAIMS MADE:
{claims_block}

RED FLAGS IDENTIFIED:
{flags_block}

CONSUMER RECOMMENDATIONS:
{analysis['recommendations']}
"""

    # One embedding per incident document.
    vector = get_embedding(text)

    # Metadata lets the retrieval side filter by brand/severity/etc.
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "created_at": incident_data['created_at']
    }

    insert_documents(texts=[text], embeddings=[vector], metadata_list=[metadata])
|
||||
|
||||
|
||||
# ============= API Endpoints =============
|
||||
|
||||
@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - image: Base64 encoded image (optional, but recommended)

    Returns the structured analysis. When greenwashing is confirmed, the
    incident is persisted to MongoDB and indexed in ChromaDB, and the new
    incident_id is included in the response (otherwise incident_id is null).
    """
    # FIX: request.json raises (HTML 400/415 from werkzeug) when the body is
    # missing or the Content-Type isn't application/json. get_json(silent=True)
    # returns None instead, so the JSON error contract below always holds.
    data = request.get_json(silent=True)

    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    image_base64 = data.get('image')  # Base64 encoded image

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400

    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: Analyze image with Ollama (structured output)
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []

        if image_base64:
            try:
                # Remove data URL prefix if present (e.g. "data:image/png;base64,...")
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]

                image_bytes = base64.b64decode(image_base64)

                # Use structured image analysis
                image_analysis = analyze_image_with_ollama(image_bytes)

                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand

                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims

            except Exception as e:
                # Image analysis is best-effort; a bad image must not block the report.
                print(f"Image analysis error: {e}")

        # Step 2: Get relevant context from vector database
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)

        # Truncate each hit to keep the Gemini prompt a reasonable size.
        context = ""
        for res in search_results:
            context += f"--- Document ---\n{res['text'][:500]}\n\n"

        if not context:
            context = "No prior information found about this company in our database."

        # Add environmental claims from image to context
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: Analyze with Gemini (structured output)
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )

        # Convert Pydantic model to a plain dict for storage/serialization
        analysis_dict = analysis.model_dump()

        # Step 4: Prepare incident data
        # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12+;
        # migrating to datetime.now(timezone.utc) changes the stored ISO
        # string (adds an offset) — confirm downstream consumers first.
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.utcnow().isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed"
        }

        incident_id = None

        # Step 5: If greenwashing detected, save to databases
        if analysis.is_greenwashing:
            # Save to MongoDB
            incident_id = save_to_mongodb(incident_data)

            # Save to ChromaDB for chatbot context
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })

    except Exception as e:
        # Boundary handler: log the full traceback, return a JSON 500.
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
|
||||
|
||||
|
||||
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Return up to 50 most recent confirmed greenwashing incidents as JSON."""
    try:
        collection = get_mongo_client()["ethix"]["incidents"]

        # Projection keeps payloads small; newest first, capped at 50.
        projection = {"_id": 1, "product_name": 1, "detected_brand": 1,
                      "user_description": 1, "analysis": 1, "created_at": 1}
        cursor = collection.find({"is_greenwashing": True}, projection)
        incidents = list(cursor.sort("created_at", -1).limit(50))

        # ObjectId is not JSON-serializable; stringify before jsonify.
        for doc in incidents:
            doc["_id"] = str(doc["_id"])

        return jsonify(incidents)

    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Fetch a single incident document by its MongoDB ObjectId string."""
    try:
        from bson import ObjectId

        collection = get_mongo_client()["ethix"]["incidents"]
        doc = collection.find_one({"_id": ObjectId(incident_id)})

        if not doc:
            return jsonify({"error": "Incident not found"}), 404

        # ObjectId is not JSON-serializable; stringify before returning.
        doc["_id"] = str(doc["_id"])
        return jsonify(doc)

    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
Reference in New Issue
Block a user