@@ -0,0 +1,405 @@
"""
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
from datetime import datetime
from flask import Blueprint, request, jsonify
from google import genai
from pydantic import BaseModel, Field
from typing import List, Literal

from src.ollama.detector import OllamaLogoDetector
from src.chroma.vector_store import search_documents, insert_documents
from src.rag.embeddings import get_embedding
from src.mongo.connection import get_mongo_client

incidents_bp = Blueprint('incidents', __name__)

# Initialize detector lazily
_detector = None


def get_detector():
    global _detector
    if _detector is None:
        _detector = OllamaLogoDetector()
    return _detector


# ============= Pydantic Models for Structured Outputs =============

class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis"""
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
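
# Illustrative only: an example of the JSON shape that validates against
# GreenwashingAnalysis via model_validate_json (values are made up, not
# produced by this module):
#   {
#     "is_greenwashing": true,
#     "confidence": "high",
#     "verdict": "The 'eco-friendly' claim is unsubstantiated.",
#     "reasoning": "...",
#     "severity": "medium",
#     "recommendations": "...",
#     "key_claims": ["100% recyclable packaging"],
#     "red_flags": ["No third-party certification cited"]
#   }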


class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama"""
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")


class ImageAnalysis(BaseModel):
    """Structured output for full image analysis"""
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
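
# Illustrative only: the Ollama call further down is asked to emit JSON matching
# this schema, e.g. (made-up brand and values):
#   {"logos_detected": [{"brand": "AquaCorp", "confidence": "high",
#                        "location": "center", "category": "bottled water"}],
#    "total_count": 1, "description": "Plastic bottle with leaf motif",
#    "environmental_claims": ["100% recycled plastic"],
#    "packaging_description": "Green-tinted label with nature imagery"}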


# ============= Analysis Functions =============

GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.

Analyze the following user-submitted report about a potential greenwashing incident:

PRODUCT/COMPANY: {product_name}
USER COMPLAINT: {user_description}
DETECTED BRAND FROM IMAGE: {detected_brand}
IMAGE DESCRIPTION: {image_description}

RELEVANT CONTEXT FROM OUR DATABASE:
{context}

Based on this information, determine if this is a valid case of greenwashing. Consider:
1. Does the company have a history of misleading environmental claims?
2. Are their eco-friendly claims vague or unsubstantiated?
3. Is there a disconnect between their marketing and actual practices?
4. Are they using green imagery or terms without substance?

Provide your analysis in the structured format requested."""


def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Send analysis request to Gemini with structured output"""
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context
    )

    client = genai.Client(api_key=api_key)

    # Use structured output with Pydantic schema
    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_json_schema": GreenwashingAnalysis.model_json_schema(),
        }
    )

    # Validate and parse the response
    analysis = GreenwashingAnalysis.model_validate_json(response.text)
    return analysis
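
# Illustrative usage (all argument values below are hypothetical):
#   analysis = analyze_with_gemini("EcoBottle 1L",
#                                  "Label says plastic-free but the cap is plastic",
#                                  "EcoBottle", "Bottle with leaf logo and 'plastic-free' text",
#                                  context="No prior reports")
#   analysis.is_greenwashing, analysis.verdict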


def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Analyze image using Ollama with structured output"""
    try:
        import ollama

        client = ollama.Client(host="https://ollama.sirblob.co")

        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        prompt = """Analyze this image for a greenwashing detection system.

Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements

Respond with structured JSON matching the schema provided."""

        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            format=ImageAnalysis.model_json_schema(),
            options={'temperature': 0.1}
        )

        # Validate and parse
        analysis = ImageAnalysis.model_validate_json(response['message']['content'])
        return analysis

    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")
        # Fall back to basic detection
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)

        # Convert to structured format
        logos = []
        for logo in result.get('logos_detected', []):
            logos.append(LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown')
            ))

        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )
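
# Illustrative usage with a hypothetical local file:
#   with open("photo.jpg", "rb") as f:
#       img_analysis = analyze_image_with_ollama(f.read())
#   img_analysis.logos_detected, img_analysis.environmental_claims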


def save_to_mongodb(incident_data: dict) -> str:
    """Save incident to MongoDB and return the ID"""
    client = get_mongo_client()
    db = client["ethix"]
    collection = db["incidents"]

    result = collection.insert_one(incident_data)
    return str(result.inserted_id)


def save_to_chromadb(incident_data: dict, incident_id: str):
    """Save incident as context for the chatbot"""
    analysis = incident_data['analysis']

    # Create a rich text representation of the incident
    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))

    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']} ({incident_data.get('detected_brand', 'Unknown brand')})

USER REPORT: {incident_data['user_description']}

ANALYSIS VERDICT: {analysis['verdict']}
Confidence: {analysis['confidence']}
Severity: {analysis['severity']}

DETAILED REASONING:
{analysis['reasoning']}

KEY ENVIRONMENTAL CLAIMS MADE:
{key_claims}

RED FLAGS IDENTIFIED:
{red_flags}

CONSUMER RECOMMENDATIONS:
{analysis['recommendations']}
"""

    # Get embedding for the incident
    embedding = get_embedding(text)

    # Store in ChromaDB with metadata
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "created_at": incident_data['created_at']
    }

    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata]
    )
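
# Note: once inserted, this report text sits in the same vector store queried by
# search_documents(), so a later chat query about the same brand should be able
# to retrieve it as context.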


# ============= API Endpoints =============

@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - image: Base64 encoded image (optional, but recommended)
    """
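    # Illustrative request body (values are made up):
    #   {"product_name": "EcoBottle 1L",
    #    "description": "Label says 'plastic-free' but the bottle is plastic",
    #    "image": "data:image/jpeg;base64,..."}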
    data = request.json

    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    image_base64 = data.get('image')  # Base64 encoded image

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400

    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: Analyze image with Ollama (structured output)
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []

        if image_base64:
            try:
                # Remove data URL prefix if present
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]

                image_bytes = base64.b64decode(image_base64)

                # Use structured image analysis
                image_analysis = analyze_image_with_ollama(image_bytes)

                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand

                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims

            except Exception as e:
                print(f"Image analysis error: {e}")
                # Continue without image analysis

        # Step 2: Get relevant context from vector database
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)

        context = ""
        for res in search_results:
            context += f"--- Document ---\n{res['text'][:500]}\n\n"

        if not context:
            context = "No prior information found about this company in our database."

        # Add environmental claims from image to context
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: Analyze with Gemini (structured output)
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )

        # Convert Pydantic model to dict
        analysis_dict = analysis.model_dump()

        # Step 4: Prepare incident data
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.utcnow().isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed"
        }

        incident_id = None

        # Step 5: If greenwashing detected, save to databases
        if analysis.is_greenwashing:
            # Save to MongoDB
            incident_id = save_to_mongodb(incident_data)

            # Save to ChromaDB for chatbot context
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500


@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Get all confirmed greenwashing incidents"""
    try:
        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]

        # Get recent incidents with full analysis details
        incidents = list(collection.find(
            {"is_greenwashing": True},
            {"_id": 1, "product_name": 1, "detected_brand": 1,
             "user_description": 1, "analysis": 1, "created_at": 1}
        ).sort("created_at", -1).limit(50))

        # Convert ObjectId to string
        for inc in incidents:
            inc["_id"] = str(inc["_id"])

        return jsonify(incidents)

    except Exception as e:
        return jsonify({"error": str(e)}), 500
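
# Illustrative call, assuming the blueprint is registered elsewhere under an
# /incidents URL prefix and the app runs on the default Flask port:
#   curl http://localhost:5000/incidents/list
# Returns up to 50 confirmed incidents, newest first.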


@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Get a specific incident by ID"""
    try:
        from bson import ObjectId

        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]

        incident = collection.find_one({"_id": ObjectId(incident_id)})

        if not incident:
            return jsonify({"error": "Incident not found"}), 404

        incident["_id"] = str(incident["_id"])
        return jsonify(incident)

    except Exception as e:
        return jsonify({"error": str(e)}), 500