Files
Hoya26/backend/src/routes/incidents.py

496 lines
16 KiB
Python

"""
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
import cv2
import numpy as np
from datetime import datetime
from flask import Blueprint ,request ,jsonify
from google import genai
from pydantic import BaseModel ,Field
from typing import List ,Optional ,Literal
from src .ollama .detector import OllamaLogoDetector
from src .chroma .vector_store import search_documents ,insert_documents
from src .rag .embeddings import get_embedding
from src .mongo .connection import get_mongo_client
# Flask blueprint that groups the incident-report routes defined in this module.
incidents_bp =Blueprint ('incidents',__name__ )
# Cache for the logo detector; stays None until get_detector() first builds it.
_detector =None
def get_detector():
    """Return the module-wide OllamaLogoDetector, building it on first use.

    The instance is stored in the module-level ``_detector`` variable so
    later calls reuse it instead of constructing a new detector.
    """
    global _detector
    if _detector is not None:
        return _detector
    _detector = OllamaLogoDetector()
    return _detector
def compress_image(image_bytes: bytes, max_width: int = 800, quality: int = 85) -> str:
    """
    Compress image using OpenCV and return Base64 string

    The image is decoded, scaled down (keeping its aspect ratio) when it is
    wider than ``max_width`` and re-encoded as JPEG.  The function is
    best-effort: if any step fails, the error is printed and the *original*
    bytes are returned Base64-encoded, so callers always receive a string.

    Args:
        image_bytes: Original image bytes
        max_width: Maximum width for resized image
        quality: JPEG quality (1-100)
    Returns:
        Base64 encoded compressed image, or the Base64 encoded original
        bytes when decoding/resizing/encoding failed
    """
    try:
        pixels = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
        if pixels is None:
            raise ValueError("Failed to decode image")

        height, width = pixels.shape[:2]
        if width > max_width:
            scale = max_width / width
            # Clamp to 1px: for very wide, thin images int(height * scale)
            # truncates to 0, which would make the resize fail and silently
            # skip compression altogether.
            target_size = (max_width, max(1, int(height * scale)))
            pixels = cv2.resize(pixels, target_size, interpolation=cv2.INTER_AREA)

        jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
        encoded_ok, jpeg_buffer = cv2.imencode('.jpg', pixels, jpeg_params)
        # imencode reports failure through its first return value; do not
        # Base64-encode a buffer that was never filled.
        if not encoded_ok:
            raise ValueError("Failed to encode image as JPEG")

        return base64.b64encode(jpeg_buffer).decode('utf-8')
    except Exception as e:
        # Deliberate fallback: an uncompressed image is better than none.
        print(f"Image compression error: {e}")
        return base64.b64encode(image_bytes).decode('utf-8')
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis"""

    # NOTE(review): the class docstring and every Field description below are
    # runtime data, not just documentation - model_json_schema() of this class
    # is sent to Gemini as the response schema (pydantic appears to copy the
    # docstring into that schema; confirm before rewording any of them).

    # --- overall judgement ---
    is_greenwashing: bool = Field(
        description="Whether this is a case of greenwashing",
    )
    confidence: Literal["high", "medium", "low"] = Field(
        description="Confidence level of the analysis",
    )
    verdict: str = Field(
        description="Brief one-sentence verdict",
    )

    # --- explanation ---
    reasoning: str = Field(
        description="Detailed explanation of why this is or isn't greenwashing",
    )
    severity: Literal["high", "medium", "low"] = Field(
        description="Severity of the greenwashing if detected",
    )
    recommendations: str = Field(
        description="What consumers should know about this case",
    )

    # --- supporting evidence ---
    key_claims: List[str] = Field(
        description="List of specific environmental claims made by the company",
    )
    red_flags: List[str] = Field(
        description="List of red flags or concerning practices identified",
    )
class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama"""

    # One detected logo/brand. The descriptions are part of the JSON schema
    # handed to the model (via ImageAnalysis), so they are kept verbatim.
    brand: str = Field(
        description="The company or brand name detected",
    )
    confidence: Literal["high", "medium", "low"] = Field(
        description="Confidence level of detection",
    )
    location: str = Field(
        description="Location in image (e.g., center, top-left)",
    )
    category: str = Field(
        description="Product category if identifiable",
    )
class ImageAnalysis(BaseModel):
    """Structured output for full image analysis"""

    # The schema of this model is passed to the Ollama chat call as ``format``,
    # so the descriptions below are runtime text and are kept verbatim.

    # --- logos ---
    logos_detected: List[LogoDetection] = Field(
        description="List of logos/brands detected in the image",
    )
    total_count: int = Field(
        description="Total number of logos detected",
    )

    # --- free-text findings ---
    description: str = Field(
        description="Brief description of what's in the image",
    )
    environmental_claims: List[str] = Field(
        description="Any environmental or eco-friendly claims visible in the image",
    )
    packaging_description: str = Field(
        description="Description of the product packaging and design",
    )
# Prompt template for the Gemini greenwashing verdict. It is filled in with
# str.format(); the placeholders are product_name, user_description,
# detected_brand, image_description and context. The text is sent to the model
# as-is, so edits here change the analysis behaviour.
GREENWASHING_ANALYSIS_PROMPT ="""You are an expert at detecting greenwashing - misleading environmental claims by companies.
Analyze the following user-submitted report about a potential greenwashing incident:
PRODUCT/COMPANY: {product_name}
USER COMPLAINT: {user_description}
DETECTED BRAND FROM IMAGE: {detected_brand}
IMAGE DESCRIPTION: {image_description}
RELEVANT CONTEXT FROM OUR DATABASE:
{context}
Based on this information, determine if this is a valid case of greenwashing. Consider:
1. Does the company have a history of misleading environmental claims?
2. Are their eco-friendly claims vague or unsubstantiated?
3. Is there a disconnect between their marketing and actual practices?
4. Are they using green imagery or terms without substance?
Provide your analysis in the structured format requested."""
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Ask Gemini for a greenwashing verdict and parse it into a model.

    The five arguments are substituted into GREENWASHING_ANALYSIS_PROMPT and
    the request asks for a JSON reply matching the GreenwashingAnalysis schema.

    Returns:
        The reply validated as a GreenwashingAnalysis instance.
    Raises:
        ValueError: if the GOOGLE_API_KEY environment variable is missing
            or empty.
    """
    gemini_key = os.environ.get("GOOGLE_API_KEY")
    if not gemini_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt_fields = {
        "product_name": product_name,
        "user_description": user_description,
        "detected_brand": detected_brand,
        "image_description": image_description,
        "context": context,
    }
    filled_prompt = GREENWASHING_ANALYSIS_PROMPT.format(**prompt_fields)

    gemini = genai.Client(api_key=gemini_key)
    # Request JSON constrained to the pydantic schema so the reply can be
    # validated directly.
    generation_config = {
        "response_mime_type": "application/json",
        "response_json_schema": GreenwashingAnalysis.model_json_schema(),
    }
    reply = gemini.models.generate_content(
        model="gemini-3-pro-preview",
        contents=filled_prompt,
        config=generation_config,
    )
    return GreenwashingAnalysis.model_validate_json(reply.text)
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Describe an image (logos, eco claims, packaging) as an ImageAnalysis.

    First tries a schema-constrained chat request against the remote Ollama
    host. If anything in that attempt raises (including the ``ollama``
    import), the error is printed and the shared logo detector is used
    instead; that fallback fills in logos, count and description only and
    leaves the environmental claims and packaging description empty.
    """
    try:
        import ollama

        vision_client = ollama.Client(host="https://ollama.sirblob.co")
        encoded_image = base64.b64encode(image_bytes).decode('utf-8')
        instructions = """Analyze this image for a greenwashing detection system.
Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements
Respond with structured JSON matching the schema provided."""
        user_message = {
            'role': 'user',
            'content': instructions,
            'images': [encoded_image],
        }
        reply = vision_client.chat(
            model="ministral-3:latest",
            messages=[user_message],
            format=ImageAnalysis.model_json_schema(),
            options={'temperature': 0.1},
        )
        return ImageAnalysis.model_validate_json(reply['message']['content'])
    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")
        # Fallback path: plain logo detection through the cached detector.
        fallback = get_detector().detect_from_bytes(image_bytes)
        detections = [
            LogoDetection(
                brand=entry.get('brand', 'Unknown'),
                confidence=entry.get('confidence', 'low'),
                location=entry.get('location', 'unknown'),
                category=entry.get('category', 'unknown'),
            )
            for entry in fallback.get('logos_detected', [])
        ]
        return ImageAnalysis(
            logos_detected=detections,
            total_count=fallback.get('total_count', 0),
            description=fallback.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description="",
        )
def save_to_mongodb(incident_data: dict) -> str:
    """Insert the incident into the ``ethix.incidents`` collection.

    Returns:
        The inserted document's id converted to a string.
    """
    incidents = get_mongo_client()["ethix"]["incidents"]
    inserted = incidents.insert_one(incident_data)
    return str(inserted.inserted_id)
def save_to_chromadb(incident_data: dict, incident_id: str):
    """
    Save incident as context for the chatbot
    Includes verdict, full analysis, and environmental impact information

    A plain-text report is rendered from the incident, embedded with
    get_embedding() and stored through insert_documents() together with a
    flat metadata record.

    Args:
        incident_data: Incident record; must contain ``analysis``,
            ``created_at``, ``product_name``, ``status`` and
            ``user_description``. ``detected_brand``, ``image_description``
            and ``environmental_claims`` are optional.
        incident_id: Identifier used in the report title and as the
            ``source`` metadata value.
    Raises:
        KeyError: if a required key is missing from ``incident_data`` or
            from its ``analysis`` dict.
    """
    analysis = incident_data['analysis']
    red_flag_items = analysis.get('red_flags', [])
    key_claim_items = analysis.get('key_claims', [])

    red_flags = "\n".join(f"- {flag}" for flag in red_flag_items)
    key_claims = "\n".join(f"- {claim}" for claim in key_claim_items)
    env_claims = "\n".join(
        f"- {claim}" for claim in incident_data.get('environmental_claims', [])
    )

    # The report body is what gets embedded and later retrieved as chat
    # context, so its wording is kept stable.
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Report Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']}
Detected Brand: {incident_data.get('detected_brand', 'Unknown brand')}
Status: {incident_data['status']}
=== VERDICT ===
{analysis['verdict']}
Greenwashing Detected: {'YES' if analysis['is_greenwashing'] else 'NO'}
Confidence Level: {analysis['confidence']}
Severity Assessment: {analysis['severity']}
=== USER COMPLAINT ===
{incident_data['user_description']}
=== IMAGE ANALYSIS ===
{incident_data.get('image_description', 'No image analysis available')}
=== ENVIRONMENTAL CLAIMS IDENTIFIED ===
{env_claims if env_claims else 'No specific environmental claims identified'}
=== DETAILED ANALYSIS & REASONING ===
{analysis['reasoning']}
=== KEY MARKETING CLAIMS ===
{key_claims if key_claims else 'No key claims identified'}
=== RED FLAGS IDENTIFIED ===
{red_flags if red_flags else 'No specific red flags identified'}
=== CONSUMER RECOMMENDATIONS ===
{analysis['recommendations']}
=== ENVIRONMENTAL IMPACT ASSESSMENT ===
This report highlights potential misleading environmental claims by {incident_data.get('detected_brand', 'the company')}.
Consumers should be aware that {analysis['severity']} severity greenwashing has been identified with {analysis['confidence']} confidence.
This incident has been documented for future reference and to help inform sustainable purchasing decisions.
"""
    embedding = get_embedding(text)
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        # Derived from the analysis (as the report text above already is)
        # instead of being hard-coded, so metadata and text can never
        # disagree if a non-greenwashing incident is ever stored.
        "is_greenwashing": bool(analysis['is_greenwashing']),
        "verdict": analysis['verdict'],
        "status": incident_data['status'],
        "created_at": incident_data['created_at'],
        "num_red_flags": len(red_flag_items),
        "num_claims": len(key_claim_items),
    }
    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata],
    )
    print(f"✓ Incident #{incident_id} saved to ChromaDB for AI chat context")
@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report
    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - report_type: 'product' or 'company'
    - image: Base64 encoded image (for product reports)
    - pdf_data: Base64 encoded PDF (for company reports)
      (this handler does not currently read pdf_data)

    Flow: optional image analysis -> vector search for prior context ->
    Gemini verdict -> persist to MongoDB and ChromaDB only when the verdict
    is greenwashing.

    Returns:
        200 with the analysis (``incident_id`` is None when nothing was
        stored), 400 for a missing/invalid body or required field, 500 with
        the error message when processing fails.
    """
    # Local import, matching the other function-scope imports in this module.
    from datetime import timezone

    # silent=True: a malformed or non-JSON body yields None and is answered
    # with this API's own JSON 400 instead of an exception raised by Flask.
    data = request.get_json(silent=True)
    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # Coerce before strip(): null or non-string values would otherwise raise
    # AttributeError outside the try block below.
    product_name = str(data.get('product_name') or '').strip()
    user_description = str(data.get('description') or '').strip()
    report_type = data.get('report_type', 'product')
    image_base64 = data.get('image')

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400
    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        compressed_image_base64 = None

        if report_type == 'product' and image_base64:
            # Image handling is best-effort: on failure the report is still
            # analysed with the defaults set above.
            try:
                # Drop a data-URL prefix ("data:image/...;base64,") if present.
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]
                image_bytes = base64.b64decode(image_base64)
                print("Compressing image with OpenCV...")
                compressed_image_base64 = compress_image(image_bytes, max_width=600, quality=75)
                image_analysis = analyze_image_with_ollama(image_bytes)
                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand
                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims
            except Exception as e:
                print(f"Image processing error: {e}")

        # Retrieve prior knowledge about the product/brand from the vector store.
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)

        # Each hit is truncated to 500 characters to keep the prompt small.
        context = "".join(
            f"--- Document ---\n{res['text'][:500]}\n\n" for res in search_results
        )
        if not context:
            context = "No prior information found about this company in our database."
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context,
        )
        analysis_dict = analysis.model_dump()

        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            # Same naive-UTC ISO string that datetime.utcnow() produced,
            # without calling the deprecated API.
            "created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed",
            "report_type": report_type,
        }
        if compressed_image_base64:
            incident_data["image_base64"] = compressed_image_base64

        # Only confirmed cases are persisted; dismissed reports are answered
        # but not stored.
        incident_id = None
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims,
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Return the 50 most recent incidents flagged as greenwashing.

    Documents are read from ``ethix.incidents``, newest ``created_at`` first,
    restricted to the fields listed in the projection. Each ``_id`` is
    converted to a string so the result can be serialised as JSON. Any
    failure is reported as a 500 carrying the error message.
    """
    try:
        incidents_coll = get_mongo_client()["ethix"]["incidents"]
        only_confirmed = {"is_greenwashing": True}
        projection = {
            "_id": 1,
            "product_name": 1,
            "detected_brand": 1,
            "user_description": 1,
            "analysis": 1,
            "created_at": 1,
            "image_base64": 1,
            "report_type": 1,
        }
        cursor = incidents_coll.find(only_confirmed, projection)
        newest_first = cursor.sort("created_at", -1).limit(50)

        payload = []
        for doc in newest_first:
            doc["_id"] = str(doc["_id"])
            payload.append(doc)
        return jsonify(payload)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Get a specific incident by ID

    Args:
        incident_id: String form of the incident's MongoDB ObjectId, taken
            from the URL path.
    Returns:
        200 with the incident document (``_id`` as a string), 400 when the
        id is not a valid ObjectId, 404 when no such incident exists, 500
        with the error message on any other failure.
    """
    try:
        from bson import ObjectId

        # A malformed id is a client error. Without this check ObjectId()
        # raises and the request is reported as a server error (500).
        if not ObjectId.is_valid(incident_id):
            return jsonify({"error": "Invalid incident ID"}), 400

        client = get_mongo_client()
        collection = client["ethix"]["incidents"]
        incident = collection.find_one({"_id": ObjectId(incident_id)})
        if not incident:
            return jsonify({"error": "Incident not found"}), 404

        # ObjectId is not JSON serialisable.
        incident["_id"] = str(incident["_id"])
        return jsonify(incident)
    except Exception as e:
        return jsonify({"error": str(e)}), 500