"""
|
|
Incident Report API - Handles greenwashing report submissions
|
|
Uses structured outputs with Pydantic for reliable JSON responses
|
|
"""
|
|
import base64
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
from datetime import datetime
|
|
from flask import Blueprint ,request ,jsonify
|
|
from google import genai
|
|
from pydantic import BaseModel ,Field
|
|
from typing import List ,Optional ,Literal
|
|
|
|
from src .ollama .detector import OllamaLogoDetector
|
|
from src .chroma .vector_store import search_documents ,insert_documents
|
|
from src .rag .embeddings import get_embedding
|
|
from src .mongo .connection import get_mongo_client
|
|
|
|

incidents_bp = Blueprint('incidents', __name__)

# Lazily-initialised logo detector shared across requests
_detector = None


def get_detector():
    """Return the shared OllamaLogoDetector, creating it on first use."""
    global _detector
    if _detector is None:
        _detector = OllamaLogoDetector()
    return _detector

def compress_image(image_bytes: bytes, max_width: int = 800, quality: int = 85) -> str:
    """
    Compress an image using OpenCV and return it as a Base64 string.

    Args:
        image_bytes: Original image bytes
        max_width: Maximum width for the resized image
        quality: JPEG quality (1-100)

    Returns:
        Base64-encoded compressed image
    """
    try:
        # Decode the raw bytes into an OpenCV image
        nparr = np.frombuffer(image_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

        if img is None:
            raise ValueError("Failed to decode image")

        height, width = img.shape[:2]

        # Downscale proportionally if the image is wider than max_width
        if width > max_width:
            ratio = max_width / width
            new_width = max_width
            new_height = int(height * ratio)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

        # Re-encode as JPEG at the requested quality
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
        _, buffer = cv2.imencode('.jpg', img, encode_param)

        compressed_base64 = base64.b64encode(buffer).decode('utf-8')

        return compressed_base64

    except Exception as e:
        print(f"Image compression error: {e}")
        # Fall back to the original, uncompressed image
        return base64.b64encode(image_bytes).decode('utf-8')
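
# Illustrative usage sketch (not executed; the file path is hypothetical):
#
#     with open("sample_upload.jpg", "rb") as f:
#         raw = f.read()
#     b64_jpeg = compress_image(raw, max_width=600, quality=75)
#     # b64_jpeg is a Base64 string ready to store alongside the incident record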

class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis"""
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")


class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama"""
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")


class ImageAnalysis(BaseModel):
    """Structured output for full image analysis"""
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
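
# Illustrative example of the JSON these schemas constrain the models to return
# (the field values are hypothetical):
#
#     raw = '{"brand": "EcoBrand", "confidence": "high", "location": "center", "category": "beverage"}'
#     logo = LogoDetection.model_validate_json(raw)
#     logo.brand  # -> "EcoBrand"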

GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.

Analyze the following user-submitted report about a potential greenwashing incident:

PRODUCT/COMPANY: {product_name}
USER COMPLAINT: {user_description}
DETECTED BRAND FROM IMAGE: {detected_brand}
IMAGE DESCRIPTION: {image_description}

RELEVANT CONTEXT FROM OUR DATABASE:
{context}

Based on this information, determine if this is a valid case of greenwashing. Consider:
1. Does the company have a history of misleading environmental claims?
2. Are their eco-friendly claims vague or unsubstantiated?
3. Is there a disconnect between their marketing and actual practices?
4. Are they using green imagery or terms without substance?

Provide your analysis in the structured format requested."""

def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Send analysis request to Gemini with structured output"""
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context
    )

    client = genai.Client(api_key=api_key)

    # Request JSON constrained to the GreenwashingAnalysis schema
    response = client.models.generate_content(
        model="gemini-3-pro-preview",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_json_schema": GreenwashingAnalysis.model_json_schema(),
        }
    )

    # Parse and validate the JSON response into the Pydantic model
    analysis = GreenwashingAnalysis.model_validate_json(response.text)
    return analysis
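
# Illustrative call (values are hypothetical; requires GOOGLE_API_KEY in the environment):
#
#     result = analyze_with_gemini(
#         product_name="EcoBottle 500ml",
#         user_description="Label says 100% recycled but the spec sheet lists virgin plastic",
#         detected_brand="EcoBrand",
#         image_description="Green bottle with leaf imagery",
#         context="No prior information found about this company in our database.",
#     )
#     result.is_greenwashing  # -> bool, with reasoning and red_flags attached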

def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Analyze image using Ollama with structured output"""
    try:
        import ollama

        client = ollama.Client(host="https://ollama.sirblob.co")

        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        prompt = """Analyze this image for a greenwashing detection system.

Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements

Respond with structured JSON matching the schema provided."""

        # Ask the vision model for JSON constrained to the ImageAnalysis schema
        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            format=ImageAnalysis.model_json_schema(),
            options={'temperature': 0.1}
        )

        analysis = ImageAnalysis.model_validate_json(response['message']['content'])
        return analysis

    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")

        # Fall back to the basic logo detector and map its dict output
        # onto the ImageAnalysis schema
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)

        logos = []
        for logo in result.get('logos_detected', []):
            logos.append(LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown')
            ))

        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )

def save_to_mongodb(incident_data: dict) -> str:
    """Save incident to MongoDB and return the ID"""
    client = get_mongo_client()
    db = client["ethix"]
    collection = db["incidents"]

    result = collection.insert_one(incident_data)
    return str(result.inserted_id)

def save_to_chromadb(incident_data: dict, incident_id: str):
    """
    Save incident as context for the chatbot
    Includes verdict, full analysis, and environmental impact information
    """
    analysis = incident_data['analysis']

    # Bullet-list the claims and red flags for the document body
    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))
    env_claims = "\n".join(f"- {claim}" for claim in incident_data.get('environmental_claims', []))

    # Build a single text document the chatbot can retrieve as context
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Report Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']}
Detected Brand: {incident_data.get('detected_brand', 'Unknown brand')}
Status: {incident_data['status']}

=== VERDICT ===
{analysis['verdict']}

Greenwashing Detected: {'YES' if analysis['is_greenwashing'] else 'NO'}
Confidence Level: {analysis['confidence']}
Severity Assessment: {analysis['severity']}

=== USER COMPLAINT ===
{incident_data['user_description']}

=== IMAGE ANALYSIS ===
{incident_data.get('image_description', 'No image analysis available')}

=== ENVIRONMENTAL CLAIMS IDENTIFIED ===
{env_claims if env_claims else 'No specific environmental claims identified'}

=== DETAILED ANALYSIS & REASONING ===
{analysis['reasoning']}

=== KEY MARKETING CLAIMS ===
{key_claims if key_claims else 'No key claims identified'}

=== RED FLAGS IDENTIFIED ===
{red_flags if red_flags else 'No specific red flags identified'}

=== CONSUMER RECOMMENDATIONS ===
{analysis['recommendations']}

=== ENVIRONMENTAL IMPACT ASSESSMENT ===
This report highlights potential misleading environmental claims by {incident_data.get('detected_brand', 'the company')}.
Consumers should be aware that {analysis['severity']} severity greenwashing has been identified with {analysis['confidence']} confidence.
This incident has been documented for future reference and to help inform sustainable purchasing decisions.
"""

    # Embed the document and store it with searchable metadata
    embedding = get_embedding(text)

    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "verdict": analysis['verdict'],
        "status": incident_data['status'],
        "created_at": incident_data['created_at'],
        "num_red_flags": len(analysis.get('red_flags', [])),
        "num_claims": len(analysis.get('key_claims', []))
    }

    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata]
    )

    print(f"✓ Incident #{incident_id} saved to ChromaDB for AI chat context")
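
# Illustrative retrieval of a stored incident for chat context (the query text is hypothetical):
#
#     query_embedding = get_embedding("Has EcoBrand been reported for greenwashing?")
#     hits = search_documents(query_embedding, num_results=3)
#     # each hit includes the report text and the metadata saved above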

@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - report_type: 'product' or 'company'
    - image: Base64 encoded image (for product reports)
    - pdf_data: Base64 encoded PDF (for company reports)
    """
    data = request.json

    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    report_type = data.get('report_type', 'product')
    image_base64 = data.get('image')

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400

    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Defaults used when no image is supplied or image analysis fails
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        compressed_image_base64 = None

        if report_type == 'product' and image_base64:
            try:
                # Strip a data-URL prefix (e.g. "data:image/jpeg;base64,") if present
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]

                image_bytes = base64.b64decode(image_base64)

                print("Compressing image with OpenCV...")
                compressed_image_base64 = compress_image(image_bytes, max_width=600, quality=75)

                # Detect brands and environmental claims in the image
                image_analysis = analyze_image_with_ollama(image_bytes)

                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand

                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims

            except Exception as e:
                print(f"Image processing error: {e}")

        # Retrieve related context from the vector store
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)

        context = ""
        for res in search_results:
            context += f"--- Document ---\n{res['text'][:500]}\n\n"

        if not context:
            context = "No prior information found about this company in our database."

        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Run the structured greenwashing analysis
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )

        analysis_dict = analysis.model_dump()

        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.utcnow().isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed",
            "report_type": report_type
        }

        if compressed_image_base64:
            incident_data["image_base64"] = compressed_image_base64

        incident_id = None

        # Only persist confirmed greenwashing cases
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
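
# Illustrative request body for POST /submit (values are hypothetical):
#
#     {
#         "product_name": "EcoBottle 500ml",
#         "description": "Label claims 100% recycled plastic with no certification",
#         "report_type": "product",
#         "image": "data:image/jpeg;base64,..."
#     }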

@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Get all confirmed greenwashing incidents"""
    try:
        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]

        # Return the 50 most recent confirmed incidents with a trimmed projection
        incidents = list(collection.find(
            {"is_greenwashing": True},
            {"_id": 1, "product_name": 1, "detected_brand": 1,
             "user_description": 1, "analysis": 1, "created_at": 1,
             "image_base64": 1, "report_type": 1}
        ).sort("created_at", -1).limit(50))

        # ObjectId is not JSON-serialisable, so convert it to a string
        for inc in incidents:
            inc["_id"] = str(inc["_id"])

        return jsonify(incidents)

    except Exception as e:
        return jsonify({"error": str(e)}), 500

@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Get a specific incident by ID"""
    try:
        from bson import ObjectId

        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]

        incident = collection.find_one({"_id": ObjectId(incident_id)})

        if not incident:
            return jsonify({"error": "Incident not found"}), 404

        incident["_id"] = str(incident["_id"])
        return jsonify(incident)

    except Exception as e:
        return jsonify({"error": str(e)}), 500