Restore code and save recent updates

This commit is contained in:
2026-01-25 03:31:01 +00:00
parent bae861c71f
commit 5ce0b4d278
54 changed files with 2963 additions and 2899 deletions

View File

@@ -2,65 +2,112 @@
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
from datetime import datetime
from flask import Blueprint, request, jsonify
from google import genai
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
import base64
import os
import cv2
import numpy as np
from datetime import datetime
from flask import Blueprint ,request ,jsonify
from google import genai
from pydantic import BaseModel ,Field
from typing import List ,Optional ,Literal
from src.ollama.detector import OllamaLogoDetector
from src.chroma.vector_store import search_documents, insert_documents
from src.rag.embeddings import get_embedding
from src.mongo.connection import get_mongo_client
from src .ollama .detector import OllamaLogoDetector
from src .chroma .vector_store import search_documents ,insert_documents
from src .rag .embeddings import get_embedding
from src .mongo .connection import get_mongo_client
incidents_bp = Blueprint('incidents', __name__)

# Detector is created lazily so importing this module stays cheap and does
# not require the Ollama service to be reachable at import time.
_detector = None


def get_detector():
    """Return the shared OllamaLogoDetector instance, creating it on first use."""
    global _detector
    if _detector is None:
        _detector = OllamaLogoDetector()
    return _detector
def compress_image(image_bytes: bytes, max_width: int = 800, quality: int = 85) -> str:
    """
    Compress an image with OpenCV and return it as a Base64 string.

    Args:
        image_bytes: Original image bytes.
        max_width: Maximum width for the resized image; wider images are
            downscaled preserving aspect ratio.
        quality: JPEG quality (1-100).

    Returns:
        Base64-encoded compressed JPEG. On any failure the original bytes
        are Base64-encoded unchanged (deliberate best-effort behavior).
    """
    try:
        nparr = np.frombuffer(image_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError("Failed to decode image")

        height, width = img.shape[:2]
        if width > max_width:
            # INTER_AREA is the recommended interpolation for shrinking
            ratio = max_width / width
            new_width = max_width
            new_height = int(height * ratio)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
        _, buffer = cv2.imencode('.jpg', img, encode_param)
        return base64.b64encode(buffer).decode('utf-8')
    except Exception as e:
        print(f"Image compression error: {e}")
        # Fall back to the uncompressed payload rather than failing the report
        return base64.b64encode(image_bytes).decode('utf-8')
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis."""
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama."""
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")
class ImageAnalysis(BaseModel):
    """Structured output for full image analysis."""
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
# ============= Analysis Functions =============
GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.
GREENWASHING_ANALYSIS_PROMPT ="""You are an expert at detecting greenwashing - misleading environmental claims by companies.
Analyze the following user-submitted report about a potential greenwashing incident:
@@ -81,48 +128,48 @@ Based on this information, determine if this is a valid case of greenwashing. Co
Provide your analysis in the structured format requested."""
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """
    Send the analysis request to Gemini and parse a structured response.

    Args:
        product_name: Name of the reported product/company.
        user_description: The user's complaint text.
        detected_brand: Brand detected from the image ("Unknown" if none).
        image_description: Description produced by the image analysis step.
        context: Retrieved background documents plus claims visible in the image.

    Returns:
        GreenwashingAnalysis validated from Gemini's JSON output.

    Raises:
        ValueError: If GOOGLE_API_KEY is not set in the environment.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context
    )

    client = genai.Client(api_key=api_key)
    # Structured output: constrain the response to the Pydantic JSON schema
    response = client.models.generate_content(
        model="gemini-3-pro-preview",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_json_schema": GreenwashingAnalysis.model_json_schema(),
        }
    )
    # Validate and parse the model's JSON into the Pydantic model
    analysis = GreenwashingAnalysis.model_validate_json(response.text)
    return analysis
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """
    Analyze an image using Ollama with structured output.

    Falls back to the basic OllamaLogoDetector when the structured call
    fails for any reason (network error, schema mismatch, etc.).

    Args:
        image_bytes: Raw image bytes.

    Returns:
        ImageAnalysis describing detected logos, claims, and packaging.
    """
    try:
        import ollama

        client = ollama.Client(host="https://ollama.sirblob.co")
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        # NOTE(review): part of this prompt was lost in a bad merge;
        # reconstructed from the surviving text — confirm against history.
        prompt = """Analyze this image for a greenwashing detection system.

Identify:
1. All visible logos, brand names, and company names

Respond with structured JSON matching the schema provided."""

        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            # Constrain Ollama's reply to the ImageAnalysis JSON schema
            format=ImageAnalysis.model_json_schema(),
            options={'temperature': 0.1}
        )
        # Validate and parse
        analysis = ImageAnalysis.model_validate_json(response['message']['content'])
        return analysis
    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")
        # Fall back to basic detection and adapt its dict output
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)

        logos = []
        for logo in result.get('logos_detected', []):
            logos.append(LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown')
            ))
        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )
def save_to_mongodb(incident_data: dict) -> str:
    """Save the incident document to MongoDB and return its inserted ID as a string."""
    client = get_mongo_client()
    db = client["ethix"]
    collection = db["incidents"]
    result = collection.insert_one(incident_data)
    # ObjectId is not JSON serializable, so expose it as a string
    return str(result.inserted_id)
def save_to_chromadb(incident_data: dict, incident_id: str):
    """
    Save the incident as retrieval context for the chatbot.

    Builds a rich text report (verdict, full analysis, environmental impact),
    embeds it, and stores it in ChromaDB with searchable metadata.

    Args:
        incident_data: The incident document as assembled by submit_incident.
        incident_id: MongoDB ID of the stored incident.
    """
    analysis = incident_data['analysis']

    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))
    env_claims = "\n".join(f"- {claim}" for claim in incident_data.get('environmental_claims', []))

    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Report Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']}
Detected Brand: {incident_data.get('detected_brand', 'Unknown brand')}
Status: {incident_data['status']}

=== VERDICT ===
{analysis['verdict']}

Greenwashing Detected: {'YES' if analysis['is_greenwashing'] else 'NO'}
Confidence Level: {analysis['confidence']}
Severity Assessment: {analysis['severity']}

=== USER COMPLAINT ===
{incident_data['user_description']}

=== IMAGE ANALYSIS ===
{incident_data.get('image_description', 'No image analysis available')}

=== ENVIRONMENTAL CLAIMS IDENTIFIED ===
{env_claims if env_claims else 'No specific environmental claims identified'}

=== DETAILED ANALYSIS & REASONING ===
{analysis['reasoning']}

=== KEY MARKETING CLAIMS ===
{key_claims if key_claims else 'No key claims identified'}

=== RED FLAGS IDENTIFIED ===
{red_flags if red_flags else 'No specific red flags identified'}

=== CONSUMER RECOMMENDATIONS ===
{analysis['recommendations']}

=== ENVIRONMENTAL IMPACT ASSESSMENT ===
This report highlights potential misleading environmental claims by {incident_data.get('detected_brand', 'the company')}.
Consumers should be aware that {analysis['severity']} severity greenwashing has been identified with {analysis['confidence']} confidence.
This incident has been documented for future reference and to help inform sustainable purchasing decisions.
"""

    # Embed the report so the chatbot can retrieve it semantically
    embedding = get_embedding(text)

    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "verdict": analysis['verdict'],
        "status": incident_data['status'],
        "created_at": incident_data['created_at'],
        "num_red_flags": len(analysis.get('red_flags', [])),
        "num_claims": len(analysis.get('key_claims', []))
    }
    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata]
    )
    print(f"✓ Incident #{incident_id} saved to ChromaDB for AI chat context")
# ============= API Endpoints =============
@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report.

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - report_type: 'product' or 'company'
    - image: Base64 encoded image (for product reports)
    - pdf_data: Base64 encoded PDF (for company reports)
    """
    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    report_type = data.get('report_type', 'product')
    image_base64 = data.get('image')

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400
    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: analyze the image (product reports only)
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        compressed_image_base64 = None

        if report_type == 'product' and image_base64:
            try:
                # Strip a data-URL prefix ("data:image/...;base64,") if present
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]
                image_bytes = base64.b64decode(image_base64)

                print("Compressing image with OpenCV...")
                compressed_image_base64 = compress_image(image_bytes, max_width=600, quality=75)

                image_analysis = analyze_image_with_ollama(image_bytes)
                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand
                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims
            except Exception as e:
                # Best-effort: continue the submission without image analysis
                print(f"Image processing error: {e}")

        # Step 2: gather relevant context from the vector database
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)

        context = ""
        for res in search_results:
            context += f"--- Document ---\n{res['text'][:500]}\n\n"
        if not context:
            context = "No prior information found about this company in our database."

        # Add environmental claims visible in the image to the context
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: structured analysis with Gemini
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )
        analysis_dict = analysis.model_dump()

        # Step 4: assemble the incident document
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.utcnow().isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed",
            "report_type": report_type
        }
        if compressed_image_base64:
            incident_data["image_base64"] = compressed_image_base64

        # Step 5: persist only confirmed greenwashing cases
        incident_id = None
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Get all confirmed greenwashing incidents (most recent first, max 50)."""
    try:
        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]

        # Projection keeps payloads small while including the stored image
        incidents = list(collection.find(
            {"is_greenwashing": True},
            {"_id": 1, "product_name": 1, "detected_brand": 1,
             "user_description": 1, "analysis": 1, "created_at": 1,
             "image_base64": 1, "report_type": 1}
        ).sort("created_at", -1).limit(50))

        # ObjectId is not JSON serializable
        for inc in incidents:
            inc["_id"] = str(inc["_id"])
        return jsonify(incidents)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Get a specific incident by its MongoDB ID."""
    try:
        from bson import ObjectId

        client = get_mongo_client()
        db = client["ethix"]
        collection = db["incidents"]
        incident = collection.find_one({"_id": ObjectId(incident_id)})
        if not incident:
            return jsonify({"error": "Incident not found"}), 404
        # ObjectId is not JSON serializable
        incident["_id"] = str(incident["_id"])
        return jsonify(incident)
    except Exception as e:
        # An invalid ObjectId string also lands here (bson raises InvalidId)
        return jsonify({"error": str(e)}), 500