Database Viewer Update

This commit is contained in:
2026-01-25 00:16:48 +00:00
parent d37d925150
commit bae861c71f
15 changed files with 1726 additions and 846 deletions

View File

@@ -13,5 +13,8 @@ def create_app():
app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
from .routes.reports import reports_bp
app.register_blueprint(reports_bp, url_prefix='/api/reports')
from .routes.incidents import incidents_bp
app.register_blueprint(incidents_bp, url_prefix='/api/incidents')
return app

View File

@@ -23,7 +23,33 @@ Based on the context provided, give a final verdict:
def ask(prompt):
    """Send *prompt* to Gemini and return the plain-text response.

    Reads the API key from the GOOGLE_API_KEY environment variable on every
    call; any client/transport error propagates to the caller.
    """
    # NOTE(review): the merged source contained two return statements (old
    # "gemini-2.0-flash" and new "gemini-3-flash-preview"); the first made the
    # second unreachable.  Kept the newer model, which matches the model used
    # by analyze_with_gemini elsewhere in this change set.
    client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    return client.models.generate_content(model="gemini-3-flash-preview", contents=prompt).text
def ask_gemini_with_rag(prompt, category=None):
    """Answer *prompt* using retrieval-augmented generation.

    Embeds the question, pulls the five closest documents from the vector
    store, prepends them as context, and delegates to ask().
    The ``category`` parameter is currently unused — kept for API stability.
    """
    embedding = get_embedding(prompt)
    hits = search_documents(embedding, num_results=5)
    # Concatenate the retrieved documents into one context blob.
    context = "".join(f"--- Document ---\n{hit['text']}\n\n" for hit in hits)
    full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, you can use your general knowledge but mention that.
CONTEXT:
{context}
USER QUESTION: {prompt}
Please provide a helpful and concise response."""
    return ask(full_prompt)
def analyze(query, query_embedding, num_results=5, num_alternatives=3):
try:

View File

@@ -1,4 +0,0 @@
# Package entry point: `python -m <package>` runs a full scan-and-analyze pass.
from . import scan_and_analyze

if __name__ == "__main__":
    # Delegate straight to the package-level pipeline; no CLI arguments.
    scan_and_analyze()

View File

@@ -3,15 +3,9 @@ import os
def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
    """Generate text from Gemini for *prompt* using *model_name*.

    Returns the model's text on success, or a human-readable error string
    (never raises), so callers can surface the message directly.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error: GOOGLE_API_KEY not found."
    # NOTE(review): the merged source contained two copies of the client/
    # request code — a try/except version followed by an unreachable bare
    # version.  Collapsed to a single guarded call that preserves the
    # error-string contract established by the missing-key branch above.
    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
        )
        return response.text
    except Exception as e:
        return f"Error interacting with Gemini API: {str(e)}"

View File

@@ -0,0 +1,405 @@
"""
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
from datetime import datetime, timezone
from typing import List, Optional, Literal

from flask import Blueprint, request, jsonify
from google import genai
from pydantic import BaseModel, Field

from src.chroma.vector_store import search_documents, insert_documents
from src.mongo.connection import get_mongo_client
from src.ollama.detector import OllamaLogoDetector
from src.rag.embeddings import get_embedding
incidents_bp = Blueprint('incidents', __name__)
# Lazily-constructed, module-wide logo detector (built on first use).
_detector = None

def get_detector():
    """Return the shared OllamaLogoDetector, creating it on first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = OllamaLogoDetector()
    return _detector
# ============= Pydantic Models for Structured Outputs =============
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis.

    Its JSON schema is sent to Gemini as the response schema; replies are
    parsed back with ``model_validate_json`` in analyze_with_gemini().
    """
    # Overall verdict flag; submit_incident persists the report only when True.
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama.

    One entry per logo/brand found; aggregated in ImageAnalysis.logos_detected.
    """
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")
class ImageAnalysis(BaseModel):
    """Structured output for full image analysis.

    Its JSON schema is passed as the ``format`` argument to the Ollama chat
    call; also constructed manually in the fallback path of
    analyze_image_with_ollama().
    """
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
# ============= Analysis Functions =============
# Prompt template for the Gemini greenwashing verdict.  The placeholders are
# filled by analyze_with_gemini(); the model replies in the structured
# GreenwashingAnalysis schema.
GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.
Analyze the following user-submitted report about a potential greenwashing incident:
PRODUCT/COMPANY: {product_name}
USER COMPLAINT: {user_description}
DETECTED BRAND FROM IMAGE: {detected_brand}
IMAGE DESCRIPTION: {image_description}
RELEVANT CONTEXT FROM OUR DATABASE:
{context}
Based on this information, determine if this is a valid case of greenwashing. Consider:
1. Does the company have a history of misleading environmental claims?
2. Are their eco-friendly claims vague or unsubstantiated?
3. Is there a disconnect between their marketing and actual practices?
4. Are they using green imagery or terms without substance?
Provide your analysis in the structured format requested."""
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Run the greenwashing verdict through Gemini and parse the structured reply.

    Raises ValueError when GOOGLE_API_KEY is missing; pydantic validation
    errors propagate if the model returns malformed JSON.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context,
    )

    # Constrain the reply to JSON matching the GreenwashingAnalysis schema.
    generation_config = {
        "response_mime_type": "application/json",
        "response_json_schema": GreenwashingAnalysis.model_json_schema(),
    }
    reply = genai.Client(api_key=api_key).models.generate_content(
        model="gemini-3-flash-preview",
        contents=prompt,
        config=generation_config,
    )
    # Validate and parse the model's JSON into the typed result.
    return GreenwashingAnalysis.model_validate_json(reply.text)
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Analyze image using Ollama with structured output.

    Sends the image to a remote vision model and asks for JSON matching the
    ImageAnalysis schema.  Any failure (import, network, bad JSON) falls back
    to the basic OllamaLogoDetector rather than raising.
    """
    try:
        import ollama
        # Remote Ollama instance; the vision model runs there, not locally.
        client = ollama.Client(host="https://ollama.sirblob.co")
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
        prompt = """Analyze this image for a greenwashing detection system.
Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements
Respond with structured JSON matching the schema provided."""
        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            # Constrain the reply to the ImageAnalysis JSON schema.
            format=ImageAnalysis.model_json_schema(),
            # Low temperature: extraction should be deterministic, not creative.
            options={'temperature': 0.1}
        )
        # Validate and parse
        analysis = ImageAnalysis.model_validate_json(response['message']['content'])
        return analysis
    except Exception as e:
        # Best-effort fallback: report submission must not fail because the
        # structured vision call did.
        print(f"Ollama structured analysis failed: {e}")
        # Fall back to basic detection
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)
        # Convert the detector's plain-dict result to the structured format.
        logos = []
        for logo in result.get('logos_detected', []):
            logos.append(LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown')
            ))
        # Claims/packaging are unknown in the fallback path, hence empty.
        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )
def save_to_mongodb(incident_data: dict) -> str:
    """Persist *incident_data* in the ethix.incidents collection.

    Returns the inserted document's id as a string.
    """
    collection = get_mongo_client()["ethix"]["incidents"]
    inserted = collection.insert_one(incident_data)
    return str(inserted.inserted_id)
def save_to_chromadb(incident_data: dict, incident_id: str):
    """Save incident as context for the chatbot.

    Renders the incident as a single human-readable text document, embeds
    it, and inserts it into the vector store with searchable metadata.
    """
    analysis = incident_data['analysis']
    # Create a rich text representation of the incident
    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']} ({incident_data.get('detected_brand', 'Unknown brand')})
USER REPORT: {incident_data['user_description']}
ANALYSIS VERDICT: {analysis['verdict']}
Confidence: {analysis['confidence']}
Severity: {analysis['severity']}
DETAILED REASONING:
{analysis['reasoning']}
KEY ENVIRONMENTAL CLAIMS MADE:
{key_claims}
RED FLAGS IDENTIFIED:
{red_flags}
CONSUMER RECOMMENDATIONS:
{analysis['recommendations']}
"""
    # Get embedding for the incident
    embedding = get_embedding(text)
    # Store in ChromaDB with metadata.
    # NOTE(review): is_greenwashing is hard-coded True — callers only invoke
    # this for confirmed incidents (see the submit endpoint); verify if reused.
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "created_at": incident_data['created_at']
    }
    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata]
    )
# ============= API Endpoints =============
@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report.

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - image: Base64 encoded image (optional, but recommended)

    Returns the structured Gemini analysis; confirmed incidents are also
    persisted to MongoDB and indexed into ChromaDB as chatbot context.
    """
    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    image_base64 = data.get('image')  # Base64 encoded image
    if not product_name:
        return jsonify({"error": "Product name is required"}), 400
    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: Analyze image with Ollama (structured output).
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        if image_base64:
            try:
                # Remove data URL prefix ("data:image/...;base64,") if present.
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]
                image_bytes = base64.b64decode(image_base64)
                image_analysis = analyze_image_with_ollama(image_bytes)
                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand
                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims
            except Exception as e:
                # Image analysis is best-effort; continue without it.
                print(f"Image analysis error: {e}")

        # Step 2: Get relevant prior context from the vector database.
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)
        context = ""
        for res in search_results:
            # Cap each document at 500 chars to keep the prompt bounded.
            context += f"--- Document ---\n{res['text'][:500]}\n\n"
        if not context:
            context = "No prior information found about this company in our database."
        # Fold any claims seen in the submitted image into the context.
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: Analyze with Gemini (structured output).
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )
        analysis_dict = analysis.model_dump()

        # Step 4: Assemble the incident record.
        # FIX: datetime.utcnow() is deprecated and returns a naive datetime;
        # use an explicit timezone-aware UTC timestamp instead.
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed"
        }

        incident_id = None
        # Step 5: Persist only confirmed greenwashing cases.
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            # Index into ChromaDB so the chatbot can cite this incident.
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Return the 50 most recent confirmed greenwashing incidents."""
    try:
        collection = get_mongo_client()["ethix"]["incidents"]
        # Project only the fields the UI needs; newest first, capped at 50.
        projection = {"_id": 1, "product_name": 1, "detected_brand": 1,
                      "user_description": 1, "analysis": 1, "created_at": 1}
        cursor = collection.find({"is_greenwashing": True}, projection)
        incidents = list(cursor.sort("created_at", -1).limit(50))
        # ObjectId is not JSON-serializable; stringify before returning.
        for doc in incidents:
            doc["_id"] = str(doc["_id"])
        return jsonify(incidents)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Look up a single incident document by its MongoDB ObjectId."""
    try:
        from bson import ObjectId
        collection = get_mongo_client()["ethix"]["incidents"]
        document = collection.find_one({"_id": ObjectId(incident_id)})
        if document is None:
            return jsonify({"error": "Incident not found"}), 404
        # Stringify the ObjectId so the document is JSON-serializable.
        document["_id"] = str(document["_id"])
        return jsonify(document)
    except Exception as e:
        # Covers both invalid ObjectId strings and database errors.
        return jsonify({"error": str(e)}), 500

View File

@@ -17,6 +17,11 @@ def get_reports():
filename = meta.get('source') or meta.get('filename')
if not filename:
continue
# Skip incident reports - these are user-submitted greenwashing reports
if meta.get('type') == 'incident_report' or filename.startswith('incident_'):
continue
if filename not in unique_reports:
# Attempt to extract info from filename