Database Viewer Update

This commit is contained in:
2026-01-25 00:16:48 +00:00
parent d37d925150
commit bae861c71f
15 changed files with 1726 additions and 846 deletions

View File

@@ -13,5 +13,8 @@ def create_app():
app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
from .routes.reports import reports_bp
app.register_blueprint(reports_bp, url_prefix='/api/reports')
from .routes.incidents import incidents_bp
app.register_blueprint(incidents_bp, url_prefix='/api/incidents')
return app

View File

@@ -23,7 +23,33 @@ Based on the context provided, give a final verdict:
def ask(prompt):
    """Send *prompt* to Gemini and return the plain-text response.

    Reads the API key from the GOOGLE_API_KEY environment variable on every
    call; any client/transport error propagates to the caller.
    """
    # NOTE(review): the merged source contained two return statements (old
    # "gemini-2.0-flash" and new "gemini-3-flash-preview"); the first made the
    # second unreachable.  Kept the newer model, which matches the model used
    # by analyze_with_gemini elsewhere in this change set.
    client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    return client.models.generate_content(model="gemini-3-flash-preview", contents=prompt).text
def ask_gemini_with_rag(prompt, category=None):
    """Answer *prompt* using retrieval-augmented generation.

    Embeds the question, pulls the five closest documents from the vector
    store, prepends them as context, and delegates to ask().
    The ``category`` parameter is currently unused — kept for API stability.
    """
    embedding = get_embedding(prompt)
    hits = search_documents(embedding, num_results=5)
    # Concatenate the retrieved documents into one context blob.
    context = "".join(f"--- Document ---\n{hit['text']}\n\n" for hit in hits)
    full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, you can use your general knowledge but mention that.
CONTEXT:
{context}
USER QUESTION: {prompt}
Please provide a helpful and concise response."""
    return ask(full_prompt)
def analyze(query, query_embedding, num_results=5, num_alternatives=3):
try:

View File

@@ -1,4 +0,0 @@
# Package entry point: `python -m <package>` runs a full scan-and-analyze pass.
from . import scan_and_analyze

if __name__ == "__main__":
    # Delegate straight to the package-level pipeline; no CLI arguments.
    scan_and_analyze()

View File

@@ -3,15 +3,9 @@ import os
def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
    """Generate text from Gemini for *prompt* using *model_name*.

    Returns the model's text on success, or a human-readable error string
    (never raises), so callers can surface the message directly.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error: GOOGLE_API_KEY not found."
    # NOTE(review): the merged source contained two copies of the client/
    # request code — a try/except version followed by an unreachable bare
    # version.  Collapsed to a single guarded call that preserves the
    # error-string contract established by the missing-key branch above.
    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
        )
        return response.text
    except Exception as e:
        return f"Error interacting with Gemini API: {str(e)}"

View File

@@ -0,0 +1,405 @@
"""
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
from datetime import datetime, timezone
from typing import List, Optional, Literal

from flask import Blueprint, request, jsonify
from google import genai
from pydantic import BaseModel, Field

from src.chroma.vector_store import search_documents, insert_documents
from src.mongo.connection import get_mongo_client
from src.ollama.detector import OllamaLogoDetector
from src.rag.embeddings import get_embedding
incidents_bp = Blueprint('incidents', __name__)
# Lazily-constructed, module-wide logo detector (built on first use).
_detector = None

def get_detector():
    """Return the shared OllamaLogoDetector, creating it on first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = OllamaLogoDetector()
    return _detector
# ============= Pydantic Models for Structured Outputs =============
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis.

    Its JSON schema is sent to Gemini as the response schema; replies are
    parsed back with ``model_validate_json`` in analyze_with_gemini().
    """
    # Overall verdict flag; submit_incident persists the report only when True.
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
class LogoDetection(BaseModel):
    """Structured output for logo detection from Ollama.

    One entry per logo/brand found; aggregated in ImageAnalysis.logos_detected.
    """
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")
class ImageAnalysis(BaseModel):
    """Structured output for full image analysis.

    Its JSON schema is passed as the ``format`` argument to the Ollama chat
    call; also constructed manually in the fallback path of
    analyze_image_with_ollama().
    """
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
# ============= Analysis Functions =============
# Prompt template for the Gemini greenwashing verdict.  The placeholders are
# filled by analyze_with_gemini(); the model replies in the structured
# GreenwashingAnalysis schema.
GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.
Analyze the following user-submitted report about a potential greenwashing incident:
PRODUCT/COMPANY: {product_name}
USER COMPLAINT: {user_description}
DETECTED BRAND FROM IMAGE: {detected_brand}
IMAGE DESCRIPTION: {image_description}
RELEVANT CONTEXT FROM OUR DATABASE:
{context}
Based on this information, determine if this is a valid case of greenwashing. Consider:
1. Does the company have a history of misleading environmental claims?
2. Are their eco-friendly claims vague or unsubstantiated?
3. Is there a disconnect between their marketing and actual practices?
4. Are they using green imagery or terms without substance?
Provide your analysis in the structured format requested."""
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Run the greenwashing verdict through Gemini and parse the structured reply.

    Raises ValueError when GOOGLE_API_KEY is missing; pydantic validation
    errors propagate if the model returns malformed JSON.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context,
    )

    # Constrain the reply to JSON matching the GreenwashingAnalysis schema.
    generation_config = {
        "response_mime_type": "application/json",
        "response_json_schema": GreenwashingAnalysis.model_json_schema(),
    }
    reply = genai.Client(api_key=api_key).models.generate_content(
        model="gemini-3-flash-preview",
        contents=prompt,
        config=generation_config,
    )
    # Validate and parse the model's JSON into the typed result.
    return GreenwashingAnalysis.model_validate_json(reply.text)
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Analyze image using Ollama with structured output.

    Sends the image to a remote vision model and asks for JSON matching the
    ImageAnalysis schema.  Any failure (import, network, bad JSON) falls back
    to the basic OllamaLogoDetector rather than raising.
    """
    try:
        import ollama
        # Remote Ollama instance; the vision model runs there, not locally.
        client = ollama.Client(host="https://ollama.sirblob.co")
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
        prompt = """Analyze this image for a greenwashing detection system.
Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims (text, symbols, certifications)
3. Describe the packaging design and any "green" visual elements
Respond with structured JSON matching the schema provided."""
        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            # Constrain the reply to the ImageAnalysis JSON schema.
            format=ImageAnalysis.model_json_schema(),
            # Low temperature: extraction should be deterministic, not creative.
            options={'temperature': 0.1}
        )
        # Validate and parse
        analysis = ImageAnalysis.model_validate_json(response['message']['content'])
        return analysis
    except Exception as e:
        # Best-effort fallback: report submission must not fail because the
        # structured vision call did.
        print(f"Ollama structured analysis failed: {e}")
        # Fall back to basic detection
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)
        # Convert the detector's plain-dict result to the structured format.
        logos = []
        for logo in result.get('logos_detected', []):
            logos.append(LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown')
            ))
        # Claims/packaging are unknown in the fallback path, hence empty.
        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description=""
        )
def save_to_mongodb(incident_data: dict) -> str:
    """Persist *incident_data* in the ethix.incidents collection.

    Returns the inserted document's id as a string.
    """
    collection = get_mongo_client()["ethix"]["incidents"]
    inserted = collection.insert_one(incident_data)
    return str(inserted.inserted_id)
def save_to_chromadb(incident_data: dict, incident_id: str):
    """Save incident as context for the chatbot.

    Renders the incident as a single human-readable text document, embeds
    it, and inserts it into the vector store with searchable metadata.
    """
    analysis = incident_data['analysis']
    # Create a rich text representation of the incident
    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']} ({incident_data.get('detected_brand', 'Unknown brand')})
USER REPORT: {incident_data['user_description']}
ANALYSIS VERDICT: {analysis['verdict']}
Confidence: {analysis['confidence']}
Severity: {analysis['severity']}
DETAILED REASONING:
{analysis['reasoning']}
KEY ENVIRONMENTAL CLAIMS MADE:
{key_claims}
RED FLAGS IDENTIFIED:
{red_flags}
CONSUMER RECOMMENDATIONS:
{analysis['recommendations']}
"""
    # Get embedding for the incident
    embedding = get_embedding(text)
    # Store in ChromaDB with metadata.
    # NOTE(review): is_greenwashing is hard-coded True — callers only invoke
    # this for confirmed incidents (see the submit endpoint); verify if reused.
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "created_at": incident_data['created_at']
    }
    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata]
    )
# ============= API Endpoints =============
@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """
    Submit a greenwashing incident report.

    Expects JSON with:
    - product_name: Name of the product/company
    - description: User's description of the misleading claim
    - image: Base64 encoded image (optional, but recommended)

    Returns the structured Gemini analysis; confirmed incidents are also
    persisted to MongoDB and indexed into ChromaDB as chatbot context.
    """
    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    image_base64 = data.get('image')  # Base64 encoded image
    if not product_name:
        return jsonify({"error": "Product name is required"}), 400
    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: Analyze image with Ollama (structured output).
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        if image_base64:
            try:
                # Remove data URL prefix ("data:image/...;base64,") if present.
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]
                image_bytes = base64.b64decode(image_base64)
                image_analysis = analyze_image_with_ollama(image_bytes)
                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand
                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims
            except Exception as e:
                # Image analysis is best-effort; continue without it.
                print(f"Image analysis error: {e}")

        # Step 2: Get relevant prior context from the vector database.
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)
        context = ""
        for res in search_results:
            # Cap each document at 500 chars to keep the prompt bounded.
            context += f"--- Document ---\n{res['text'][:500]}\n\n"
        if not context:
            context = "No prior information found about this company in our database."
        # Fold any claims seen in the submitted image into the context.
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: Analyze with Gemini (structured output).
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context
        )
        analysis_dict = analysis.model_dump()

        # Step 4: Assemble the incident record.
        # FIX: datetime.utcnow() is deprecated and returns a naive datetime;
        # use an explicit timezone-aware UTC timestamp instead.
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed"
        }

        incident_id = None
        # Step 5: Persist only confirmed greenwashing cases.
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            # Index into ChromaDB so the chatbot can cite this incident.
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Return the 50 most recent confirmed greenwashing incidents."""
    try:
        collection = get_mongo_client()["ethix"]["incidents"]
        # Project only the fields the UI needs; newest first, capped at 50.
        projection = {"_id": 1, "product_name": 1, "detected_brand": 1,
                      "user_description": 1, "analysis": 1, "created_at": 1}
        cursor = collection.find({"is_greenwashing": True}, projection)
        incidents = list(cursor.sort("created_at", -1).limit(50))
        # ObjectId is not JSON-serializable; stringify before returning.
        for doc in incidents:
            doc["_id"] = str(doc["_id"])
        return jsonify(incidents)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Look up a single incident document by its MongoDB ObjectId."""
    try:
        from bson import ObjectId
        collection = get_mongo_client()["ethix"]["incidents"]
        document = collection.find_one({"_id": ObjectId(incident_id)})
        if document is None:
            return jsonify({"error": "Incident not found"}), 404
        # Stringify the ObjectId so the document is JSON-serializable.
        document["_id"] = str(document["_id"])
        return jsonify(document)
    except Exception as e:
        # Covers both invalid ObjectId strings and database errors.
        return jsonify({"error": str(e)}), 500

View File

@@ -17,6 +17,11 @@ def get_reports():
filename = meta.get('source') or meta.get('filename')
if not filename:
continue
# Skip incident reports - these are user-submitted greenwashing reports
if meta.get('type') == 'incident_report' or filename.startswith('incident_'):
continue
if filename not in unique_reports:
# Attempt to extract info from filename