# Hoya26/backend/src/gemini/__init__.py

import os
from google import genai
from src .chroma .vector_store import search_documents
from src .rag .embeddings import get_embedding

# Instruction text that `analyze` places in front of the retrieved report
# context and the user's query. It tells the model which greenwashing criteria
# to check and which sections (verdict, key findings, evidence, conclusion)
# its answer must contain.
GREENWASHING_ANALYSIS_PROMPT = """
You are a Forensics ESG Analyst specialized in detecting Greenwashing.
Your objective is to audit the provided company report excerpts and expose any misleading sustainability claims.

### DEFINITION
Greenwashing: The practice of making unsubstantiated or misleading claims about the environmental benefits of a product, service, or practice.

### ANALYSIS FRAMEWORK
Analyze the provided text against these criteria:
1.  **Vague Terminology**: Usage of buzzwords ("eco-friendly", "green", "sustainable") without quantifiable definitions.
2.  **No Proof**: Claims lacking specific metrics (e.g., "reduced emissions" vs "reduced CO2 by 15% compared to 2020 baseline").
3.  **Hidden Trade-offs**: Emphasizing a minor eco-feature while ignoring major negative impacts (e.g., recyclable packaging on a toxic product).
4.  **Irrelevance**: Citing standard compliance (legal requirements) as proactive sustainability achievements.

### OUTPUT FORMAT
Provide a structured analysis:

**Verdict**: [LOW RISK / MODERATE RISK / HIGH RISK / CONFIRMED GREENWASHING]

**Key Findings**:
*   [Finding 1]: [Explanation]
*   [Finding 2]: [Explanation]

**Evidence**:
*   "[Quote from text]" -> *Critique of why this is problematic or good.*

**Conclusion**:
A brief 1-2 sentence summary of the brand's honesty regarding this topic.
"""

def ask(prompt):
    """Send ``prompt`` to the Gemini model and return the reply's text.

    A fresh client is created on every call. Its API key is read from the
    ``GOOGLE_API_KEY`` environment variable; when the variable is unset,
    ``None`` is passed to the client unchanged.

    Args:
        prompt: The content handed to ``generate_content``.

    Returns:
        The ``text`` attribute of the generated response.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    response = genai.Client(api_key=api_key).models.generate_content(
        model="gemini-3-pro-preview",
        contents=prompt,
    )
    return response.text

def ask_gemini_with_rag(prompt, category=None):
    """Answer ``prompt`` with Gemini, using retrieved documents as context.

    NOTE(review): this module defines ``ask_gemini_with_rag`` a second time
    further down. The later definition rebinds the name at import time, so
    this version cannot be reached by name unless one of the two is renamed
    or removed.

    Args:
        prompt: The user's question. It is embedded and used as the search
            query, and is also quoted in the prompt sent to the model.
        category: Accepted but not used by this implementation.

    Returns:
        The text returned by ``ask`` for the assembled prompt.
    """
    hits = search_documents(get_embedding(prompt), num_results=5)

    # One "--- Document ---" section per retrieved hit, in retrieval order.
    context = "".join(f"--- Document ---\n{hit['text']}\n\n" for hit in hits)

    full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, you can use your general knowledge but mention that.

CONTEXT:
{context}

USER QUESTION: {prompt}

Please provide a helpful and concise response."""

    return ask(full_prompt)

def analyze(query, query_embedding, num_results=5, num_alternatives=3):
    """Run the greenwashing audit for ``query`` and collect alternative chunks.

    ``num_results + num_alternatives + 5`` documents are requested for
    ``query_embedding``. The first ``num_results`` hits form the report
    context that is sent to Gemini together with
    ``GREENWASHING_ANALYSIS_PROMPT`` and ``query``. The remaining hits are
    de-duplicated by their first 200 characters and returned as alternatives.

    Args:
        query: Free-text query or company focus appended to the prompt.
        query_embedding: Embedding passed to ``search_documents``.
        num_results: Number of leading hits used as report context.
        num_alternatives: Maximum number of alternatives to return. Zero or
            a negative value yields an empty list.

    Returns:
        A dict with two keys: ``"analysis"`` (the text returned by ``ask``)
        and ``"alternatives"`` (a list of dicts with ``"text"``, ``"score"``
        and ``"summary"`` entries, where ``"summary"`` is the first 200
        characters of the text).
    """
    try:
        # The extra 5 hits are presumably headroom for duplicates that get
        # skipped below -- TODO confirm the intent of the constant.
        results = search_documents(
            query_embedding,
            num_results=num_results + num_alternatives + 5,
        )
    except Exception as e:
        # Best effort: a failed lookup is reported and the analysis still
        # runs, just without any database context.
        print(f"Chroma error: {e}")
        results = []

    if not results:
        context = "No data found in database for this brand."
    else:
        chunks = "".join(
            f"RELEVANT DATA CHUNK: {res['text']}\n\n"
            for res in results[:num_results]
        )
        context = (
            "--- START OF REPORT CONTEXT ---\n"
            f"{chunks}"
            "--- END OF REPORT CONTEXT ---\n"
        )

    full_prompt = (
        f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\n"
        f"USER QUERY/COMPANY FOCUS: {query}"
    )
    analysis_text = ask(full_prompt)

    alternatives = []
    seen_texts = set()
    for res in results[num_results:]:
        # Check the limit BEFORE appending: checking afterwards always let
        # one entry through, even when num_alternatives was 0 or negative.
        if len(alternatives) >= num_alternatives:
            break
        text_preview = res['text'][:200]
        if text_preview in seen_texts:
            continue
        seen_texts.add(text_preview)
        alternatives.append(
            {
                "text": res['text'],
                "score": res.get('score'),
                "summary": text_preview,
            }
        )

    return {"analysis": analysis_text, "alternatives": alternatives}

def ask_gemini_with_rag(query, category=None, num_results=5):
    """Return only the analysis text that ``analyze`` produces for ``query``.

    The query is embedded with ``get_embedding`` and handed to ``analyze``;
    the ``"alternatives"`` part of the result is discarded.

    Args:
        query: Text to embed and analyze.
        category: Accepted but not used by this implementation.
        num_results: Forwarded to ``analyze`` as ``num_results``.

    Returns:
        The ``"analysis"`` entry of the dict returned by ``analyze``.
    """
    outcome = analyze(query, get_embedding(query), num_results=num_results)
    return outcome["analysis"]

def analyze_brand(brand_name):
    """Analyze ``brand_name`` and print a progress log plus the full report.

    The brand name is embedded with ``get_embedding`` and passed to
    ``analyze``. The analysis text and the summary of each alternative are
    printed to standard output.

    Args:
        brand_name: Name of the brand to look up and analyze.

    Returns:
        The dict returned by ``analyze``, or ``None`` when any step raised
        an exception (the error message is printed instead of propagated).
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(f"\n{heavy_rule}")
    print(f"Analyzing brand: {brand_name}")
    print(heavy_rule)

    try:
        print("\n[1/3] Getting embedding for brand...")
        brand_vector = get_embedding(brand_name)

        print("[2/3] Querying Chroma database...")
        report = analyze(brand_name, brand_vector)

        print("[3/3] Gemini Analysis Complete!\n")
        print(light_rule)
        print("ANALYSIS:")
        print(light_rule)
        print(report["analysis"])

        print(f"\n{light_rule}")
        print("ALTERNATIVES FROM DATABASE:")
        print(light_rule)
        if not report["alternatives"]:
            print("No alternatives found in database.")
        else:
            for position, alternative in enumerate(report["alternatives"], start=1):
                print(f"\n{position}. {alternative['summary']}...")

        print(f"\n{heavy_rule}")
        return report
    except Exception as error:
        # Any failure in embedding, lookup, model call or printing ends up
        # here; the caller only sees None.
        print(f"\nError during analysis: {error}")
        return None

def scan_and_analyze():
    """Capture an image, take the first detected brand and analyze it.

    Returns:
        The result of ``analyze_brand`` for the first detected logo, or
        ``None`` when the capture result lists no logos.
    """
    # Imported inside the function, so the scanner module is only loaded
    # when a scan is actually requested.
    from src.cv.scanner import capture_and_analyze as cv_capture

    banner = "=" * 60
    print(f"\n{banner}")
    print("CV + Gemini Greenwashing Scanner")
    print(banner)
    print("Using camera to detect brands...")

    detected_logos = cv_capture().get("logos_detected", [])
    if detected_logos:
        # Only the first detection is analyzed; a missing "brand" key is
        # reported as "Unknown".
        brand = detected_logos[0].get("brand", "Unknown")
        print(f"\nDetected brand: {brand}")
        return analyze_brand(brand)

    print("No brands detected. Try again!")
    return None

# When the module is executed as a script, run one camera scan and analysis.
if __name__ =="__main__":
    scan_and_analyze ()