mirror of
https://github.com/SirBlobby/Hoya26.git
synced 2026-02-03 19:24:34 -05:00
156 lines
5.2 KiB
Python
156 lines
5.2 KiB
Python
import os
|
|
from google import genai
|
|
from src .chroma .vector_store import search_documents
|
|
from src .rag .embeddings import get_embedding
|
|
|
|
# System prompt placed in front of the retrieved report context by analyze().
# It tells the model which greenwashing criteria to check and the exact
# response layout (verdict, key findings, evidence, conclusion) to produce.
GREENWASHING_ANALYSIS_PROMPT = """
You are a Forensics ESG Analyst specialized in detecting Greenwashing.
Your objective is to audit the provided company report excerpts and expose any misleading sustainability claims.

### DEFINITION
Greenwashing: The practice of making unsubstantiated or misleading claims about the environmental benefits of a product, service, or practice.

### ANALYSIS FRAMEWORK
Analyze the provided text against these criteria:
1. **Vague Terminology**: Usage of buzzwords ("eco-friendly", "green", "sustainable") without quantifiable definitions.
2. **No Proof**: Claims lacking specific metrics (e.g., "reduced emissions" vs "reduced CO2 by 15% compared to 2020 baseline").
3. **Hidden Trade-offs**: Emphasizing a minor eco-feature while ignoring major negative impacts (e.g., recyclable packaging on a toxic product).
4. **Irrelevance**: Citing standard compliance (legal requirements) as proactive sustainability achievements.

### OUTPUT FORMAT
Provide a structured analysis:

**Verdict**: [LOW RISK / MODERATE RISK / HIGH RISK / CONFIRMED GREENWASHING]

**Key Findings**:
* [Finding 1]: [Explanation]
* [Finding 2]: [Explanation]

**Evidence**:
* "[Quote from text]" -> *Critique of why this is problematic or good.*

**Conclusion**:
A brief 1-2 sentence summary of the brand's honesty regarding this topic.
"""
|
|
|
|
def ask(prompt):
    """Send ``prompt`` to Gemini and return the text of the reply.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable, and
    a fresh ``genai.Client`` is constructed on every call.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    gemini = genai.Client(api_key=api_key)
    reply = gemini.models.generate_content(
        model="gemini-3-pro-preview",
        contents=prompt,
    )
    return reply.text
|
|
|
|
def ask_gemini_with_rag(prompt, category=None):
    """Answer ``prompt`` with Gemini, using vector-store documents as context.

    The prompt is embedded, the five closest documents are fetched, and their
    ``'text'`` fields are pasted into a general sustainability-assistant
    prompt that is then passed to ``ask``.

    ``category`` is accepted but never read.

    NOTE(review): this module defines ``ask_gemini_with_rag`` a second time
    further down. That later definition rebinds the name when the module is
    loaded, so this version cannot be reached through the module-level name.
    """
    hits = search_documents(get_embedding(prompt), num_results=5)

    # One "--- Document ---" section per retrieved hit, in retrieval order.
    context = "".join(f"--- Document ---\n{hit['text']}\n\n" for hit in hits)

    full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, you can use your general knowledge but mention that.

CONTEXT:
{context}

USER QUESTION: {prompt}

Please provide a helpful and concise response."""

    return ask(full_prompt)
|
|
|
|
def analyze(query, query_embedding, num_results=5, num_alternatives=3):
    """Run the greenwashing analysis for ``query`` and collect alternatives.

    Args:
        query: Text appended to the prompt as the user query / company focus.
        query_embedding: Embedding handed to ``search_documents``.
        num_results: How many of the leading hits are quoted as report context.
        num_alternatives: Stop collecting alternatives once this many are held.

    Returns:
        A dict with two keys: ``"analysis"`` (the text returned by ``ask``)
        and ``"alternatives"`` (a list of dicts with ``"text"``, ``"score"``
        and ``"summary"``, where the summary is the first 200 characters of
        the hit's text).

    A failing vector-store lookup is printed and treated as "no results"
    rather than propagated.
    """
    # Ask for more hits than the context needs: everything past the first
    # ``num_results`` is the candidate pool for the alternatives list
    # (the extra 5 presumably leaves slack for duplicates -- confirm).
    fetch_count = num_results + num_alternatives + 5
    try:
        results = search_documents(query_embedding, num_results=fetch_count)
    except Exception as e:
        print(f"Chroma error: {e}")
        results = []

    if results:
        chunks = "".join(
            f"RELEVANT DATA CHUNK: {hit['text']}\n\n"
            for hit in results[:num_results]
        )
        context = (
            "--- START OF REPORT CONTEXT ---\n"
            + chunks
            + "--- END OF REPORT CONTEXT ---\n"
        )
    else:
        context = "No data found in database for this brand."

    full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
    analysis_text = ask(full_prompt)

    alternatives = []
    seen_previews = set()
    for hit in results[num_results:]:
        preview = hit['text'][:200]
        # Hits sharing the same first 200 characters count as duplicates.
        if preview in seen_previews:
            continue
        seen_previews.add(preview)
        alternatives.append(
            {"text": hit['text'], "score": hit.get('score'), "summary": preview}
        )
        if len(alternatives) >= num_alternatives:
            break

    return {"analysis": analysis_text, "alternatives": alternatives}
|
|
|
|
def ask_gemini_with_rag(query, category=None, num_results=5):
    """Return only the analysis text that ``analyze`` produces for ``query``.

    The query is embedded with ``get_embedding`` and passed to ``analyze``
    together with ``num_results``; the ``"alternatives"`` part of the result
    is discarded. ``category`` is accepted but never read.

    NOTE(review): an earlier function in this module has the same name; being
    defined later, this is the one the module-level name ends up bound to.
    """
    return analyze(query, get_embedding(query), num_results=num_results)["analysis"]
|
|
|
|
def analyze_brand(brand_name):
    """Analyze one brand and print a console report of the outcome.

    Embeds ``brand_name``, runs ``analyze`` on it, and prints the analysis
    text followed by the summaries of any alternative chunks.

    Returns:
        The dict returned by ``analyze``, or ``None`` when any step raised
        (the error is printed instead of propagated).
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(f"\n{heavy_rule}")
    print(f"Analyzing brand: {brand_name}")
    print(heavy_rule)

    try:
        print("\n[1/3] Getting embedding for brand...")
        brand_embedding = get_embedding(brand_name)

        print("[2/3] Querying Chroma database...")
        result = analyze(brand_name, brand_embedding)

        print("[3/3] Gemini Analysis Complete!\n")
        print(light_rule)
        print("ANALYSIS:")
        print(light_rule)
        print(result["analysis"])

        print("\n" + light_rule)
        print("ALTERNATIVES FROM DATABASE:")
        print(light_rule)
        if not result["alternatives"]:
            print("No alternatives found in database.")
        else:
            for position, alternative in enumerate(result["alternatives"], start=1):
                print(f"\n{position}. {alternative['summary']}...")

        print("\n" + heavy_rule)
        return result
    except Exception as e:
        print(f"\nError during analysis: {e}")
        return None
|
|
|
|
def scan_and_analyze():
    """Detect a brand through the CV scanner and analyze the first one found.

    Calls ``capture_and_analyze`` from ``src.cv.scanner`` and reads the
    ``"logos_detected"`` list from its result. The ``"brand"`` value of the
    first entry (``"Unknown"`` when the key is absent) is passed to
    ``analyze_brand``.

    Returns:
        Whatever ``analyze_brand`` returns, or ``None`` when no logo was
        detected.
    """
    # Imported at call time rather than at module import.
    from src.cv.scanner import capture_and_analyze as cv_capture

    banner = "=" * 60
    print("\n" + banner)
    print("CV + Gemini Greenwashing Scanner")
    print(banner)
    print("Using camera to detect brands...")

    detected = cv_capture().get("logos_detected", [])
    if detected:
        # Only the first detected logo is analyzed.
        brand = detected[0].get("brand", "Unknown")
        print(f"\nDetected brand: {brand}")
        return analyze_brand(brand)

    print("No brands detected. Try again!")
    return None
|
|
|
|
if __name__ == "__main__":
    # Run as a script: perform one scan-and-analyze pass.
    scan_and_analyze()
|