AI Prompt and RAG improvements

This commit is contained in:
2026-01-26 12:34:00 +00:00
parent 001ff97518
commit f941d8342b
3 changed files with 66 additions and 32 deletions

View File

@@ -1,5 +1,6 @@
import chromadb
import os
import hashlib
CHROMA_HOST = os.environ.get("CHROMA_HOST", "http://chroma.sirblob.co")
COLLECTION_NAME = "rag_documents"
@@ -16,26 +17,38 @@ def get_collection (collection_name =COLLECTION_NAME ):
client =get_chroma_client ()
return client .get_or_create_collection (name =collection_name )
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None, batch_size=2000):
    """Add documents and their embeddings to a Chroma collection in batches.

    Args:
        texts: Sequence of document strings to store.
        embeddings: Sequence of embedding vectors, one per entry in ``texts``.
        collection_name: Name of the target collection; it is fetched (or
            created) through ``get_collection``.
        metadata_list: Optional sequence of metadata dicts, one per entry in
            ``texts``. A falsy value (``None`` or empty) stores no metadata.
        batch_size: Maximum number of documents sent per ``collection.add``
            call. Defaults to 2000.

    Returns:
        The number of documents submitted, i.e. ``len(texts)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``embeddings``
            or a non-empty ``metadata_list`` differ in length from ``texts``.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    total_count = len(texts)

    # Validate up front so a mismatch cannot leave the collection
    # half-populated after some batches have already been written.
    if len(embeddings) != total_count:
        raise ValueError(
            f"embeddings length ({len(embeddings)}) does not match texts length ({total_count})"
        )
    if metadata_list and len(metadata_list) != total_count:
        raise ValueError(
            f"metadata_list length ({len(metadata_list)}) does not match texts length ({total_count})"
        )

    collection = get_collection(collection_name)

    # SHA-256 is deterministic across interpreter runs, unlike the built-in
    # hash() on str. The positional index is part of the ID, so the same text
    # at a different position in ``texts`` gets a different ID.
    ids = [
        f"doc_{i}_{hashlib.sha256(text.encode('utf-8')).hexdigest()[:16]}"
        for i, text in enumerate(texts)
    ]

    for start in range(0, total_count, batch_size):
        end = start + batch_size  # slicing clamps to the sequence length
        batch = {
            "ids": ids[start:end],
            "embeddings": embeddings[start:end],
            "documents": texts[start:end],
        }
        # Only pass metadatas when the caller supplied some.
        if metadata_list:
            batch["metadatas"] = metadata_list[start:end]
        collection.add(**batch)

    return total_count
def search_documents (query_embedding ,collection_name =COLLECTION_NAME ,num_results =5 ,filter_metadata =None ):
collection =get_collection (collection_name )

View File

@@ -3,22 +3,34 @@ from google import genai
from src .chroma .vector_store import search_documents
from src .rag .embeddings import get_embedding
# Prompt template for the greenwashing audit: it defines the term, lists the
# four analysis criteria, and fixes the Markdown layout of the model's answer.
GREENWASHING_ANALYSIS_PROMPT = """
You are a Forensics ESG Analyst specialized in detecting Greenwashing.
Your objective is to audit the provided company report excerpts and expose any misleading sustainability claims.
### DEFINITION
Greenwashing: The practice of making unsubstantiated or misleading claims about the environmental benefits of a product, service, or practice.
### ANALYSIS FRAMEWORK
Analyze the provided text against these criteria:
1. **Vague Terminology**: Usage of buzzwords ("eco-friendly", "green", "sustainable") without quantifiable definitions.
2. **No Proof**: Claims lacking specific metrics (e.g., "reduced emissions" vs "reduced CO2 by 15% compared to 2020 baseline").
3. **Hidden Trade-offs**: Emphasizing a minor eco-feature while ignoring major negative impacts (e.g., recyclable packaging on a toxic product).
4. **Irrelevance**: Citing standard compliance (legal requirements) as proactive sustainability achievements.
### OUTPUT FORMAT
Provide a structured analysis:
**Verdict**: [LOW RISK / MODERATE RISK / HIGH RISK / CONFIRMED GREENWASHING]
**Key Findings**:
* [Finding 1]: [Explanation]
* [Finding 2]: [Explanation]
**Evidence**:
* "[Quote from text]" -> *Critique of why this is problematic or good.*
**Conclusion**:
A brief 1-2 sentence summary of the brand's honesty regarding this topic.
"""
def ask (prompt ):

View File

@@ -13,7 +13,7 @@ class GeminiClient :
raise ValueError ("No GOOGLE_API_KEY found in .env file!")
self .client =genai .Client (api_key =self .api_key )
self .model_name ="gemini-2.0-flash"
self .model_name ="gemini-3-pro-preview"
def ask (self ,prompt ,context =""):
try :
@@ -26,7 +26,16 @@ class GeminiClient :
model =self .model_name ,
contents =full_message ,
config ={
'system_instruction':'You are Ethix, an expert sustainability assistant. You have access to a database including Georgetown University sustainability reports. SEARCH THE PROVIDED CONTEXT CAREFULLY. If the context contains ANY information about Georgetown University or the user\'s query, matches, or partial matches, YOU MUST USE IT to answer. Ignore irrelevant parts of the context. If no context matches, provide general expert advice. Keep responses concise (max 6 sentences).'
'system_instruction': (
"You are Ethix, an expert sustainability AI assistant. "
"Your mission is to analyze sustainability reports and provide data-driven insights. "
"You have access to a database of ESG reports (including Georgetown University). "
"CRITICAL INSTRUCTIONS: "
"1. PRIORITIZE CONTEXT: If the provided background context contains relevant data, YOU MUST USE IT and explicitly cite it. "
"2. DETECT GREENWASHING: Be vigilant for vague buzzwords ('eco-friendly', 'green') lacking specific metrics. Flag them. "
"3. ACCURACY: Do not hallucinate data. If the context is empty or irrelevant to the query, state 'I don't have specific data on this in my current reports' before offering general expert knowledge. "
"4. FORMATTING: Use Markdown. Keep answers professional, concise, and structured."
)
}
)
return response .text