Restore code and save recent updates

This commit is contained in:
2026-01-25 03:31:01 +00:00
parent bae861c71f
commit 5ce0b4d278
54 changed files with 2963 additions and 2899 deletions

View File

@@ -1,24 +1,24 @@
# Use a lightweight Python base image
FROM python:3.9-slim

# Set working directory inside the container
WORKDIR /app

# Copy requirements first so the dependency layer is cached across code changes
COPY requirements.txt .

# Install dependencies and the production server in a single cached layer.
# Merging the two pip invocations avoids an extra image layer and ensures
# --no-cache-dir applies to gunicorn as well.
RUN pip install --no-cache-dir -r requirements.txt gunicorn

# Copy the rest of the application
COPY . .

# Internal port the app listens on (must match the gunicorn bind below)
EXPOSE 5000

# Command to run production server
# --workers 4: 4 worker processes
# --bind 0.0.0.0:5000: bind to all interfaces inside the container on port 5000
CMD ["gunicorn", "--workers", "4", "--bind", "0.0.0.0:5000", "app:app"]

View File

@@ -1,7 +1,7 @@
import os
from src import create_app

# Flask application instance; gunicorn resolves this as "app:app"
# (see the Dockerfile CMD), and the dev server below uses it directly.
app = create_app()

if __name__ == "__main__":
    # Development entry point only; production runs under gunicorn.
    app.run(debug=True, port=5000, host="0.0.0.0")

View File

@@ -1,7 +1,7 @@
flask
gunicorn
ultralytics
opencv-python
opencv-python-headless
transformers
torch
pandas

View File

@@ -1,62 +1,62 @@
import os
import sys
import argparse
from pathlib import Path

# Make the project root importable when this script is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from dotenv import load_dotenv

load_dotenv()

from src.rag.ingest import process_file
from src.rag.store import ingest_documents
from src.mongo.metadata import is_file_processed, log_processed_file


def populate_from_dataset(dataset_dir, category=None):
    """Ingest every supported file in *dataset_dir* into the vector store.

    Files already logged as processed are skipped. Successfully ingested
    files are logged with their chunk count; ``category`` is attached as
    metadata to every ingested chunk.
    """
    dataset_path = Path(dataset_dir)
    if not dataset_path.exists():
        print(f"Dataset directory not found: {dataset_dir}")
        return
    print(f"Scanning {dataset_dir}...")
    if category:
        print(f"Category: {category}")
    total_chunks = 0
    files_processed = 0
    for file_path in dataset_path.glob('*'):
        if file_path.is_file() and file_path.suffix.lower() in ['.csv', '.pdf', '.txt', '.xlsx']:
            if is_file_processed(file_path.name):
                print(f"Skipping {file_path.name} (already processed)")
                continue
            print(f"Processing {file_path.name}...")
            try:
                chunks = process_file(str(file_path))
                if chunks:
                    count = ingest_documents(chunks, source_file=file_path.name, category=category)
                    print(f" Ingested {count} chunks.")
                    if count > 0:
                        log_processed_file(file_path.name, category=category, chunk_count=count)
                        total_chunks += count
                        files_processed += 1
                else:
                    print(" No text found/extracted.")
            except Exception as e:
                # Best-effort batch job: report the failure and continue
                # with the next file rather than aborting the whole run.
                print(f" Error processing file: {e}")
    print(f"\nFinished! Processed {files_processed} files. Total chunks ingested: {total_chunks}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
    parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents")
    parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path")
    args = parser.parse_args()
    if args.dir:
        dataset_dir = args.dir
    else:
        dataset_dir = os.path.join(os.path.dirname(__file__), '../dataset')
    populate_from_dataset(dataset_dir, category=args.category)

View File

@@ -1,20 +1,20 @@
from flask import Flask
from flask_cors import CORS

from .routes.main import main_bp
from .routes.rag import rag_bp
from .routes.gemini import gemini_bp


def create_app():
    """Application factory: build the Flask app and register all blueprints."""
    app = Flask(__name__)
    # Allow cross-origin requests from the frontend.
    CORS(app)
    app.register_blueprint(main_bp)
    app.register_blueprint(rag_bp, url_prefix='/api/rag')
    app.register_blueprint(gemini_bp, url_prefix='/api/gemini')
    # Imported inside the factory to avoid import-time side effects/cycles.
    from .routes.reports import reports_bp
    app.register_blueprint(reports_bp, url_prefix='/api/reports')
    from .routes.incidents import incidents_bp
    app.register_blueprint(incidents_bp, url_prefix='/api/incidents')
    return app

View File

@@ -1,80 +1,80 @@
import chromadb

# Remote ChromaDB endpoint and the default collection holding RAG documents.
CHROMA_HOST = "http://chroma.sirblob.co"
COLLECTION_NAME = "rag_documents"

# Lazily created, module-wide HTTP client (reused across all calls).
_client = None


def get_chroma_client():
    """Return the shared ChromaDB HTTP client, creating it on first use."""
    global _client
    if _client is None:
        _client = chromadb.HttpClient(host=CHROMA_HOST)
    return _client


def get_collection(collection_name=COLLECTION_NAME):
    """Return the named collection, creating it if it does not exist."""
    client = get_chroma_client()
    return client.get_or_create_collection(name=collection_name)


def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
    """Add *texts* with precomputed *embeddings*; return the number inserted."""
    collection = get_collection(collection_name)
    # IDs combine list position and content hash to reduce collisions
    # across separate ingestion batches.
    ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
    if metadata_list:
        collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=texts,
            metadatas=metadata_list
        )
    else:
        collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=texts
        )
    return len(texts)


def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
    """Vector-search the collection.

    Returns a list of dicts with keys ``text``, ``score`` (distance, lower
    is closer; None when the backend omits distances) and ``metadata``.
    ``filter_metadata`` is passed through as a Chroma ``where`` clause.
    """
    collection = get_collection(collection_name)
    query_params = {
        "query_embeddings": [query_embedding],
        "n_results": num_results
    }
    if filter_metadata:
        query_params["where"] = filter_metadata
    results = collection.query(**query_params)
    output = []
    if results and results["documents"]:
        for i, doc in enumerate(results["documents"][0]):
            score = results["distances"][0][i] if "distances" in results else None
            meta = results["metadatas"][0][i] if "metadatas" in results else {}
            output.append({
                "text": doc,
                "score": score,
                "metadata": meta
            })
    return output


def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):
    """Delete all chunks ingested from *source_file*; return count deleted."""
    collection = get_collection(collection_name)
    results = collection.get(where={"source": source_file})
    if results["ids"]:
        collection.delete(ids=results["ids"])
        return len(results["ids"])
    return 0


def get_all_metadatas(collection_name=COLLECTION_NAME, limit=None):
    """Fetch only metadata entries (lightweight), optionally limited."""
    collection = get_collection(collection_name)
    # Only fetch metadatas to keep the payload small.
    if limit:
        results = collection.get(include=["metadatas"], limit=limit)
    else:
        results = collection.get(include=["metadatas"])
    return results["metadatas"] if results and "metadatas" in results else []

View File

@@ -1,47 +1,47 @@
from .config import (
    CV_DIR,
    DATA_DIR,
    MODELS_DIR,
    ULTRALYTICS_AVAILABLE,
    YOLO26_MODELS,
    SUPER_CATEGORIES,
    COMMON_BRANDS,
    COLORS,
    DEFAULT_CONF_THRESHOLD,
    DEFAULT_IOU_THRESHOLD,
    DEFAULT_IMG_SIZE,
)
from .detectors import (
    YOLO26Detector,
    HybridLogoDetector,
)
from .yolo_scanner import (
    start_scanner as start_yolo_scanner,
    detect_objects as detect_yolo_objects,
)
from .scanner import (
    start_interactive_capture as start_ollama_scanner,
    capture_and_analyze as capture_ollama_once,
)

# Public API of the cv package.
__all__ = [
    "CV_DIR",
    "DATA_DIR",
    "MODELS_DIR",
    "ULTRALYTICS_AVAILABLE",
    "YOLO26_MODELS",
    "SUPER_CATEGORIES",
    "COMMON_BRANDS",
    "COLORS",
    "DEFAULT_CONF_THRESHOLD",
    "DEFAULT_IOU_THRESHOLD",
    "DEFAULT_IMG_SIZE",
    "YOLO26Detector",
    "HybridLogoDetector",
    "start_yolo_scanner",
    "detect_yolo_objects",
    "start_ollama_scanner",
    "capture_ollama_once",
]

__version__ = "2.0.0"

View File

@@ -1,4 +1,4 @@
from .cli import main

# Allow running the package with `python -m <package>`.
if __name__ == "__main__":
    main()

View File

@@ -1,2 +1,2 @@
# Placeholder module marker (original file prints once; the duplicated
# diff line has been removed).
print('API chain')

View File

@@ -1,47 +1,47 @@
#!/usr/bin/env python3
import argparse

from .config import YOLO26_MODELS
from .yolo_scanner import start_scanner as start_yolo, detect_objects
from .scanner import start_interactive_capture as start_ollama


def main():
    """CLI entry point: route to image detection, YOLO scanner, or Ollama scanner."""
    parser = argparse.ArgumentParser(
        description="Ollama and YOLO Logo Detection Scanner"
    )
    parser.add_argument("--model", "-m", type=str)
    parser.add_argument("--size", "-s", type=str, default="nano",
                        choices=["nano", "small", "medium", "large", "xlarge"])
    parser.add_argument("--logo-model", type=str)
    parser.add_argument("--yolo", action="store_true")
    parser.add_argument("--no-gui", action="store_true")
    parser.add_argument("--track", "-t", action="store_true")
    parser.add_argument("--hybrid", action="store_true")
    parser.add_argument("--image", "-i", type=str)
    args = parser.parse_args()
    if args.image:
        # One-shot detection on a single image file.
        detections = detect_objects(
            args.image, model_size=args.size, hybrid_mode=args.hybrid
        )
        print(f"Found {len(detections)} detections:")
        for det in detections:
            print(f" {det['label']}: {det['confidence']:.2%}")
    elif args.yolo:
        # Live YOLO camera scanner.
        start_yolo(
            model_path=args.model,
            model_size=args.size,
            logo_model_path=args.logo_model,
            use_gui=not args.no_gui,
            use_tracking=args.track,
            hybrid_mode=args.hybrid
        )
    else:
        # Default: interactive Ollama capture loop.
        start_ollama()


if __name__ == "__main__":
    main()

View File

@@ -1,61 +1,61 @@
import os
from pathlib import Path
from typing import Dict

# Package-local data/model directories, created eagerly on import.
CV_DIR = Path(__file__).parent
DATA_DIR = CV_DIR / "data"
MODELS_DIR = CV_DIR / "models"

DATA_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Ultralytics is an optional dependency; detectors check this flag before use.
try:
    from ultralytics import YOLO
    ULTRALYTICS_AVAILABLE = True
except ImportError:
    ULTRALYTICS_AVAILABLE = False
    YOLO = None

# Model-size keyword -> pretrained weight filename.
YOLO26_MODELS: Dict[str, str] = {
    "nano": "yolo26n.pt",
    "small": "yolo26s.pt",
    "medium": "yolo26m.pt",
    "large": "yolo26l.pt",
    "xlarge": "yolo26x.pt",
}

# Super-category -> sample count (presumably dataset class counts — TODO confirm).
SUPER_CATEGORIES: Dict[str, int] = {
    "Food": 932,
    "Clothes": 604,
    "Necessities": 432,
    "Others": 371,
    "Electronic": 224,
    "Transportation": 213,
    "Leisure": 111,
    "Sports": 66,
    "Medical": 47
}

# Brand names recognized by the logo-detection prompts/models.
COMMON_BRANDS = [
    "McDonalds", "Starbucks", "CocaCola", "Pepsi", "KFC", "BurgerKing",
    "Subway", "DunkinDonuts", "PizzaHut", "Dominos", "Nestle", "Heineken",
    "Nike", "Adidas", "Puma", "UnderArmour", "Levis", "HM", "Zara", "Gap",
    "Gucci", "LouisVuitton", "Chanel", "Versace", "Prada", "Armani",
    "Apple", "Samsung", "HP", "Dell", "Intel", "AMD", "Nvidia", "Microsoft",
    "Sony", "LG", "Huawei", "Xiaomi", "Lenovo", "Asus", "Acer",
    "BMW", "Mercedes", "Audi", "Toyota", "Honda", "Ford", "Chevrolet",
    "Volkswagen", "Tesla", "Porsche", "Ferrari", "Lamborghini", "Nissan",
    "Google", "Facebook", "Twitter", "Instagram", "YouTube", "Amazon",
    "Netflix", "Spotify", "Uber", "Airbnb", "PayPal", "Visa", "Mastercard"
]

# BGR colors used when drawing detection boxes (OpenCV convention).
COLORS = {
    "high_conf": (0, 255, 0),
    "medium_conf": (0, 255, 255),
    "low_conf": (0, 165, 255),
    "logo": (255, 0, 255),
}

# Default inference thresholds and input size.
DEFAULT_CONF_THRESHOLD = 0.25
DEFAULT_IOU_THRESHOLD = 0.45
DEFAULT_IMG_SIZE = 640

View File

@@ -1,7 +1,7 @@
from .yolo26 import YOLO26Detector
from .hybrid import HybridLogoDetector

# Public API of the detectors subpackage.
__all__ = [
    "YOLO26Detector",
    "HybridLogoDetector",
]

View File

@@ -1,154 +1,154 @@
import cv2
import numpy as np
import os
from typing import List, Dict, Optional

from ..config import (
    ULTRALYTICS_AVAILABLE,
    MODELS_DIR,
    COLORS,
    DEFAULT_CONF_THRESHOLD,
)
from .yolo26 import YOLO26Detector

if ULTRALYTICS_AVAILABLE:
    from ultralytics import YOLO


class HybridLogoDetector:
    """Combine a COCO object detector with an optional custom logo model."""

    def __init__(self,
                 coco_model_size: str = "nano",
                 logo_model_path: Optional[str] = None,
                 conf_threshold: float = DEFAULT_CONF_THRESHOLD,
                 device: str = "auto"):
        """Load the COCO base model and, if available, a logo model.

        Falls back to MODELS_DIR/logo_detector.pt when no explicit
        logo_model_path is given; runs object-only if neither exists.
        """
        self.conf_threshold = conf_threshold
        self.device = device
        self.coco_detector = None
        self.logo_model = None
        if not ULTRALYTICS_AVAILABLE:
            raise RuntimeError("Ultralytics not installed. Run: pip install ultralytics")
        print("Loading YOLO26 COCO base model...")
        self.coco_detector = YOLO26Detector(
            model_size=coco_model_size,
            conf_threshold=conf_threshold,
            device=device
        )
        if logo_model_path and os.path.exists(logo_model_path):
            print(f"Loading logo model: {logo_model_path}")
            self.logo_model = YOLO(logo_model_path)
            print("Logo model loaded!")
        else:
            default_logo_model = MODELS_DIR / "logo_detector.pt"
            if default_logo_model.exists():
                print(f"Loading default logo model: {default_logo_model}")
                self.logo_model = YOLO(str(default_logo_model))
                print("Logo model loaded!")
            else:
                print("No logo model found.")
        print("Hybrid detector ready!")

    def detect(self,
               frame: np.ndarray,
               detect_objects: bool = True,
               detect_logos: bool = True,
               conf_threshold: Optional[float] = None) -> List[Dict]:
        """Run object and/or logo detection; each result dict gets a "type" key."""
        conf = conf_threshold if conf_threshold is not None else self.conf_threshold
        all_detections = []
        if detect_objects and self.coco_detector:
            object_detections = self.coco_detector.detect(frame, conf_threshold=conf)
            for det in object_detections:
                det["type"] = "object"
            all_detections.extend(object_detections)
        if detect_logos and self.logo_model:
            logo_detections = self._detect_logos(frame, conf)
            for det in logo_detections:
                det["type"] = "logo"
            all_detections.extend(logo_detections)
        return all_detections

    def _detect_logos(self, frame: np.ndarray, conf_threshold: float) -> List[Dict]:
        """Run the logo model on *frame*; returns [] when no model is loaded."""
        if self.logo_model is None:
            return []
        results = self.logo_model(
            frame,
            conf=conf_threshold,
            device=self.device if self.device != "auto" else None,
            verbose=False
        )
        detections = []
        for result in results:
            boxes = result.boxes
            if boxes is None:
                continue
            for i in range(len(boxes)):
                xyxy = boxes.xyxy[i].cpu().numpy()
                x1, y1, x2, y2 = map(int, xyxy)
                conf_val = float(boxes.conf[i].cpu().numpy())
                class_id = int(boxes.cls[i].cpu().numpy())
                label = self.logo_model.names[class_id]
                detections.append({
                    "bbox": (x1, y1, x2, y2),
                    "label": label,
                    "confidence": conf_val,
                    "class_id": class_id,
                    "brand": label
                })
        return detections

    def draw_detections(self,
                        frame: np.ndarray,
                        detections: List[Dict],
                        show_labels: bool = True) -> np.ndarray:
        """Return a copy of *frame* with boxes (and optional labels) drawn."""
        result = frame.copy()
        for det in detections:
            x1, y1, x2, y2 = det["bbox"]
            label = det["label"]
            conf = det["confidence"]
            det_type = det.get("type", "object")
            # Color by type first (logos), then by confidence band.
            if det_type == "logo":
                color = COLORS["logo"]
            elif conf > 0.7:
                color = COLORS["high_conf"]
            elif conf > 0.5:
                color = COLORS["medium_conf"]
            else:
                color = COLORS["low_conf"]
            cv2.rectangle(result, (x1, y1), (x2, y2), color, 2)
            if show_labels:
                label_text = f"{label}: {conf:.2f}"
                (text_w, text_h), _ = cv2.getTextSize(
                    label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
                )
                # Filled background rectangle so the text stays readable.
                cv2.rectangle(
                    result,
                    (x1, y1 - text_h - 8),
                    (x1 + text_w + 4, y1),
                    color,
                    -1
                )
                cv2.putText(
                    result,
                    label_text,
                    (x1 + 2, y1 - 4),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (255, 255, 255) if det_type == "logo" else (0, 0, 0),
                    1
                )
        return result

View File

@@ -1,186 +1,186 @@
import cv2
import numpy as np
import os
from typing import List, Dict, Optional
import cv2
import numpy as np
import os
from typing import List ,Dict ,Optional
from ..config import (
ULTRALYTICS_AVAILABLE,
YOLO26_MODELS,
COLORS,
DEFAULT_CONF_THRESHOLD,
DEFAULT_IOU_THRESHOLD,
ULTRALYTICS_AVAILABLE ,
YOLO26_MODELS ,
COLORS ,
DEFAULT_CONF_THRESHOLD ,
DEFAULT_IOU_THRESHOLD ,
)
if ULTRALYTICS_AVAILABLE:
from ultralytics import YOLO
if ULTRALYTICS_AVAILABLE :
from ultralytics import YOLO
class YOLO26Detector:
def __init__(self,
model_size: str = "nano",
model_path: Optional[str] = None,
conf_threshold: float = DEFAULT_CONF_THRESHOLD,
iou_threshold: float = DEFAULT_IOU_THRESHOLD,
device: str = "auto"):
self.conf_threshold = conf_threshold
self.iou_threshold = iou_threshold
self.device = device
self.model = None
if not ULTRALYTICS_AVAILABLE:
raise RuntimeError("Ultralytics not installed. Run: pip install ultralytics")
if model_path and os.path.exists(model_path):
model_name = model_path
elif model_size in YOLO26_MODELS:
model_name = YOLO26_MODELS[model_size]
else:
print(f"Unknown model size '{model_size}', defaulting to 'nano'")
model_name = YOLO26_MODELS["nano"]
print(f"Loading YOLO26 model: {model_name}")
self.model = YOLO(model_name)
print(f"YOLO26 model loaded successfully!")
print(f"Classes: {len(self.model.names)} | Device: {device}")
def detect(self,
frame: np.ndarray,
conf_threshold: Optional[float] = None,
classes: Optional[List[int]] = None) -> List[Dict]:
if self.model is None:
class YOLO26Detector :
def __init__ (self ,
model_size :str ="nano",
model_path :Optional [str ]=None ,
conf_threshold :float =DEFAULT_CONF_THRESHOLD ,
iou_threshold :float =DEFAULT_IOU_THRESHOLD ,
device :str ="auto"):
self .conf_threshold =conf_threshold
self .iou_threshold =iou_threshold
self .device =device
self .model =None
if not ULTRALYTICS_AVAILABLE :
raise RuntimeError ("Ultralytics not installed. Run: pip install ultralytics")
if model_path and os .path .exists (model_path ):
model_name =model_path
elif model_size in YOLO26_MODELS :
model_name =YOLO26_MODELS [model_size ]
else :
print (f"Unknown model size '{model_size }', defaulting to 'nano'")
model_name =YOLO26_MODELS ["nano"]
print (f"Loading YOLO26 model: {model_name }")
self .model =YOLO (model_name )
print (f"YOLO26 model loaded successfully!")
print (f"Classes: {len (self .model .names )} | Device: {device }")
def detect (self ,
frame :np .ndarray ,
conf_threshold :Optional [float ]=None ,
classes :Optional [List [int ]]=None )->List [Dict ]:
if self .model is None :
return []
conf = conf_threshold if conf_threshold is not None else self.conf_threshold
results = self.model(
frame,
conf=conf,
iou=self.iou_threshold,
device=self.device if self.device != "auto" else None,
classes=classes,
verbose=False
conf =conf_threshold if conf_threshold is not None else self .conf_threshold
results =self .model (
frame ,
conf =conf ,
iou =self .iou_threshold ,
device =self .device if self .device !="auto"else None ,
classes =classes ,
verbose =False
)
detections = []
for result in results:
boxes = result.boxes
if boxes is None:
continue
for i in range(len(boxes)):
xyxy = boxes.xyxy[i].cpu().numpy()
x1, y1, x2, y2 = map(int, xyxy)
conf_val = float(boxes.conf[i].cpu().numpy())
class_id = int(boxes.cls[i].cpu().numpy())
label = self.model.names[class_id]
detections.append({
"bbox": (x1, y1, x2, y2),
"label": label,
"confidence": conf_val,
"class_id": class_id
detections =[]
for result in results :
boxes =result .boxes
if boxes is None :
continue
for i in range (len (boxes )):
xyxy =boxes .xyxy [i ].cpu ().numpy ()
x1 ,y1 ,x2 ,y2 =map (int ,xyxy )
conf_val =float (boxes .conf [i ].cpu ().numpy ())
class_id =int (boxes .cls [i ].cpu ().numpy ())
label =self .model .names [class_id ]
detections .append ({
"bbox":(x1 ,y1 ,x2 ,y2 ),
"label":label ,
"confidence":conf_val ,
"class_id":class_id
})
return detections
def detect_and_track(self,
frame: np.ndarray,
conf_threshold: Optional[float] = None,
tracker: str = "bytetrack.yaml") -> List[Dict]:
if self.model is None:
return detections
def detect_and_track (self ,
frame :np .ndarray ,
conf_threshold :Optional [float ]=None ,
tracker :str ="bytetrack.yaml")->List [Dict ]:
if self .model is None :
return []
conf = conf_threshold if conf_threshold is not None else self.conf_threshold
results = self.model.track(
frame,
conf=conf,
iou=self.iou_threshold,
device=self.device if self.device != "auto" else None,
tracker=tracker,
persist=True,
verbose=False
conf =conf_threshold if conf_threshold is not None else self .conf_threshold
results =self .model .track (
frame ,
conf =conf ,
iou =self .iou_threshold ,
device =self .device if self .device !="auto"else None ,
tracker =tracker ,
persist =True ,
verbose =False
)
detections = []
for result in results:
boxes = result.boxes
if boxes is None:
continue
for i in range(len(boxes)):
xyxy = boxes.xyxy[i].cpu().numpy()
x1, y1, x2, y2 = map(int, xyxy)
conf_val = float(boxes.conf[i].cpu().numpy())
class_id = int(boxes.cls[i].cpu().numpy())
label = self.model.names[class_id]
track_id = None
if boxes.id is not None:
track_id = int(boxes.id[i].cpu().numpy())
detections.append({
"bbox": (x1, y1, x2, y2),
"label": label,
"confidence": conf_val,
"class_id": class_id,
"track_id": track_id
detections =[]
for result in results :
boxes =result .boxes
if boxes is None :
continue
for i in range (len (boxes )):
xyxy =boxes .xyxy [i ].cpu ().numpy ()
x1 ,y1 ,x2 ,y2 =map (int ,xyxy )
conf_val =float (boxes .conf [i ].cpu ().numpy ())
class_id =int (boxes .cls [i ].cpu ().numpy ())
label =self .model .names [class_id ]
track_id =None
if boxes .id is not None :
track_id =int (boxes .id [i ].cpu ().numpy ())
detections .append ({
"bbox":(x1 ,y1 ,x2 ,y2 ),
"label":label ,
"confidence":conf_val ,
"class_id":class_id ,
"track_id":track_id
})
return detections
def draw_detections(self,
frame: np.ndarray,
detections: List[Dict],
show_labels: bool = True,
show_conf: bool = True) -> np.ndarray:
result = frame.copy()
for det in detections:
x1, y1, x2, y2 = det["bbox"]
label = det["label"]
conf = det["confidence"]
track_id = det.get("track_id")
if conf > 0.7:
color = COLORS["high_conf"]
elif conf > 0.5:
color = COLORS["medium_conf"]
else:
color = COLORS["low_conf"]
cv2.rectangle(result, (x1, y1), (x2, y2), color, 2)
if show_labels:
label_parts = [label]
if track_id is not None:
label_parts.append(f"ID:{track_id}")
if show_conf:
label_parts.append(f"{conf:.2f}")
label_text = " | ".join(label_parts)
(text_w, text_h), baseline = cv2.getTextSize(
label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
return detections
def draw_detections (self ,
frame :np .ndarray ,
detections :List [Dict ],
show_labels :bool =True ,
show_conf :bool =True )->np .ndarray :
result =frame .copy ()
for det in detections :
x1 ,y1 ,x2 ,y2 =det ["bbox"]
label =det ["label"]
conf =det ["confidence"]
track_id =det .get ("track_id")
if conf >0.7 :
color =COLORS ["high_conf"]
elif conf >0.5 :
color =COLORS ["medium_conf"]
else :
color =COLORS ["low_conf"]
cv2 .rectangle (result ,(x1 ,y1 ),(x2 ,y2 ),color ,2 )
if show_labels :
label_parts =[label ]
if track_id is not None :
label_parts .append (f"ID:{track_id }")
if show_conf :
label_parts .append (f"{conf :.2f}")
label_text =" | ".join (label_parts )
(text_w ,text_h ),baseline =cv2 .getTextSize (
label_text ,cv2 .FONT_HERSHEY_SIMPLEX ,0.5 ,1
)
cv2.rectangle(
result,
(x1, y1 - text_h - 8),
(x1 + text_w + 4, y1),
color,
-1
cv2 .rectangle (
result ,
(x1 ,y1 -text_h -8 ),
(x1 +text_w +4 ,y1 ),
color ,
-1
)
cv2.putText(
result,
label_text,
(x1 + 2, y1 - 4),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 0),
1
cv2 .putText (
result ,
label_text ,
(x1 +2 ,y1 -4 ),
cv2 .FONT_HERSHEY_SIMPLEX ,
0.5 ,
(0 ,0 ,0 ),
1
)
return result
def get_class_names(self) -> Dict[int, str]:
    """Return the model's class-id -> class-name mapping ({} when no model is loaded)."""
    if not self.model:
        return {}
    return self.model.names
return result
def get_class_names(self) -> Dict[int, str]:
    """Return the model's class-id -> class-name mapping ({} when no model is loaded)."""
    # Restored PEP 8 spacing; the diff rendering had whitespace-mangled tokens.
    return self.model.names if self.model else {}

View File

@@ -1,197 +1,197 @@
import cv2
import json
import numpy as np
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
from ..ollama.detector import OllamaLogoDetector
import cv2
import json
import numpy as np
from datetime import datetime
from pathlib import Path
from typing import Dict ,Optional
from ..ollama .detector import OllamaLogoDetector
def capture_and_analyze(model: str = "ministral-3:latest",
                        save_image: bool = True,
                        output_dir: Optional[str] = None) -> Dict:
    """Open the default camera, let the user capture one frame with SPACE, and
    run Ollama logo detection on it.  Q quits without analyzing.

    Returns the detector's result dict, or an empty result when nothing was
    captured.  Raises RuntimeError when the camera cannot be opened.
    """
    camera = cv2.VideoCapture(0)
    if not camera.isOpened():
        raise RuntimeError("Could not access camera")
    print("Camera ready. Press SPACE to capture, Q to quit.")
    outcome = None
    while True:
        ok, frame = camera.read()
        if not ok:
            break
        preview = frame.copy()
        cv2.putText(preview, "Press SPACE to capture | Q to quit",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.imshow("Capture", preview)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        if key != ord(' '):
            continue
        # SPACE pressed: optionally save the raw frame, then analyze once.
        print("Analyzing image...")
        if save_image:
            save_root = output_dir if output_dir is not None else "./captures"
            Path(save_root).mkdir(parents=True, exist_ok=True)
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            img_path = Path(save_root) / f"capture_{stamp}.jpg"
            cv2.imwrite(str(img_path), frame)
            print(f"Saved: {img_path}")
        outcome = OllamaLogoDetector(model=model).detect_from_numpy(frame)
        _display_results(outcome)
        break
    camera.release()
    cv2.destroyAllWindows()
    return outcome if outcome else {"logos_detected": [], "total_count": 0}
def capture_and_analyze (model :str ="ministral-3:latest",
save_image :bool =True ,
output_dir :Optional [str ]=None )->Dict :
cap =cv2 .VideoCapture (0 )
if not cap .isOpened ():
raise RuntimeError ("Could not access camera")
def start_interactive_capture(model: str = "ministral-3:latest",
                              save_images: bool = True,
                              output_dir: Optional[str] = None):
    """Run a live camera preview with on-demand Ollama logo detection.

    Keys: SPACE = capture and analyze, S = save the frame only,
    R = reprint the last results, Q = quit.

    Returns the last detection result dict, or None if nothing was analyzed.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Could not access camera")
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("=" * 60)
    print("Ollama Logo Detection - Interactive Mode")
    print("=" * 60)
    print(f"Camera: {width}x{height}")
    print(f"Model: {model}")
    print("\nControls:")
    print(" SPACE - Capture and analyze")
    print(" S - Save frame only")
    print(" R - Show last results")
    print(" Q - Quit")
    print("=" * 60)
    detector = OllamaLogoDetector(model=model)
    last_result = None
    # `analyzing` guards against re-entering the SPACE branch mid-analysis.
    analyzing = False
    status_message = "Ready - Press SPACE to capture"
    if output_dir is None:
        output_dir = "./captures"
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        display = frame.copy()
        # Top status bar overlay.
        cv2.rectangle(display, (0, 0), (width, 40), (40, 40, 40), -1)
        cv2.putText(display, status_message, (10, 28),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        if last_result and last_result.get("logos_detected"):
            # Bottom bar showing up to three brands from the last analysis.
            brands = [l.get("brand", "?") for l in last_result["logos_detected"]]
            brand_text = f"Detected: {', '.join(brands[:3])}"
            if len(brands) > 3:
                brand_text += f" +{len(brands)-3} more"
            cv2.rectangle(display, (0, height-35), (width, height), (40, 40, 40), -1)
            cv2.putText(display, brand_text, (10, height-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
        cv2.imshow("Ollama Logo Detection", display)
        key = cv2.waitKey(1) & 0xFF
        if key == ord(' ') and not analyzing:
            analyzing = True
            status_message = "Analyzing with Ollama..."
            # Redraw once so the "Analyzing" status is visible during the call.
            cv2.imshow("Ollama Logo Detection", display)
            cv2.waitKey(1)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            img_path = Path(output_dir) / f"capture_{timestamp}.jpg"
            if save_images:
                cv2.imwrite(str(img_path), frame)
            last_result = detector.detect_from_numpy(frame)
            # Detection results are always written to JSON, even if the image was not saved.
            json_path = Path(output_dir) / f"result_{timestamp}.json"
            with open(json_path, 'w') as f:
                json.dump(last_result, f, indent=2)
            count = last_result.get("total_count", 0)
            if count > 0:
                status_message = f"Found {count} logo(s)! Press R for details"
            else:
                status_message = "No logos detected. Try again!"
            print(f"\nCaptured: {img_path}")
            print(f"Results: {json_path}")
            _display_results(last_result)
            analyzing = False
        elif key == ord('s'):
            # Save the raw frame without running detection.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            img_path = Path(output_dir) / f"capture_{timestamp}.jpg"
            cv2.imwrite(str(img_path), frame)
            status_message = f"Saved: {img_path.name}"
            print(f"Saved: {img_path}")
        elif key == ord('r') and last_result:
            print("\n" + "=" * 40)
            print("Last Detection Results:")
            print("=" * 40)
            _display_results(last_result)
        elif key == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    print("\nGoodbye!")
    return last_result
print ("Camera ready. Press SPACE to capture, Q to quit.")
def _display_results(result: Dict):
print("\n" + "-" * 40)
logos = result.get("logos_detected", [])
count = result.get("total_count", len(logos))
if count == 0:
print("No logos or brands detected")
if "description" in result:
print(f"Description: {result['description']}")
else:
print(f"Detected {count} logo(s)/brand(s):\n")
for i, logo in enumerate(logos, 1):
brand = logo.get("brand", "Unknown")
conf = logo.get("confidence", "unknown")
loc = logo.get("location", "unknown")
cat = logo.get("category", "")
print(f" {i}. {brand}")
print(f" Confidence: {conf}")
print(f" Location: {loc}")
if cat:
print(f" Category: {cat}")
print()
if "error" in result:
print(f"Error: {result['error']}")
print("-" * 40)
print("\nJSON Output:")
print(json.dumps(result, indent=2))
result =None
while True :
ret ,frame =cap .read ()
if not ret :
break
display =frame .copy ()
cv2 .putText (display ,"Press SPACE to capture | Q to quit",
(10 ,30 ),cv2 .FONT_HERSHEY_SIMPLEX ,0.7 ,(0 ,255 ,0 ),2 )
cv2 .imshow ("Capture",display )
key =cv2 .waitKey (1 )&0xFF
if key ==ord (' '):
print ("Analyzing image...")
if save_image :
if output_dir is None :
output_dir ="./captures"
Path (output_dir ).mkdir (parents =True ,exist_ok =True )
timestamp =datetime .now ().strftime ("%Y%m%d_%H%M%S")
img_path =Path (output_dir )/f"capture_{timestamp }.jpg"
cv2 .imwrite (str (img_path ),frame )
print (f"Saved: {img_path }")
detector =OllamaLogoDetector (model =model )
result =detector .detect_from_numpy (frame )
_display_results (result )
break
elif key ==ord ('q'):
break
cap .release ()
cv2 .destroyAllWindows ()
return result if result else {"logos_detected":[],"total_count":0 }
def start_interactive_capture(model: str = "ministral-3:latest",
                              save_images: bool = True,
                              output_dir: Optional[str] = None):
    """Run a live camera preview with on-demand Ollama logo detection.

    Keys: SPACE = capture and analyze, S = save the frame only,
    R = reprint the last results, Q = quit.

    Returns the last detection result dict, or None if nothing was analyzed.
    (Restored PEP 8 formatting; the diff rendering had whitespace-mangled tokens.)
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Could not access camera")
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("=" * 60)
    print("Ollama Logo Detection - Interactive Mode")
    print("=" * 60)
    print(f"Camera: {width}x{height}")
    print(f"Model: {model}")
    print("\nControls:")
    print(" SPACE - Capture and analyze")
    print(" S - Save frame only")
    print(" R - Show last results")
    print(" Q - Quit")
    print("=" * 60)
    detector = OllamaLogoDetector(model=model)
    last_result = None
    # Guards against re-entering the SPACE branch mid-analysis.
    analyzing = False
    status_message = "Ready - Press SPACE to capture"
    if output_dir is None:
        output_dir = "./captures"
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        display = frame.copy()
        # Top status bar overlay.
        cv2.rectangle(display, (0, 0), (width, 40), (40, 40, 40), -1)
        cv2.putText(display, status_message, (10, 28),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        if last_result and last_result.get("logos_detected"):
            brands = [l.get("brand", "?") for l in last_result["logos_detected"]]
            brand_text = f"Detected: {', '.join(brands[:3])}"
            if len(brands) > 3:
                brand_text += f" +{len(brands)-3} more"
            cv2.rectangle(display, (0, height-35), (width, height), (40, 40, 40), -1)
            cv2.putText(display, brand_text, (10, height-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
        cv2.imshow("Ollama Logo Detection", display)
        key = cv2.waitKey(1) & 0xFF
        if key == ord(' ') and not analyzing:
            analyzing = True
            status_message = "Analyzing with Ollama..."
            # Redraw once so the "Analyzing" status is visible during the call.
            cv2.imshow("Ollama Logo Detection", display)
            cv2.waitKey(1)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            img_path = Path(output_dir) / f"capture_{timestamp}.jpg"
            if save_images:
                cv2.imwrite(str(img_path), frame)
            last_result = detector.detect_from_numpy(frame)
            json_path = Path(output_dir) / f"result_{timestamp}.json"
            with open(json_path, 'w') as f:
                json.dump(last_result, f, indent=2)
            count = last_result.get("total_count", 0)
            if count > 0:
                status_message = f"Found {count} logo(s)! Press R for details"
            else:
                status_message = "No logos detected. Try again!"
            print(f"\nCaptured: {img_path}")
            print(f"Results: {json_path}")
            _display_results(last_result)
            analyzing = False
        elif key == ord('s'):
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            img_path = Path(output_dir) / f"capture_{timestamp}.jpg"
            cv2.imwrite(str(img_path), frame)
            status_message = f"Saved: {img_path.name}"
            print(f"Saved: {img_path}")
        elif key == ord('r') and last_result:
            print("\n" + "=" * 40)
            print("Last Detection Results:")
            print("=" * 40)
            _display_results(last_result)
        elif key == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    print("\nGoodbye!")
    return last_result
def _display_results (result :Dict ):
print ("\n"+"-"*40 )
logos =result .get ("logos_detected",[])
count =result .get ("total_count",len (logos ))
if count ==0 :
print ("No logos or brands detected")
if "description"in result :
print (f"Description: {result ['description']}")
else :
print (f"Detected {count } logo(s)/brand(s):\n")
for i ,logo in enumerate (logos ,1 ):
brand =logo .get ("brand","Unknown")
conf =logo .get ("confidence","unknown")
loc =logo .get ("location","unknown")
cat =logo .get ("category","")
print (f" {i }. {brand }")
print (f" Confidence: {conf }")
print (f" Location: {loc }")
if cat :
print (f" Category: {cat }")
print ()
if "error"in result :
print (f"Error: {result ['error']}")
print ("-"*40 )
print ("\nJSON Output:")
print (json .dumps (result ,indent =2 ))

View File

@@ -1,28 +1,28 @@
from .config import (
CV_DIR,
DATA_DIR,
MODELS_DIR,
ULTRALYTICS_AVAILABLE,
YOLO26_MODELS,
SUPER_CATEGORIES,
COMMON_BRANDS,
COLORS,
DEFAULT_CONF_THRESHOLD,
DEFAULT_IOU_THRESHOLD,
DEFAULT_IMG_SIZE,
CV_DIR ,
DATA_DIR ,
MODELS_DIR ,
ULTRALYTICS_AVAILABLE ,
YOLO26_MODELS ,
SUPER_CATEGORIES ,
COMMON_BRANDS ,
COLORS ,
DEFAULT_CONF_THRESHOLD ,
DEFAULT_IOU_THRESHOLD ,
DEFAULT_IMG_SIZE ,
)
from .detectors import (
YOLO26Detector,
HybridLogoDetector,
YOLO26Detector ,
HybridLogoDetector ,
)
from .yolo_scanner import (
start_scanner as start_yolo_scanner,
detect_objects as detect_yolo_objects,
start_scanner as start_yolo_scanner ,
detect_objects as detect_yolo_objects ,
)
from .scanner import (
start_interactive_capture as start_ollama_scanner,
start_interactive_capture as start_ollama_scanner ,
)
if __name__ == "__main__":
from .cli import main
main()
if __name__ =="__main__":
from .cli import main
main ()

View File

@@ -1,166 +1,166 @@
import cv2
from pathlib import Path
from typing import List, Dict, Optional
import cv2
from pathlib import Path
from typing import List ,Dict ,Optional
from .config import (
CV_DIR,
ULTRALYTICS_AVAILABLE,
CV_DIR ,
ULTRALYTICS_AVAILABLE ,
)
from .detectors import YOLO26Detector, HybridLogoDetector
from .detectors import YOLO26Detector ,HybridLogoDetector
def start_scanner(model_path: Optional[str] = None,
model_size: str = "nano",
logo_model_path: Optional[str] = None,
use_gui: bool = True,
use_tracking: bool = False,
hybrid_mode: bool = False):
print("=" * 60)
if hybrid_mode:
print("YOLO26 Hybrid Scanner (COCO + Logos)")
else:
print("YOLO26 Object Detection Scanner")
print("=" * 60)
detector = None
if hybrid_mode and ULTRALYTICS_AVAILABLE:
try:
detector = HybridLogoDetector(
coco_model_size=model_size,
logo_model_path=logo_model_path,
conf_threshold=0.25,
device="auto"
def start_scanner (model_path :Optional [str ]=None ,
model_size :str ="nano",
logo_model_path :Optional [str ]=None ,
use_gui :bool =True ,
use_tracking :bool =False ,
hybrid_mode :bool =False ):
print ("="*60 )
if hybrid_mode :
print ("YOLO26 Hybrid Scanner (COCO + Logos)")
else :
print ("YOLO26 Object Detection Scanner")
print ("="*60 )
detector =None
if hybrid_mode and ULTRALYTICS_AVAILABLE :
try :
detector =HybridLogoDetector (
coco_model_size =model_size ,
logo_model_path =logo_model_path ,
conf_threshold =0.25 ,
device ="auto"
)
except Exception as e:
print(f"Hybrid detector failed: {e}")
hybrid_mode = False
if detector is None and ULTRALYTICS_AVAILABLE:
try:
detector = YOLO26Detector(
model_size=model_size,
model_path=model_path,
conf_threshold=0.25,
device="auto"
except Exception as e :
print (f"Hybrid detector failed: {e }")
hybrid_mode =False
if detector is None and ULTRALYTICS_AVAILABLE :
try :
detector =YOLO26Detector (
model_size =model_size ,
model_path =model_path ,
conf_threshold =0.25 ,
device ="auto"
)
except Exception as e:
print(f"YOLO26 failed: {e}")
if detector is None:
print("Error: No detector available.")
return
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Error: Could not access camera.")
return
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
writer = None
output_path = CV_DIR / "output.mp4"
print(f"Camera: {width}x{height} @ {fps:.1f}fps")
print("Controls: q=quit | s=screenshot | t=tracking")
if hybrid_mode:
print(" o=objects | l=logos | b=both")
frame_count = 0
detect_objects_flag = True
detect_logos_flag = True
try:
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
if hybrid_mode and isinstance(detector, HybridLogoDetector):
detections = detector.detect(
frame,
detect_objects=detect_objects_flag,
detect_logos=detect_logos_flag
except Exception as e :
print (f"YOLO26 failed: {e }")
if detector is None :
print ("Error: No detector available.")
return
cap =cv2 .VideoCapture (0 )
if not cap .isOpened ():
print ("Error: Could not access camera.")
return
width =int (cap .get (cv2 .CAP_PROP_FRAME_WIDTH ))
height =int (cap .get (cv2 .CAP_PROP_FRAME_HEIGHT ))
fps =cap .get (cv2 .CAP_PROP_FPS )or 30.0
writer =None
output_path =CV_DIR /"output.mp4"
print (f"Camera: {width }x{height } @ {fps :.1f}fps")
print ("Controls: q=quit | s=screenshot | t=tracking")
if hybrid_mode :
print (" o=objects | l=logos | b=both")
frame_count =0
detect_objects_flag =True
detect_logos_flag =True
try :
while True :
ret ,frame =cap .read ()
if not ret :
break
frame_count +=1
if hybrid_mode and isinstance (detector ,HybridLogoDetector ):
detections =detector .detect (
frame ,
detect_objects =detect_objects_flag ,
detect_logos =detect_logos_flag
)
elif use_tracking and isinstance(detector, YOLO26Detector):
detections = detector.detect_and_track(frame)
else:
detections = detector.detect(frame)
result_frame = detector.draw_detections(frame, detections)
mode_str = "HYBRID" if hybrid_mode else ("TRACK" if use_tracking else "DETECT")
cv2.putText(result_frame, f"{mode_str} | {len(detections)} objects",
(10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
if use_gui:
try:
cv2.imshow('YOLO26 Scanner', result_frame)
key = cv2.waitKey(1) & 0xFF
if key == ord('q'):
break
elif key == ord('s'):
path = CV_DIR / f"screenshot_{frame_count}.jpg"
cv2.imwrite(str(path), result_frame)
print(f"Saved: {path}")
elif key == ord('t') and isinstance(detector, YOLO26Detector):
use_tracking = not use_tracking
elif key == ord('o') and hybrid_mode:
detect_objects_flag, detect_logos_flag = True, False
elif key == ord('l') and hybrid_mode:
detect_objects_flag, detect_logos_flag = False, True
elif key == ord('b') and hybrid_mode:
detect_objects_flag, detect_logos_flag = True, True
except cv2.error:
use_gui = False
writer = cv2.VideoWriter(
str(output_path), cv2.VideoWriter_fourcc(*'mp4v'),
fps, (width, height)
)
if not use_gui and writer:
writer.write(result_frame)
except KeyboardInterrupt:
print("Stopping...")
finally:
cap.release()
if writer:
writer.release()
cv2.destroyAllWindows()
elif use_tracking and isinstance (detector ,YOLO26Detector ):
detections =detector .detect_and_track (frame )
else :
detections =detector .detect (frame )
def detect_objects(image_path: str,
model_size: str = "nano",
conf_threshold: float = 0.25,
save_output: bool = True,
hybrid_mode: bool = False) -> List[Dict]:
if not ULTRALYTICS_AVAILABLE:
raise RuntimeError("Ultralytics not installed")
if hybrid_mode:
detector = HybridLogoDetector(
coco_model_size=model_size,
conf_threshold=conf_threshold
result_frame =detector .draw_detections (frame ,detections )
mode_str ="HYBRID"if hybrid_mode else ("TRACK"if use_tracking else "DETECT")
cv2 .putText (result_frame ,f"{mode_str } | {len (detections )} objects",
(10 ,30 ),cv2 .FONT_HERSHEY_SIMPLEX ,0.6 ,(0 ,255 ,0 ),2 )
if use_gui :
try :
cv2 .imshow ('YOLO26 Scanner',result_frame )
key =cv2 .waitKey (1 )&0xFF
if key ==ord ('q'):
break
elif key ==ord ('s'):
path =CV_DIR /f"screenshot_{frame_count }.jpg"
cv2 .imwrite (str (path ),result_frame )
print (f"Saved: {path }")
elif key ==ord ('t')and isinstance (detector ,YOLO26Detector ):
use_tracking =not use_tracking
elif key ==ord ('o')and hybrid_mode :
detect_objects_flag ,detect_logos_flag =True ,False
elif key ==ord ('l')and hybrid_mode :
detect_objects_flag ,detect_logos_flag =False ,True
elif key ==ord ('b')and hybrid_mode :
detect_objects_flag ,detect_logos_flag =True ,True
except cv2 .error :
use_gui =False
writer =cv2 .VideoWriter (
str (output_path ),cv2 .VideoWriter_fourcc (*'mp4v'),
fps ,(width ,height )
)
if not use_gui and writer :
writer .write (result_frame )
except KeyboardInterrupt :
print ("Stopping...")
finally :
cap .release ()
if writer :
writer .release ()
cv2 .destroyAllWindows ()
def detect_objects (image_path :str ,
model_size :str ="nano",
conf_threshold :float =0.25 ,
save_output :bool =True ,
hybrid_mode :bool =False )->List [Dict ]:
if not ULTRALYTICS_AVAILABLE :
raise RuntimeError ("Ultralytics not installed")
if hybrid_mode :
detector =HybridLogoDetector (
coco_model_size =model_size ,
conf_threshold =conf_threshold
)
else:
detector = YOLO26Detector(
model_size=model_size,
conf_threshold=conf_threshold
else :
detector =YOLO26Detector (
model_size =model_size ,
conf_threshold =conf_threshold
)
image = cv2.imread(image_path)
if image is None:
raise ValueError(f"Could not load: {image_path}")
detections = detector.detect(image)
if save_output:
result = detector.draw_detections(image, detections)
output = Path(image_path).stem + "_detected.jpg"
cv2.imwrite(output, result)
print(f"Saved: {output}")
return detections
image =cv2 .imread (image_path )
if image is None :
raise ValueError (f"Could not load: {image_path }")
detections =detector .detect (image )
if save_output :
result =detector .draw_detections (image ,detections )
output =Path (image_path ).stem +"_detected.jpg"
cv2 .imwrite (output ,result )
print (f"Saved: {output }")
return detections

View File

@@ -1,9 +1,9 @@
import os
from google import genai
from src.chroma.vector_store import search_documents
from src.rag.embeddings import get_embedding
import os
from google import genai
from src .chroma .vector_store import search_documents
from src .rag .embeddings import get_embedding
GREENWASHING_ANALYSIS_PROMPT = """
GREENWASHING_ANALYSIS_PROMPT ="""
You are an expert Environmental, Social, and Governance (ESG) Analyst specialized in detecting 'Greenwashing'.
Your task is to analyze the provided context from a company's data reports and determine if they are engaging in greenwashing.
@@ -21,123 +21,123 @@ Based on the context provided, give a final verdict:
- EVIDENCE: [Quote specific parts of the context if possible]
"""
def ask(prompt):
    """Send *prompt* to Gemini and return the generated text."""
    api_key = os.environ.get("GOOGLE_API_KEY")
    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(model="gemini-3-flash-preview", contents=prompt)
    return response.text
def ask(prompt):
    """Send *prompt* to Gemini (gemini-3-pro-preview) and return the generated text."""
    # Restored PEP 8 spacing; the diff rendering had whitespace-mangled tokens.
    client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    return client.models.generate_content(model="gemini-3-pro-preview", contents=prompt).text
def ask_gemini_with_rag(prompt, category=None):
def ask_gemini_with_rag (prompt ,category =None ):
"""Ask Gemini with RAG context from the vector database."""
# Get embedding for the prompt
query_embedding = get_embedding(prompt)
# Search for relevant documents
results = search_documents(query_embedding, num_results=5)
# Build context from results
context = ""
for res in results:
context += f"--- Document ---\n{res['text']}\n\n"
# Create full prompt with context
full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
query_embedding =get_embedding (prompt )
results =search_documents (query_embedding ,num_results =5 )
context =""
for res in results :
context +=f"--- Document ---\n{res ['text']}\n\n"
full_prompt =f"""You are a helpful sustainability assistant. Use the following context to answer the user's question.
If the context doesn't contain relevant information, you can use your general knowledge but mention that.
CONTEXT:
{context}
{context }
USER QUESTION: {prompt}
USER QUESTION: {prompt }
Please provide a helpful and concise response."""
return ask(full_prompt)
return ask (full_prompt )
def analyze(query, query_embedding, num_results=5, num_alternatives=3):
    """Run the greenwashing analysis for *query* using vector-search context.

    Returns {"analysis": Gemini verdict text, "alternatives": up to
    *num_alternatives* deduplicated extra chunks from the search results}.
    """
    # Over-fetch so alternatives can come from beyond the context chunks.
    try:
        results = search_documents(query_embedding, num_results=num_results + num_alternatives + 5)
    except Exception as e:
        print(f"Chroma error: {e}")
        results = []
    if results:
        chunks = [f"RELEVANT DATA CHUNK: {res['text']}\n\n" for res in results[:num_results]]
        context = "--- START OF REPORT CONTEXT ---\n" + "".join(chunks) + "--- END OF REPORT CONTEXT ---\n"
    else:
        context = "No data found in database for this brand."
    full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}"
    analysis_text = ask(full_prompt)
    # Remaining results become "alternatives", deduplicated by a 200-char preview.
    alternatives = []
    seen = set()
    for res in results[num_results:]:
        if len(alternatives) >= num_alternatives:
            break
        preview = res['text'][:200]
        if preview in seen:
            continue
        seen.add(preview)
        alternatives.append({"text": res['text'], "score": res.get('score'), "summary": preview})
    return {"analysis": analysis_text, "alternatives": alternatives}
def analyze (query ,query_embedding ,num_results =5 ,num_alternatives =3 ):
try :
results =search_documents (query_embedding ,num_results =num_results +num_alternatives +5 )
except Exception as e :
print (f"Chroma error: {e }")
results =[]
def ask_gemini_with_rag(query, category=None, num_results=5):
    """Embed *query*, run the greenwashing analysis, and return only the verdict text.

    NOTE(review): this redefines an earlier ask_gemini_with_rag in the same
    module and therefore shadows it — confirm which version is intended.
    """
    query_embedding = get_embedding(query)
    analysis = analyze(query, query_embedding, num_results=num_results)
    return analysis["analysis"]
if not results :
context ="No data found in database for this brand."
else :
context ="--- START OF REPORT CONTEXT ---\n"
for res in results [:num_results ]:
context +=f"RELEVANT DATA CHUNK: {res ['text']}\n\n"
context +="--- END OF REPORT CONTEXT ---\n"
def analyze_brand(brand_name):
    """Run the full embedding -> Chroma -> Gemini pipeline for one brand and
    pretty-print the outcome.

    Returns the analysis dict from analyze(), or None on error.
    """
    print(f"\n{'='*60}")
    print(f"Analyzing brand: {brand_name}")
    print('='*60)
    try:
        print("\n[1/3] Getting embedding for brand...")
        embedding = get_embedding(brand_name)
        print("[2/3] Querying Chroma database...")
        outcome = analyze(brand_name, embedding)
        print("[3/3] Gemini Analysis Complete!\n")
        divider = "-" * 60
        print(divider)
        print("ANALYSIS:")
        print(divider)
        print(outcome["analysis"])
        print("\n" + divider)
        print("ALTERNATIVES FROM DATABASE:")
        print(divider)
        alternatives = outcome["alternatives"]
        if not alternatives:
            print("No alternatives found in database.")
        else:
            for rank, alt in enumerate(alternatives, 1):
                print(f"\n{rank}. {alt['summary']}...")
        print("\n" + "=" * 60)
        return outcome
    except Exception as e:
        print(f"\nError during analysis: {e}")
        return None
full_prompt =f"{GREENWASHING_ANALYSIS_PROMPT }\n\n{context }\n\nUSER QUERY/COMPANY FOCUS: {query }"
analysis_text =ask (full_prompt )
def scan_and_analyze():
    """Capture a camera frame, detect a brand logo, then run the greenwashing analysis.

    Returns the analysis dict from analyze_brand, or None when no brand is seen.
    """
    from src.cv.scanner import capture_and_analyze as cv_capture
    banner = "=" * 60
    print("\n" + banner)
    print("CV + Gemini Greenwashing Scanner")
    print(banner)
    print("Using camera to detect brands...")
    detected = cv_capture().get("logos_detected", [])
    if not detected:
        print("No brands detected. Try again!")
        return None
    brand = detected[0].get("brand", "Unknown")
    print(f"\nDetected brand: {brand}")
    return analyze_brand(brand)
alternatives =[]
seen_texts =set ()
for res in results [num_results :]:
text_preview =res ['text'][:200 ]
if text_preview not in seen_texts :
seen_texts .add (text_preview )
alternatives .append ({"text":res ['text'],"score":res .get ('score'),"summary":text_preview })
if len (alternatives )>=num_alternatives :
break
if __name__ == "__main__":
scan_and_analyze()
return {"analysis":analysis_text ,"alternatives":alternatives }
def ask_gemini_with_rag(query, category=None, num_results=5):
    """Embed *query*, run the greenwashing analysis, and return the verdict text.

    Restored PEP 8 spacing; the rendered diff had whitespace-mangled tokens.
    NOTE(review): this redefines an earlier ask_gemini_with_rag in the same
    module and shadows it — confirm which version is intended.
    """
    embedding = get_embedding(query)
    result = analyze(query, embedding, num_results=num_results)
    return result["analysis"]
def analyze_brand(brand_name):
    """Run embedding -> Chroma -> Gemini analysis for one brand and print the outcome.

    Returns the analysis dict, or None on error.  (Restored PEP 8 formatting.)
    """
    print(f"\n{'='*60}")
    print(f"Analyzing brand: {brand_name}")
    print('='*60)
    try:
        print("\n[1/3] Getting embedding for brand...")
        embedding = get_embedding(brand_name)
        print("[2/3] Querying Chroma database...")
        result = analyze(brand_name, embedding)
        print("[3/3] Gemini Analysis Complete!\n")
        print("-"*60)
        print("ANALYSIS:")
        print("-"*60)
        print(result["analysis"])
        print("\n" + "-"*60)
        print("ALTERNATIVES FROM DATABASE:")
        print("-"*60)
        if result["alternatives"]:
            for i, alt in enumerate(result["alternatives"], 1):
                print(f"\n{i}. {alt['summary']}...")
        else:
            print("No alternatives found in database.")
        print("\n" + "="*60)
        return result
    except Exception as e:
        print(f"\nError during analysis: {e}")
        return None
def scan_and_analyze():
    """Capture a camera frame, detect a brand logo, then run the greenwashing analysis.

    Returns the analysis dict, or None when no brand is seen.
    (Restored PEP 8 formatting.)
    """
    from src.cv.scanner import capture_and_analyze as cv_capture
    print("\n" + "="*60)
    print("CV + Gemini Greenwashing Scanner")
    print("="*60)
    print("Using camera to detect brands...")
    cv_result = cv_capture()
    logos = cv_result.get("logos_detected", [])
    if not logos:
        print("No brands detected. Try again!")
        return None
    brand = logos[0].get("brand", "Unknown")
    print(f"\nDetected brand: {brand}")
    return analyze_brand(brand)
if __name__ =="__main__":
scan_and_analyze ()

View File

@@ -1,11 +1,11 @@
from google import genai
import os
from google import genai
import os
def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
api_key = os.environ.get("GOOGLE_API_KEY")
client = genai.Client(api_key=api_key)
response = client.models.generate_content(
model=model_name,
contents=prompt,
def generate_content (prompt ,model_name ="gemini-2.0-flash-exp"):
api_key =os .environ .get ("GOOGLE_API_KEY")
client =genai .Client (api_key =api_key )
response =client .models .generate_content (
model =model_name ,
contents =prompt ,
)
return response.text
return response .text

View File

@@ -1,21 +1,21 @@
import os
from pymongo import MongoClient
from dotenv import load_dotenv
import os
from pymongo import MongoClient
from dotenv import load_dotenv
script_dir = os.path.dirname(os.path.abspath(__file__))
env_path = os.path.join(script_dir, '..', 'rag', '.env')
load_dotenv(env_path)
script_dir =os .path .dirname (os .path .abspath (__file__ ))
env_path =os .path .join (script_dir ,'..','rag','.env')
load_dotenv (env_path )
def get_database():
    """Return the 'my_rag_app' MongoDB database, or None when it cannot be reached."""
    uri = os.getenv("MONGO_URI")
    try:
        # NOTE(review): MongoClient construction may be lazy, so this except
        # may not actually verify connectivity — confirm before relying on it.
        db = MongoClient(uri)["my_rag_app"]
    except Exception as e:
        print(f"ERROR: Could not connect to MongoDB: {e}")
        return None
    print("SUCCESS: Connected to MongoDB Atlas!")
    return db
def get_database():
    """Return the 'my_rag_app' MongoDB database, or None when it cannot be reached.

    Restored PEP 8 spacing; the rendered diff had whitespace-mangled tokens.
    """
    uri = os.getenv("MONGO_URI")
    try:
        client = MongoClient(uri)
        db = client["my_rag_app"]
        print("SUCCESS: Connected to MongoDB Atlas!")
        return db
    except Exception as e:
        print(f"ERROR: Could not connect to MongoDB: {e}")
        return None
if __name__ == "__main__":
get_database()
if __name__ =="__main__":
get_database ()

View File

@@ -1,8 +1,8 @@
import os
from pymongo import MongoClient
import os
from pymongo import MongoClient
def get_mongo_client():
    """Build a MongoClient from the MONGO_URI environment variable.

    Raises ValueError when MONGO_URI is missing or empty.
    """
    uri = os.environ.get("MONGO_URI")
    if uri is None or uri == "":
        raise ValueError("MONGO_URI environment variable not set")
    return MongoClient(uri)
def get_mongo_client():
    """Return a MongoClient built from MONGO_URI (raises ValueError when unset).

    Restored PEP 8 spacing; the rendered diff had whitespace-mangled tokens.
    """
    uri = os.environ.get("MONGO_URI")
    if not uri:
        raise ValueError("MONGO_URI environment variable not set")
    return MongoClient(uri)

View File

@@ -1,62 +1,62 @@
from .connection import get_mongo_client
from datetime import datetime
from .connection import get_mongo_client
from datetime import datetime
DB_NAME = "hoya_metadata"
DB_NAME ="hoya_metadata"
def get_datasets_collection():
    """Return the 'datasets' collection of the metadata database."""
    db = get_mongo_client().get_database(DB_NAME)
    return db["datasets"]
def get_datasets_collection():
    """Return the 'datasets' collection of the metadata database."""
    # Restored PEP 8 spacing.
    client = get_mongo_client()
    db = client.get_database(DB_NAME)
    return db["datasets"]
def get_categories_collection():
    """Return the 'categories' collection of the metadata database."""
    db = get_mongo_client().get_database(DB_NAME)
    return db["categories"]
def get_categories_collection():
    """Return the 'categories' collection of the metadata database."""
    # Restored PEP 8 spacing.
    client = get_mongo_client()
    db = client.get_database(DB_NAME)
    return db["categories"]
def is_file_processed(filename):
    """Return True when a dataset record for *filename* already exists."""
    match = get_datasets_collection().find_one({"filename": filename})
    return match is not None
def is_file_processed(filename):
    """Return True when a dataset record for *filename* already exists."""
    # Restored PEP 8 spacing.
    collection = get_datasets_collection()
    return collection.find_one({"filename": filename}) is not None
def log_processed_file(filename, category=None, chunk_count=0):
collection = get_datasets_collection()
doc = {
"filename": filename,
"category": category,
"chunk_count": chunk_count,
"processed_at": datetime.utcnow(),
"status": "processed"
def log_processed_file (filename ,category =None ,chunk_count =0 ):
collection =get_datasets_collection ()
doc ={
"filename":filename ,
"category":category ,
"chunk_count":chunk_count ,
"processed_at":datetime .utcnow (),
"status":"processed"
}
collection.insert_one(doc)
collection .insert_one (doc )
def get_all_datasets():
    """Return every dataset record (without MongoDB _id fields)."""
    cursor = get_datasets_collection().find({}, {"_id": 0})
    return list(cursor)
def get_all_datasets():
    """Return every dataset record (without MongoDB _id fields)."""
    # Restored PEP 8 spacing.
    collection = get_datasets_collection()
    return list(collection.find({}, {"_id": 0}))
def get_datasets_by_category(category):
    """Return dataset records tagged with *category* (without _id fields)."""
    cursor = get_datasets_collection().find({"category": category}, {"_id": 0})
    return list(cursor)
def get_datasets_by_category(category):
    """Return dataset records tagged with *category* (without _id fields)."""
    # Restored PEP 8 spacing.
    collection = get_datasets_collection()
    return list(collection.find({"category": category}, {"_id": 0}))
def delete_dataset_record(filename):
    """Delete the dataset record for *filename*; True when something was removed."""
    outcome = get_datasets_collection().delete_one({"filename": filename})
    return outcome.deleted_count > 0
def delete_dataset_record(filename):
    """Delete the dataset record for *filename*; True when something was removed."""
    # Restored PEP 8 spacing.
    collection = get_datasets_collection()
    result = collection.delete_one({"filename": filename})
    return result.deleted_count > 0
def create_category(name, description=""):
    """Create a category document.

    Returns:
        False if a category with *name* already exists, True after insert.
    """
    collection = get_categories_collection()
    if collection.find_one({"name": name}):
        return False
    collection.insert_one({
        "name": name,
        "description": description,
        # Same naive-UTC convention as log_processed_file above.
        "created_at": datetime.utcnow()
    })
    return True
def get_all_categories():
    """Return every category record (without Mongo `_id` fields)."""
    collection = get_categories_collection()
    return list(collection.find({}, {"_id": 0}))
def delete_category(name):
    """Delete the category named *name*; True if one was removed."""
    collection = get_categories_collection()
    result = collection.delete_one({"name": name})
    return result.deleted_count > 0

View File

@@ -1,49 +1,49 @@
from .connection import get_mongo_client
from .connection import get_mongo_client
def insert_rag_documents(documents, collection_name="rag_documents", db_name="vectors_db"):
    """Bulk-insert RAG documents; returns the number of inserted ids (0 for empty input)."""
    client = get_mongo_client()
    db = client.get_database(db_name)
    collection = db[collection_name]
    if documents:
        result = collection.insert_many(documents)
        return len(result.inserted_ids)
    return 0


def search_rag_documents(query_embedding, collection_name="rag_documents", db_name="vectors_db", num_results=5):
    """Run an Atlas $vectorSearch over stored embeddings and return scored texts."""
    client = get_mongo_client()
    db = client.get_database(db_name)
    collection = db[collection_name]
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                # Oversample candidates (10x) for better recall before limiting.
                "numCandidates": num_results * 10,
                "limit": num_results
            }
        },
        {
            "$project": {
                "_id": 0,
                "text": 1,
                "score": {"$meta": "vectorSearchScore"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))


def is_file_processed(filename, log_collection="ingested_files", db_name="vectors_db"):
    """True if *filename* has already been logged as ingested."""
    client = get_mongo_client()
    db = client.get_database(db_name)
    collection = db[log_collection]
    return collection.find_one({"filename": filename}) is not None


def log_processed_file(filename, log_collection="ingested_files", db_name="vectors_db"):
    """Log *filename* as ingested.

    NOTE(review): "processed_at" stores the literal 1, not a timestamp —
    looks like a placeholder; confirm and replace with a real datetime.
    """
    client = get_mongo_client()
    db = client.get_database(db_name)
    collection = db[log_collection]
    collection.insert_one({"filename": filename, "processed_at": 1})

View File

@@ -1,5 +1,5 @@
from .detector import OllamaLogoDetector
from .detector import OllamaLogoDetector
# Public API of this package: only the detector class is exported.
__all__ = [
    "OllamaLogoDetector",
]

View File

@@ -1,4 +1,4 @@
from .cli import main
from .cli import main
# Allow `python -m <package>` to run the CLI entry point.
if __name__ == "__main__":
    main()

View File

@@ -1,103 +1,103 @@
#!/usr/bin/env python3
import argparse
import json
import sys
from .detector import OllamaLogoDetector
from .camera import capture_and_analyze, start_interactive_capture
import argparse
import json
import sys
def main():
    """CLI entry point: analyze a file, capture one frame, or run interactively."""
    parser = argparse.ArgumentParser(
        description="Detect logos and companies using Ollama vision models"
    )
    parser.add_argument("--image", "-i", type=str)
    parser.add_argument("--model", "-m", type=str, default="ministral-3:latest")
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--host", type=str)
    parser.add_argument("--single", "-s", action="store_true")
    parser.add_argument("--no-save", action="store_true")
    parser.add_argument("--output-dir", type=str, default="./captures")
    args = parser.parse_args()
    try:
        if args.image:
            # File mode: analyze a single image from disk.
            print(f"Analyzing: {args.image}")
            print(f"Model: {args.model}")
            detector = OllamaLogoDetector(model=args.model, host=args.host)
            result = detector.detect_from_file(args.image)
            _print_results(result)
            if args.output:
                with open(args.output, 'w') as f:
                    json.dump(result, f, indent=2)
                print(f"Results saved to: {args.output}")
        elif args.single:
            # Camera mode: capture one frame and analyze it.
            result = capture_and_analyze(
                model=args.model,
                save_image=not args.no_save,
                output_dir=args.output_dir
            )
            if args.output and result:
                with open(args.output, 'w') as f:
                    json.dump(result, f, indent=2)
                print(f"Results saved to: {args.output}")
        else:
            # Default: interactive capture loop.
            start_interactive_capture(
                model=args.model,
                save_images=not args.no_save,
                output_dir=args.output_dir
            )
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
def _print_results(result: dict):
print("\n" + "=" * 50)
print("DETECTION RESULTS")
print("=" * 50)
logos = result.get("logos_detected", [])
count = result.get("total_count", len(logos))
if count == 0:
print("\nNo logos or companies detected")
if desc := result.get("description"):
print(f"\nImage description: {desc}")
else:
print(f"\nFound {count} logo(s)/company(s):\n")
for i, logo in enumerate(logos, 1):
brand = logo.get("brand", "Unknown")
conf = logo.get("confidence", "unknown")
loc = logo.get("location", "unknown")
cat = logo.get("category", "N/A")
print(f" {i}. {brand}")
print(f" Confidence: {conf}")
print(f" Location: {loc}")
print(f" Category: {cat}")
print()
if "error" in result:
print(f"\nError occurred: {result['error']}")
if "raw_response" in result and result.get("parse_error"):
print(f"\nParse error: {result['parse_error']}")
print(f"Raw response:\n{result['raw_response'][:500]}...")
print("=" * 50)
print("\nRaw JSON:")
print(json.dumps(result, indent=2))
parser .add_argument ("--image","-i",type =str )
parser .add_argument ("--model","-m",type =str ,default ="ministral-3:latest")
parser .add_argument ("--output","-o",type =str )
parser .add_argument ("--host",type =str )
parser .add_argument ("--single","-s",action ="store_true")
parser .add_argument ("--no-save",action ="store_true")
parser .add_argument ("--output-dir",type =str ,default ="./captures")
if __name__ == "__main__":
main()
args =parser .parse_args ()
try :
if args .image :
print (f"Analyzing: {args .image }")
print (f"Model: {args .model }")
detector =OllamaLogoDetector (model =args .model ,host =args .host )
result =detector .detect_from_file (args .image )
_print_results (result )
if args .output :
with open (args .output ,'w')as f :
json .dump (result ,f ,indent =2 )
print (f"Results saved to: {args .output }")
elif args .single :
result =capture_and_analyze (
model =args .model ,
save_image =not args .no_save ,
output_dir =args .output_dir
)
if args .output and result :
with open (args .output ,'w')as f :
json .dump (result ,f ,indent =2 )
print (f"Results saved to: {args .output }")
else :
start_interactive_capture (
model =args .model ,
save_images =not args .no_save ,
output_dir =args .output_dir
)
except KeyboardInterrupt :
sys .exit (0 )
except Exception as e :
print (f"Error: {e }")
sys .exit (1 )
def _print_results (result :dict ):
print ("\n"+"="*50 )
print ("DETECTION RESULTS")
print ("="*50 )
logos =result .get ("logos_detected",[])
count =result .get ("total_count",len (logos ))
if count ==0 :
print ("\nNo logos or companies detected")
if desc :=result .get ("description"):
print (f"\nImage description: {desc }")
else :
print (f"\nFound {count } logo(s)/company(s):\n")
for i ,logo in enumerate (logos ,1 ):
brand =logo .get ("brand","Unknown")
conf =logo .get ("confidence","unknown")
loc =logo .get ("location","unknown")
cat =logo .get ("category","N/A")
print (f" {i }. {brand }")
print (f" Confidence: {conf }")
print (f" Location: {loc }")
print (f" Category: {cat }")
print ()
if "error"in result :
print (f"\nError occurred: {result ['error']}")
if "raw_response"in result and result .get ("parse_error"):
print (f"\nParse error: {result ['parse_error']}")
print (f"Raw response:\n{result ['raw_response'][:500 ]}...")
print ("="*50 )
print ("\nRaw JSON:")
print (json .dumps (result ,indent =2 ))
if __name__ =="__main__":
main ()

View File

@@ -1,20 +1,20 @@
import base64
import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Union
import base64
import json
import re
from pathlib import Path
from typing import Dict ,List ,Optional ,Union
# Optional dependency: the module still imports without ollama installed;
# the flag is checked at runtime before any client call.
try:
    import ollama
    OLLAMA_AVAILABLE = True
except ImportError:
    OLLAMA_AVAILABLE = False
    print("Ollama not installed. Run: pip install ollama")
DEFAULT_HOST = "https://ollama.sirblob.co"
DEFAULT_MODEL = "ministral-3:latest"
DEFAULT_HOST ="https://ollama.sirblob.co"
DEFAULT_MODEL ="ministral-3:latest"
DEFAULT_PROMPT = """Analyze this image and identify ALL logos, brand names, and company names visible.
DEFAULT_PROMPT ="""Analyze this image and identify ALL logos, brand names, and company names visible.
For each logo or brand you detect, provide:
1. The company/brand name
@@ -45,120 +45,120 @@ If no logos are found, return:
IMPORTANT: Return ONLY the JSON object, no other text."""
class OllamaLogoDetector:
def __init__(self,
             model: str = DEFAULT_MODEL,
             host: str = DEFAULT_HOST):
    """Create an Ollama client and verify (or pull) the requested vision model."""
    if not OLLAMA_AVAILABLE:
        raise RuntimeError("Ollama not installed. Run: pip install ollama")
    self.model = model
    self.host = host
    self.client = ollama.Client(host=host)
    try:
        models = self.client.list()
        model_names = [m['name'] for m in models.get('models', [])]
        # Compare on the base name only (ignore the ":tag" suffix).
        model_base = model.split(':')[0]
        if not any(model_base in name for name in model_names):
            print(f"Model '{model}' not found. Available models: {model_names}")
            print(f"Pulling {model}...")
            self.client.pull(model)
            print(f"Model {model} ready!")
        else:
            print(f"Using Ollama model: {model}")
    except Exception as e:
        # Best-effort check: construction still succeeds if the server is
        # unreachable; later calls will fail instead.
        print(f"Could not verify model: {e}")
        print("Make sure Ollama is running: ollama serve")
def detect_from_file(self,
                     image_path: str,
                     prompt: Optional[str] = None) -> Dict:
    """Analyze an image file on disk.

    Raises:
        FileNotFoundError: if *image_path* does not exist.
    """
    path = Path(image_path)
    if not path.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")
    with open(path, 'rb') as f:
        image_data = base64.b64encode(f.read()).decode('utf-8')
    return self._analyze_image(image_data, prompt)
def detect_from_bytes(self,
                      image_bytes: bytes,
                      prompt: Optional[str] = None) -> Dict:
    """Analyze raw image bytes (base64-encodes them for the model)."""
    image_data = base64.b64encode(image_bytes).decode('utf-8')
    return self._analyze_image(image_data, prompt)
def detect_from_numpy(self,
                      image_array,
                      prompt: Optional[str] = None) -> Dict:
    """Analyze an OpenCV/numpy image array by JPEG-encoding it first."""
    import cv2  # local import: cv2 is only needed for this path
    success, buffer = cv2.imencode('.jpg', image_array)
    if not success:
        raise ValueError("Failed to encode image")
    return self.detect_from_bytes(buffer.tobytes(), prompt)
def _analyze_image(self,
image_base64: str,
prompt: Optional[str] = None) -> Dict:
if prompt is None:
prompt = DEFAULT_PROMPT
try:
response = self.client.chat(
model=self.model,
messages=[{
'role': 'user',
'content': prompt,
'images': [image_base64]
}],
options={
'temperature': 0.1,
}
)
content = response['message']['content']
return self._parse_response(content)
except Exception as e:
return {
"logos_detected": [],
"total_count": 0,
"error": str(e),
"raw_response": None
class OllamaLogoDetector :
def __init__ (self ,
model :str =DEFAULT_MODEL ,
host :str =DEFAULT_HOST ):
if not OLLAMA_AVAILABLE :
raise RuntimeError ("Ollama not installed. Run: pip install ollama")
self .model =model
self .host =host
self .client =ollama .Client (host =host )
try :
models =self .client .list ()
model_names =[m ['name']for m in models .get ('models',[])]
model_base =model .split (':')[0 ]
if not any (model_base in name for name in model_names ):
print (f"Model '{model }' not found. Available models: {model_names }")
print (f"Pulling {model }...")
self .client .pull (model )
print (f"Model {model } ready!")
else :
print (f"Using Ollama model: {model }")
except Exception as e :
print (f"Could not verify model: {e }")
print ("Make sure Ollama is running: ollama serve")
def detect_from_file (self ,
image_path :str ,
prompt :Optional [str ]=None )->Dict :
path =Path (image_path )
if not path .exists ():
raise FileNotFoundError (f"Image not found: {image_path }")
with open (path ,'rb')as f :
image_data =base64 .b64encode (f .read ()).decode ('utf-8')
return self ._analyze_image (image_data ,prompt )
def detect_from_bytes (self ,
image_bytes :bytes ,
prompt :Optional [str ]=None )->Dict :
image_data =base64 .b64encode (image_bytes ).decode ('utf-8')
return self ._analyze_image (image_data ,prompt )
def detect_from_numpy (self ,
image_array ,
prompt :Optional [str ]=None )->Dict :
import cv2
success ,buffer =cv2 .imencode ('.jpg',image_array )
if not success :
raise ValueError ("Failed to encode image")
return self .detect_from_bytes (buffer .tobytes (),prompt )
def _analyze_image (self ,
image_base64 :str ,
prompt :Optional [str ]=None )->Dict :
if prompt is None :
prompt =DEFAULT_PROMPT
try :
response =self .client .chat (
model =self .model ,
messages =[{
'role':'user',
'content':prompt ,
'images':[image_base64 ]
}],
options ={
'temperature':0.1 ,
}
def _parse_response(self, content: str) -> Dict:
try:
return json.loads(content)
except json.JSONDecodeError:
pass
json_patterns = [
r'```json\s*([\s\S]*?)\s*```',
r'```\s*([\s\S]*?)\s*```',
r'\{[\s\S]*\}'
)
content =response ['message']['content']
return self ._parse_response (content )
except Exception as e :
return {
"logos_detected":[],
"total_count":0 ,
"error":str (e ),
"raw_response":None
}
def _parse_response (self ,content :str )->Dict :
try :
return json .loads (content )
except json .JSONDecodeError :
pass
json_patterns =[
r'```json\s*([\s\S]*?)\s*```',
r'```\s*([\s\S]*?)\s*```',
r'\{[\s\S]*\}'
]
for pattern in json_patterns:
match = re.search(pattern, content)
if match:
try:
json_str = match.group(1) if '```' in pattern else match.group(0)
return json.loads(json_str)
except json.JSONDecodeError:
continue
for pattern in json_patterns :
match =re .search (pattern ,content )
if match :
try :
json_str =match .group (1 )if '```'in pattern else match .group (0 )
return json .loads (json_str )
except json .JSONDecodeError :
continue
return {
"logos_detected": [],
"total_count": 0,
"raw_response": content,
"parse_error": "Could not extract valid JSON from response"
"logos_detected":[],
"total_count":0 ,
"raw_response":content ,
"parse_error":"Could not extract valid JSON from response"
}
def get_brands_list(self, result: Dict) -> List[str]:
    """Extract just the brand names from a detection result dict."""
    logos = result.get("logos_detected", [])
    return [logo.get("brand", "Unknown") for logo in logos]

View File

@@ -1,32 +1,32 @@
import ollama
import os
import ollama
import os
client = ollama.Client(host="https://ollama.sirblob.co")
DEFAULT_MODEL = "nomic-embed-text:latest"
client =ollama .Client (host ="https://ollama.sirblob.co")
DEFAULT_MODEL ="nomic-embed-text:latest"
def get_embedding(text, model=DEFAULT_MODEL):
    """Return the embedding vector for *text* from the Ollama server."""
    try:
        response = client.embeddings(model=model, prompt=text)
        return response["embedding"]
    except Exception as e:
        # Log and re-raise so callers can decide how to recover.
        print(f"Error getting embedding from Ollama: {e}")
        raise e
def get_embeddings_batch(texts, model=DEFAULT_MODEL, batch_size=50):
    """Embed *texts* in batches of *batch_size*; raises on any batch failure."""
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        try:
            response = client.embed(model=model, input=batch)
            if "embeddings" in response:
                all_embeddings.extend(response["embeddings"])
            else:
                raise ValueError("Unexpected response format from client.embed")
        except Exception as e:
            print(f"Error embedding batch {i}-{i+batch_size}: {e}")
            raise e
    return all_embeddings

View File

@@ -1,43 +1,43 @@
import os
from google import genai
from dotenv import load_dotenv
import os
from google import genai
from dotenv import load_dotenv
script_dir = os.path.dirname(os.path.abspath(__file__))
load_dotenv(os.path.join(script_dir, '.env'))
script_dir =os .path .dirname (os .path .abspath (__file__ ))
load_dotenv (os .path .join (script_dir ,'.env'))
class GeminiClient:
    """Thin wrapper around the google-genai client for short Q&A calls."""

    def __init__(self):
        # API key must be provided via environment / .env (loaded above).
        self.api_key = os.getenv("GOOGLE_API_KEY")
        if not self.api_key:
            raise ValueError("No GOOGLE_API_KEY found in .env file!")
        self.client = genai.Client(api_key=self.api_key)
        self.model_name = "gemini-2.0-flash"

    def ask(self, prompt, context=""):
        """Ask Gemini a question, optionally grounded in *context*.

        Returns the model text, or an error string on failure (never raises).
        """
        try:
            if context:
                full_message = f"Use this information to answer: {context}\n\nQuestion: {prompt}"
            else:
                full_message = prompt
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=full_message,
                config={
                    'system_instruction': 'You are a concise sustainability assistant. Your responses must be a single short paragraph, maximum 6 sentences long. Do not use bullet points or multiple sections.'
                }
            )
            return response.text
        except Exception as e:
            return f"Error talking to Gemini: {str(e)}"


if __name__ == "__main__":
    # Manual smoke test: requires a valid GOOGLE_API_KEY.
    try:
        brain = GeminiClient()
        print("--- Testing Class Connection ---")
        print(brain.ask("Hello! Give me a 1-sentence coding tip."))
    except Exception as e:
        print(f"Failed to start Gemini: {e}")

View File

@@ -1,92 +1,92 @@
import pandas as pd
from pypdf import PdfReader
import io
import os
import pandas as pd
from pypdf import PdfReader
import io
import os
def chunk_text(text, target_length=2000, overlap=100):
    """Split *text* into chunks of roughly *target_length* characters.

    Splits on blank-line paragraph boundaries first; paragraphs longer than
    *target_length* are sliced with *overlap* characters of carry-over.
    Returns [] for empty input.
    """
    if not text:
        return []
    chunks = []
    paragraphs = text.split('\n\n')
    current_chunk = ""
    for para in paragraphs:
        if len(current_chunk) + len(para) > target_length:
            # Flush whatever has accumulated before handling this paragraph.
            if current_chunk:
                chunks.append(current_chunk.strip())
            if len(para) > target_length:
                # Oversized paragraph: slice it with overlapping windows.
                start = 0
                while start < len(para):
                    end = start + target_length
                    chunks.append(para[start:end].strip())
                    start += (target_length - overlap)
                current_chunk = ""
            else:
                current_chunk = para
        else:
            if current_chunk:
                current_chunk += "\n\n" + para
            else:
                current_chunk = para
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
def load_csv(file_path):
    """Load a CSV and return one ' | '-joined string per row."""
    df = pd.read_csv(file_path)
    return df.apply(lambda x: ' | '.join(x.astype(str)), axis=1).tolist()
chunks =[]
paragraphs =text .split ('\n\n')
current_chunk =""
def load_pdf(file_path):
    """Extract text per PDF page, chunking pages longer than 4000 chars."""
    reader = PdfReader(file_path)
    text_chunks = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            if len(text) > 4000:
                text_chunks.extend(chunk_text(text))
            else:
                text_chunks.append(text)
    return text_chunks
for para in paragraphs :
if len (current_chunk )+len (para )>target_length :
if current_chunk :
chunks .append (current_chunk .strip ())
def load_txt(file_path):
    """Read a UTF-8 text file and chunk its contents."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    return chunk_text(content)
if len (para )>target_length :
start =0
while start <len (para ):
end =start +target_length
chunks .append (para [start :end ].strip ())
start +=(target_length -overlap )
current_chunk =""
else :
current_chunk =para
else :
if current_chunk :
current_chunk +="\n\n"+para
else :
current_chunk =para
def load_xlsx(file_path):
    """Flatten every sheet of an Excel workbook into 'Sheet: ... | ...' row strings."""
    all_rows = []
    try:
        sheets = pd.read_excel(file_path, sheet_name=None)
    except Exception as e:
        raise ValueError(f"Pandas read_excel failed: {e}")
    for sheet_name, df in sheets.items():
        if df.empty:
            continue
        df = df.fillna("")
        for row in df.values:
            # Drop empty cells so the row string stays compact.
            row_items = [str(x) for x in row if str(x).strip() != ""]
            if row_items:
                row_str = f"Sheet: {str(sheet_name)} | " + " | ".join(row_items)
                if len(row_str) > 8000:
                    all_rows.extend(chunk_text(row_str))
                else:
                    all_rows.append(row_str)
    return all_rows
if current_chunk :
chunks .append (current_chunk .strip ())
def process_file(file_path):
    """Dispatch to the loader matching the file extension.

    Raises:
        ValueError: for unsupported extensions.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == '.csv':
        return load_csv(file_path)
    elif ext == '.pdf':
        return load_pdf(file_path)
    elif ext == '.txt':
        return load_txt(file_path)
    elif ext == '.xlsx':
        return load_xlsx(file_path)
    else:
        raise ValueError(f"Unsupported file type: {ext}")
return chunks
def load_csv (file_path ):
df =pd .read_csv (file_path )
return df .apply (lambda x :' | '.join (x .astype (str )),axis =1 ).tolist ()
def load_pdf (file_path ):
reader =PdfReader (file_path )
text_chunks =[]
for page in reader .pages :
text =page .extract_text ()
if text :
if len (text )>4000 :
text_chunks .extend (chunk_text (text ))
else :
text_chunks .append (text )
return text_chunks
def load_txt (file_path ):
with open (file_path ,'r',encoding ='utf-8')as f :
content =f .read ()
return chunk_text (content )
def load_xlsx (file_path ):
all_rows =[]
try :
sheets =pd .read_excel (file_path ,sheet_name =None )
except Exception as e :
raise ValueError (f"Pandas read_excel failed: {e }")
for sheet_name ,df in sheets .items ():
if df .empty :
continue
df =df .fillna ("")
for row in df .values :
row_items =[str (x )for x in row if str (x ).strip ()!=""]
if row_items :
row_str =f"Sheet: {str (sheet_name )} | "+" | ".join (row_items )
if len (row_str )>8000 :
all_rows .extend (chunk_text (row_str ))
else :
all_rows .append (row_str )
return all_rows
def process_file (file_path ):
ext =os .path .splitext (file_path )[1 ].lower ()
if ext =='.csv':
return load_csv (file_path )
elif ext =='.pdf':
return load_pdf (file_path )
elif ext =='.txt':
return load_txt (file_path )
elif ext =='.xlsx':
return load_xlsx (file_path )
else :
raise ValueError (f"Unsupported file type: {ext }")

View File

@@ -1,27 +1,27 @@
from .embeddings import get_embeddings_batch, get_embedding
from ..chroma.vector_store import insert_documents, search_documents
from .embeddings import get_embeddings_batch ,get_embedding
from ..chroma .vector_store import insert_documents ,search_documents
def ingest_documents(text_chunks, collection_name="rag_documents", source_file=None, category=None):
    """Embed text chunks and insert them, attaching source/category metadata when given."""
    embeddings = get_embeddings_batch(text_chunks)
    metadata_list = None
    if source_file or category:
        metadata_list = []
        for _ in text_chunks:
            meta = {}
            if source_file:
                meta["source"] = source_file
            if category:
                meta["category"] = category
            metadata_list.append(meta)
    return insert_documents(text_chunks, embeddings, collection_name=collection_name, metadata_list=metadata_list)
def ingest_documents (text_chunks ,collection_name ="rag_documents",source_file =None ,category =None ):
embeddings =get_embeddings_batch (text_chunks )
def vector_search(query_text, collection_name="rag_documents", num_results=5, category=None):
    """Embed *query_text* and search the vector store, optionally filtered by category."""
    query_embedding = get_embedding(query_text)
    filter_metadata = None
    if category:
        filter_metadata = {"category": category}
    return search_documents(query_embedding, collection_name=collection_name, num_results=num_results, filter_metadata=filter_metadata)
metadata_list =None
if source_file or category :
metadata_list =[]
for _ in text_chunks :
meta ={}
if source_file :
meta ["source"]=source_file
if category :
meta ["category"]=category
metadata_list .append (meta )
return insert_documents (text_chunks ,embeddings ,collection_name =collection_name ,metadata_list =metadata_list )
def vector_search (query_text ,collection_name ="rag_documents",num_results =5 ,category =None ):
query_embedding =get_embedding (query_text )
filter_metadata =None
if category :
filter_metadata ={"category":category }
return search_documents (query_embedding ,collection_name =collection_name ,num_results =num_results ,filter_metadata =filter_metadata )

View File

@@ -1,62 +1,62 @@
from flask import Blueprint, request, jsonify
from src.rag.gemeni import GeminiClient
from src.gemini import ask_gemini_with_rag
from flask import Blueprint ,request ,jsonify
from src .rag .gemeni import GeminiClient
from src .gemini import ask_gemini_with_rag
gemini_bp = Blueprint('gemini', __name__)
brain = None
gemini_bp =Blueprint ('gemini',__name__ )
brain =None
def get_brain():
    """Lazily create and cache the module-level GeminiClient singleton."""
    global brain
    if brain is None:
        brain = GeminiClient()
    return brain
@gemini_bp.route('/ask', methods=['POST'])
def ask():
    """POST /ask — forward a prompt (plus optional context) to Gemini."""
    data = request.json
    prompt = data.get("prompt")
    context = data.get("context", "")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400
    try:
        client = get_brain()
        response = client.ask(prompt, context)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
@gemini_bp.route('/rag', methods=['POST'])
def rag():
    """POST /rag — answer a prompt using retrieval-augmented Gemini."""
    data = request.json
    prompt = data.get("prompt")
    category = data.get("category")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400
    try:
        response = ask_gemini_with_rag(prompt, category=category)
        return jsonify({
            "status": "success",
            "reply": response
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": str(e)
        }), 500
@gemini_bp.route('/vision', methods=['POST'])
def vision():
    """POST /vision — placeholder; always returns 501 Not Implemented."""
    return jsonify({
        "status": "error",
        "message": "Vision endpoint not yet implemented"
    }), 501

View File

@@ -2,65 +2,112 @@
Incident Report API - Handles greenwashing report submissions
Uses structured outputs with Pydantic for reliable JSON responses
"""
import base64
import os
from datetime import datetime
from flask import Blueprint, request, jsonify
from google import genai
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
import base64
import os
import cv2
import numpy as np
from datetime import datetime
from flask import Blueprint ,request ,jsonify
from google import genai
from pydantic import BaseModel ,Field
from typing import List ,Optional ,Literal
from src.ollama.detector import OllamaLogoDetector
from src.chroma.vector_store import search_documents, insert_documents
from src.rag.embeddings import get_embedding
from src.mongo.connection import get_mongo_client
from src .ollama .detector import OllamaLogoDetector
from src .chroma .vector_store import search_documents ,insert_documents
from src .rag .embeddings import get_embedding
from src .mongo .connection import get_mongo_client
incidents_bp = Blueprint('incidents', __name__)
# Initialize detector lazily
_detector = None
def get_detector():
    """Lazily create and cache the module-level OllamaLogoDetector."""
    global _detector
    if _detector is None:
        _detector = OllamaLogoDetector()
    return _detector
incidents_bp =Blueprint ('incidents',__name__ )
# ============= Pydantic Models for Structured Outputs =============
_detector =None
class GreenwashingAnalysis(BaseModel):
def get_detector ():
global _detector
if _detector is None :
_detector =OllamaLogoDetector ()
return _detector
def compress_image(image_bytes: bytes, max_width: int = 800, quality: int = 85) -> str:
    """
    Compress image using OpenCV and return Base64 string

    Args:
        image_bytes: Original image bytes
        max_width: Maximum width for resized image
        quality: JPEG quality (1-100)

    Returns:
        Base64 encoded compressed image
    """
    try:
        nparr = np.frombuffer(image_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError("Failed to decode image")
        height, width = img.shape[:2]
        if width > max_width:
            # Downscale proportionally to max_width.
            ratio = max_width / width
            new_width = max_width
            new_height = int(height * ratio)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
        _, buffer = cv2.imencode('.jpg', img, encode_param)
        compressed_base64 = base64.b64encode(buffer).decode('utf-8')
        return compressed_base64
    except Exception as e:
        # Fall back to the original, uncompressed bytes on any failure.
        print(f"Image compression error: {e}")
        return base64.b64encode(image_bytes).decode('utf-8')
class GreenwashingAnalysis(BaseModel):
    """Structured output for greenwashing analysis"""
    is_greenwashing: bool = Field(description="Whether this is a case of greenwashing")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis")
    verdict: str = Field(description="Brief one-sentence verdict")
    reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing")
    severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected")
    recommendations: str = Field(description="What consumers should know about this case")
    key_claims: List[str] = Field(description="List of specific environmental claims made by the company")
    red_flags: List[str] = Field(description="List of red flags or concerning practices identified")
class LogoDetection(BaseModel):
    """Structured output schema for a single logo detected by Ollama."""

    # NOTE: the class header and fields were duplicated by a bad merge;
    # collapsed to a single valid definition.
    brand: str = Field(description="The company or brand name detected")
    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection")
    location: str = Field(description="Location in image (e.g., center, top-left)")
    category: str = Field(description="Product category if identifiable")
class ImageAnalysis(BaseModel):
    """Structured output schema for full image analysis (Ollama or fallback)."""

    # NOTE: the class header and fields were duplicated by a bad merge;
    # collapsed to a single valid definition.
    logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image")
    total_count: int = Field(description="Total number of logos detected")
    description: str = Field(description="Brief description of what's in the image")
    environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image")
    packaging_description: str = Field(description="Description of the product packaging and design")
# ============= Analysis Functions =============

# Prompt template for analyze_with_gemini(); placeholders are filled via
# str.format(), so every {name} below must stay a valid format field.
# NOTE(review): the middle of this prompt was elided in a corrupted merge;
# the report section has been reconstructed from the format() call sites —
# confirm against version history.
GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies.

Analyze the following user-submitted report about a potential greenwashing incident:

Product/Company: {product_name}
Detected Brand: {detected_brand}
User's Description: {user_description}
Image Analysis: {image_description}

Relevant Context from Our Database:
{context}

Based on this information, determine if this is a valid case of greenwashing. Consider the company's actual environmental record and whether the claims are misleading.

Provide your analysis in the structured format requested."""
def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str,
                        image_description: str, context: str) -> GreenwashingAnalysis:
    """Send the greenwashing analysis request to Gemini with structured output.

    Args:
        product_name: Name of the product/company being reported.
        user_description: The user's description of the misleading claim.
        detected_brand: Brand detected from the submitted image (or "Unknown").
        image_description: Text description produced by image analysis.
        context: Retrieved vector-store context about the company.

    Returns:
        A validated GreenwashingAnalysis parsed from the model's JSON reply.

    Raises:
        ValueError: If the GOOGLE_API_KEY environment variable is not set.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY not set")

    prompt = GREENWASHING_ANALYSIS_PROMPT.format(
        product_name=product_name,
        user_description=user_description,
        detected_brand=detected_brand,
        image_description=image_description,
        context=context,
    )

    client = genai.Client(api_key=api_key)
    # Constrain the model to the Pydantic JSON schema so the reply is
    # guaranteed to be parseable into GreenwashingAnalysis.
    # NOTE(review): a corrupted merge left both "gemini-3-flash-preview" and
    # "gemini-3-pro-preview" in the file; the committed (newer) model is kept.
    response = client.models.generate_content(
        model="gemini-3-pro-preview",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_json_schema": GreenwashingAnalysis.model_json_schema(),
        },
    )

    # Validate and parse the JSON reply into the schema.
    return GreenwashingAnalysis.model_validate_json(response.text)
def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis:
    """Analyze an image with Ollama using structured output.

    Sends the image to a vision model with ImageAnalysis's JSON schema as the
    required response format. On any failure, falls back to the local logo
    detector and adapts its dict output to the same schema.

    Args:
        image_bytes: Raw image bytes (already base64-decoded).

    Returns:
        An ImageAnalysis; the fallback path leaves environmental_claims empty
        and packaging_description blank because the detector doesn't provide them.
    """
    try:
        import ollama

        client = ollama.Client(host="https://ollama.sirblob.co")
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        # NOTE(review): the middle of this prompt was elided in a corrupted
        # merge; items 2-3 reconstructed from the ImageAnalysis schema fields —
        # confirm against version history.
        prompt = """Analyze this image for a greenwashing detection system.

Identify:
1. All visible logos, brand names, and company names
2. Any environmental or eco-friendly claims visible in the image
3. The product packaging and overall design

Respond with structured JSON matching the schema provided."""

        response = client.chat(
            model="ministral-3:latest",
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [image_base64],
            }],
            # Force the reply to conform to the ImageAnalysis JSON schema.
            format=ImageAnalysis.model_json_schema(),
            options={'temperature': 0.1},
        )

        # Validate and parse the structured reply.
        return ImageAnalysis.model_validate_json(response['message']['content'])

    except Exception as e:
        print(f"Ollama structured analysis failed: {e}")
        # Fall back to the basic local detector and convert its dict output.
        detector = get_detector()
        result = detector.detect_from_bytes(image_bytes)
        logos = [
            LogoDetection(
                brand=logo.get('brand', 'Unknown'),
                confidence=logo.get('confidence', 'low'),
                location=logo.get('location', 'unknown'),
                category=logo.get('category', 'unknown'),
            )
            for logo in result.get('logos_detected', [])
        ]
        return ImageAnalysis(
            logos_detected=logos,
            total_count=result.get('total_count', 0),
            description=result.get('description', 'No description available'),
            environmental_claims=[],
            packaging_description="",
        )
def save_to_mongodb(incident_data: dict) -> str:
    """Insert the incident document into MongoDB and return its id.

    Args:
        incident_data: The fully assembled incident document.

    Returns:
        The inserted document's ObjectId, stringified for JSON responses.
    """
    client = get_mongo_client()
    collection = client["ethix"]["incidents"]
    result = collection.insert_one(incident_data)
    return str(result.inserted_id)
def save_to_chromadb(incident_data: dict, incident_id: str):
    """Persist a confirmed incident into ChromaDB as chatbot context.

    Builds a rich text report (verdict, full analysis, claims, red flags,
    recommendations, environmental impact summary), embeds it, and stores it
    with searchable metadata under source "incident_<id>".

    Args:
        incident_data: The incident document (must contain the 'analysis' dict).
        incident_id: Stringified MongoDB id of the saved incident.
    """
    analysis = incident_data['analysis']

    red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', []))
    key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', []))
    env_claims = "\n".join(f"- {claim}" for claim in incident_data.get('environmental_claims', []))

    # NOTE(review): section spacing of this report text was reconstructed
    # after a corrupted merge stripped blank lines — cosmetic only.
    text = f"""GREENWASHING INCIDENT REPORT #{incident_id}
Report Date: {incident_data['created_at']}
Company/Product: {incident_data['product_name']}
Detected Brand: {incident_data.get('detected_brand', 'Unknown brand')}
Status: {incident_data['status']}

=== VERDICT ===
{analysis['verdict']}
Greenwashing Detected: {'YES' if analysis['is_greenwashing'] else 'NO'}
Confidence Level: {analysis['confidence']}
Severity Assessment: {analysis['severity']}

=== USER COMPLAINT ===
{incident_data['user_description']}

=== IMAGE ANALYSIS ===
{incident_data.get('image_description', 'No image analysis available')}

=== ENVIRONMENTAL CLAIMS IDENTIFIED ===
{env_claims if env_claims else 'No specific environmental claims identified'}

=== DETAILED ANALYSIS & REASONING ===
{analysis['reasoning']}

=== KEY MARKETING CLAIMS ===
{key_claims if key_claims else 'No key claims identified'}

=== RED FLAGS IDENTIFIED ===
{red_flags if red_flags else 'No specific red flags identified'}

=== CONSUMER RECOMMENDATIONS ===
{analysis['recommendations']}

=== ENVIRONMENTAL IMPACT ASSESSMENT ===
This report highlights potential misleading environmental claims by {incident_data.get('detected_brand', 'the company')}.
Consumers should be aware that {analysis['severity']} severity greenwashing has been identified with {analysis['confidence']} confidence.
This incident has been documented for future reference and to help inform sustainable purchasing decisions.
"""

    # Embed once and store with metadata the chatbot can filter on.
    embedding = get_embedding(text)
    metadata = {
        "type": "incident_report",
        "source": f"incident_{incident_id}",
        "product_name": incident_data['product_name'],
        "brand": incident_data.get('detected_brand', 'Unknown'),
        "severity": analysis['severity'],
        "confidence": analysis['confidence'],
        "is_greenwashing": True,
        "verdict": analysis['verdict'],
        "status": incident_data['status'],
        "created_at": incident_data['created_at'],
        "num_red_flags": len(analysis.get('red_flags', [])),
        "num_claims": len(analysis.get('key_claims', [])),
    }
    insert_documents(
        texts=[text],
        embeddings=[embedding],
        metadata_list=[metadata],
    )
    print(f"✓ Incident #{incident_id} saved to ChromaDB for AI chat context")
# ============= API Endpoints =============

@incidents_bp.route('/submit', methods=['POST'])
def submit_incident():
    """Submit a greenwashing incident report.

    Expects JSON with:
        - product_name: Name of the product/company
        - description: User's description of the misleading claim
        - report_type: 'product' or 'company' (defaults to 'product')
        - image: Base64 encoded image (for product reports)
        - pdf_data: Base64 encoded PDF (for company reports)
          NOTE(review): pdf_data is documented but not yet consumed here —
          confirm whether company-report handling is pending.

    Pipeline: optional image analysis -> vector-store context retrieval ->
    Gemini structured analysis -> persist to MongoDB/ChromaDB when confirmed.
    """
    data = request.json
    if not data:
        return jsonify({"error": "No data provided"}), 400

    product_name = data.get('product_name', '').strip()
    user_description = data.get('description', '').strip()
    report_type = data.get('report_type', 'product')
    image_base64 = data.get('image')

    if not product_name:
        return jsonify({"error": "Product name is required"}), 400
    if not user_description:
        return jsonify({"error": "Description is required"}), 400

    try:
        # Step 1: analyze the image (product reports only); best-effort.
        detected_brand = "Unknown"
        image_description = "No image provided"
        environmental_claims = []
        compressed_image_base64 = None

        if report_type == 'product' and image_base64:
            try:
                # Strip a data-URL prefix ("data:image/...;base64,") if present.
                if ',' in image_base64:
                    image_base64 = image_base64.split(',')[1]
                image_bytes = base64.b64decode(image_base64)

                print("Compressing image with OpenCV...")
                compressed_image_base64 = compress_image(image_bytes, max_width=600, quality=75)

                image_analysis = analyze_image_with_ollama(image_bytes)
                if image_analysis.logos_detected:
                    detected_brand = image_analysis.logos_detected[0].brand
                image_description = image_analysis.description
                environmental_claims = image_analysis.environmental_claims
            except Exception as e:
                # Image handling is best-effort; continue without it.
                print(f"Image processing error: {e}")

        # Step 2: retrieve relevant context from the vector database.
        search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing"
        query_embedding = get_embedding(search_query)
        search_results = search_documents(query_embedding, num_results=5)
        context = ""
        for res in search_results:
            context += f"--- Document ---\n{res['text'][:500]}\n\n"
        if not context:
            context = "No prior information found about this company in our database."
        if environmental_claims:
            context += "\n--- Claims visible in submitted image ---\n"
            context += "\n".join(f"- {claim}" for claim in environmental_claims)

        # Step 3: structured analysis with Gemini.
        analysis = analyze_with_gemini(
            product_name=product_name,
            user_description=user_description,
            detected_brand=detected_brand,
            image_description=image_description,
            context=context,
        )
        analysis_dict = analysis.model_dump()

        # Step 4: assemble the incident document.
        incident_data = {
            "product_name": product_name,
            "user_description": user_description,
            "detected_brand": detected_brand,
            "image_description": image_description,
            "environmental_claims": environmental_claims,
            "analysis": analysis_dict,
            "is_greenwashing": analysis.is_greenwashing,
            "created_at": datetime.utcnow().isoformat(),
            "status": "confirmed" if analysis.is_greenwashing else "dismissed",
            "report_type": report_type,
        }
        if compressed_image_base64:
            incident_data["image_base64"] = compressed_image_base64

        # Step 5: persist only confirmed greenwashing cases.
        incident_id = None
        if analysis.is_greenwashing:
            incident_id = save_to_mongodb(incident_data)
            save_to_chromadb(incident_data, incident_id)

        return jsonify({
            "status": "success",
            "is_greenwashing": analysis.is_greenwashing,
            "incident_id": incident_id,
            "analysis": analysis_dict,
            "detected_brand": detected_brand,
            "environmental_claims": environmental_claims,
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e),
        }), 500
@incidents_bp.route('/list', methods=['GET'])
def list_incidents():
    """Return up to 50 most recent confirmed greenwashing incidents.

    Projects only the fields the frontend list view needs (including the
    compressed image and report type) and stringifies ObjectIds for JSON.
    """
    try:
        client = get_mongo_client()
        collection = client["ethix"]["incidents"]

        incidents = list(collection.find(
            {"is_greenwashing": True},
            {"_id": 1, "product_name": 1, "detected_brand": 1,
             "user_description": 1, "analysis": 1, "created_at": 1,
             "image_base64": 1, "report_type": 1}
        ).sort("created_at", -1).limit(50))

        # ObjectId is not JSON-serializable; convert in place.
        for inc in incidents:
            inc["_id"] = str(inc["_id"])
        return jsonify(incidents)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@incidents_bp.route('/<incident_id>', methods=['GET'])
def get_incident(incident_id):
    """Fetch a single incident by its MongoDB id.

    Returns 404 when no document matches; 500 on lookup errors (including an
    invalid ObjectId string, which raises inside the try block).
    """
    try:
        from bson import ObjectId
        client = get_mongo_client()
        collection = client["ethix"]["incidents"]

        incident = collection.find_one({"_id": ObjectId(incident_id)})
        if not incident:
            return jsonify({"error": "Incident not found"}), 404

        incident["_id"] = str(incident["_id"])
        return jsonify(incident)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

View File

@@ -1,7 +1,7 @@
from flask import Blueprint
from flask import Blueprint
# Main blueprint: trivial health-check style root endpoint.
main_bp = Blueprint('main', __name__)


@main_bp.route('/')
def index():
    """Root endpoint confirming the app is up."""
    return "Hello from the organized Flask App!"

View File

@@ -1,24 +1,24 @@
from flask import Blueprint, request, jsonify
from ..rag.store import vector_search, ingest_documents
from flask import Blueprint ,request ,jsonify
from ..rag .store import vector_search ,ingest_documents
rag_bp = Blueprint('rag', __name__)


@rag_bp.route('/ingest', methods=['POST'])
def ingest():
    """Ingest text chunks into the vector store.

    Expects JSON: {"chunks": [str, ...]}. Returns 201 with a count message,
    or 400 when no chunks are supplied.
    """
    data = request.json
    text_chunks = data.get('chunks', [])
    if not text_chunks:
        return jsonify({"error": "No chunks provided"}), 400
    count = ingest_documents(text_chunks)
    return jsonify({"message": f"Ingested {count} documents"}), 201
@rag_bp.route('/search', methods=['POST'])
def search():
    """Run a vector search over ingested documents.

    Expects JSON: {"query": str}. Returns 200 with {"results": [...]},
    or 400 when the query is missing.
    """
    data = request.json
    query = data.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400
    results = vector_search(query)
    return jsonify({"results": results}), 200

View File

@@ -1,233 +1,233 @@
from flask import Blueprint, jsonify, request
from src.chroma.vector_store import get_all_metadatas, search_documents
from src.rag.embeddings import get_embedding
from flask import Blueprint ,jsonify ,request
from src .chroma .vector_store import get_all_metadatas ,search_documents
from src .rag .embeddings import get_embedding
reports_bp = Blueprint('reports', __name__)


@reports_bp.route('/', methods=['GET'])
def get_reports():
    """Return one entry per unique report file known to the vector store.

    Fetches metadata only (manageable even for ~60k chunks), dedupes by
    source filename, skips user-submitted incident reports, and derives
    company/year/sector from filename heuristics.
    """
    try:
        import re

        metadatas = get_all_metadatas()
        unique_reports = {}
        for meta in metadatas:
            filename = meta.get('source') or meta.get('filename')
            if not filename:
                continue
            # Skip incident reports - these are user-submitted greenwashing reports.
            if meta.get('type') == 'incident_report' or filename.startswith('incident_'):
                continue
            if filename in unique_reports:
                continue

            # Filename patterns like "2020-tesla-impact-report.pdf",
            # "google-2023-environmental-report.pdf", "ghgp_data_2021.xlsx".
            company_name = "Unknown"
            year = "N/A"
            sector = "Other"
            lower_name = filename.lower()

            year_match = re.search(r'20\d{2}', lower_name)
            if year_match:
                year = year_match.group(0)

            if 'tesla' in lower_name:
                company_name = "Tesla"
                sector = "Automotive"
            elif 'google' in lower_name:
                company_name = "Google"
                sector = "Tech"
            elif 'apple' in lower_name:
                company_name = "Apple"
                sector = "Tech"
            elif 'microsoft' in lower_name:
                company_name = "Microsoft"
                sector = "Tech"
            elif 'amazon' in lower_name:
                company_name = "Amazon"
                sector = "Tech"
            elif 'boeing' in lower_name:
                company_name = "Boeing"
                sector = "Aerospace"
            elif 'ghgp' in lower_name:
                company_name = "GHGP Data"
                sector = "Data"
            elif 'salesforce' in lower_name:
                company_name = "Salesforce"
                sector = "Tech"
            elif 'hp ' in lower_name or 'hp-' in lower_name:
                company_name = "HP"
                sector = "Tech"
            else:
                # Fallback: first non-year token of the filename.
                parts = re.split(r'[-_.]', filename)
                if parts:
                    company_name = parts[0].capitalize()
                    if company_name.isdigit():  # filename starts with a year
                        company_name = parts[1].capitalize() if len(parts) > 1 else "Unknown"

            unique_reports[filename] = {
                'company_name': company_name,
                'year': year,
                'sector': sector,
                'greenwashing_score': meta.get('greenwashing_score', 0),
                'filename': filename,
                'title': f"{company_name} {year} Report",
            }

        return jsonify(list(unique_reports.values()))
    except Exception as e:
        print(f"Error fetching reports: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
@reports_bp.route('/search', methods=['POST'])
def search_reports():
    """Hybrid semantic + keyword search over report chunks.

    Expects JSON: {"query": str}. Over-fetches 50 semantic matches, dedupes
    by filename, boosts exact filename/company keyword hits above semantic
    rank, and returns the top 20.
    """
    data = request.json
    query = data.get('query', '')
    if not query:
        return jsonify([])

    try:
        import re

        query_embedding = get_embedding(query)
        # Over-fetch so ~20 unique files survive dedup.
        results = search_documents(query_embedding, num_results=50)
        query_lower = query.lower()

        def extract_company_info(filename):
            """Derive (company_name, year, sector) from filename heuristics."""
            company_name = "Unknown"
            year = "N/A"
            sector = "Other"
            lower_name = filename.lower()

            year_match = re.search(r'20\d{2}', lower_name)
            if year_match:
                year = year_match.group(0)

            if 'tesla' in lower_name:
                company_name = "Tesla"
                sector = "Automotive"
            elif 'google' in lower_name:
                company_name = "Google"
                sector = "Tech"
            elif 'apple' in lower_name:
                company_name = "Apple"
                sector = "Tech"
            elif 'microsoft' in lower_name:
                company_name = "Microsoft"
                sector = "Tech"
            elif 'amazon' in lower_name:
                company_name = "Amazon"
                sector = "Tech"
            elif 'boeing' in lower_name:
                company_name = "Boeing"
                sector = "Aerospace"
            elif 'ghgp' in lower_name:
                company_name = "GHGP Data"
                sector = "Data"
            elif 'salesforce' in lower_name:
                company_name = "Salesforce"
                sector = "Tech"
            elif 'hp ' in lower_name or 'hp-' in lower_name or lower_name.startswith('hp'):
                company_name = "HP"
                sector = "Tech"
            else:
                parts = re.split(r'[-_.]', filename)
                if parts:
                    company_name = parts[0].capitalize()
                    if company_name.isdigit():
                        company_name = parts[1].capitalize() if len(parts) > 1 else "Unknown"
            return company_name, year, sector

        output = []
        seen_filenames = set()
        for item in results:
            meta = item.get('metadata', {})
            text = item.get('text', '')
            filename = meta.get('source') or meta.get('filename', 'Unknown')
            if filename in seen_filenames:
                continue
            seen_filenames.add(filename)

            company_name, year, sector = extract_company_info(filename)

            # Exact keyword hits on filename/company dominate semantic rank.
            match_boost = 0
            if query_lower in filename.lower():
                match_boost = 1000
            if query_lower in company_name.lower():
                match_boost = 1000

            # 'score' is a distance; invert so higher == more relevant.
            semantic_score = 1 / (item.get('score', 1) + 0.001) if item.get('score') else 0
            combined_score = match_boost + semantic_score

            snippet = text[:300] + "..." if len(text) > 300 else text
            output.append({
                'company_name': company_name,
                'year': year,
                'filename': filename,
                'sector': sector,
                'greenwashing_score': meta.get('greenwashing_score', 0),
                'snippet': snippet,
                'relevance_score': item.get('score'),
                '_combined_score': combined_score,
            })

        # Rank by combined score (descending), drop the internal field, trim.
        output.sort(key=lambda x: x.get('_combined_score', 0), reverse=True)
        for item in output:
            item.pop('_combined_score', None)
        return jsonify(output[:20])

    except Exception as e:
        print(f"Error searching reports: {e}")
        return jsonify({'error': str(e)}), 500
@reports_bp.route('/view/<path:filename>', methods=['GET'])
def view_report_file(filename):
    """Serve a raw report file from the backend dataset directory.

    send_from_directory performs safe-path resolution, so traversal via the
    <path:filename> converter is rejected by Flask.
    """
    import os
    from flask import send_from_directory
    # src/routes/reports.py -> src/routes -> src -> ../../dataset
    current_dir = os.path.dirname(os.path.abspath(__file__))
    dataset_dir = os.path.join(current_dir, '..', '..', 'dataset')
    return send_from_directory(dataset_dir, filename)