diff --git a/backend/Dockerfile b/backend/Dockerfile index 9a5aeee..b41bc0a 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,24 +1,24 @@ -# Use a lightweight Python image + FROM python:3.9-slim -# Set working directory inside the container + WORKDIR /app -# Copy requirements first (for better caching) + COPY requirements.txt . -# Install dependencies -# 'gunicorn' must be in your requirements.txt or installed here + + RUN pip install --no-cache-dir -r requirements.txt RUN pip install gunicorn -# Copy the rest of the application + COPY . . -# Expose the internal port (Gunicorn default is 8000, or we choose one) + EXPOSE 5000 -# Command to run production server -# -w 4: 4 worker processes -# -b 0.0.0.0:5000: Bind to all interfaces inside container on port 5000 + + + CMD ["gunicorn", "--workers", "4", "--bind", "0.0.0.0:5000", "app:app"] diff --git a/backend/app.py b/backend/app.py index 963aa10..4c64478 100644 --- a/backend/app.py +++ b/backend/app.py @@ -1,7 +1,7 @@ -import os -from src import create_app +import os +from src import create_app -app = create_app() +app =create_app () -if __name__ == "__main__": - app.run(debug=True, port=5000, host="0.0.0.0") \ No newline at end of file +if __name__ =="__main__": + app .run (debug =True ,port =5000 ,host ="0.0.0.0") \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 03a8e1d..2ece870 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,7 +1,7 @@ flask gunicorn ultralytics -opencv-python +opencv-python-headless transformers torch pandas diff --git a/backend/scripts/populate_db.py b/backend/scripts/populate_db.py index 3865dcd..73491b1 100644 --- a/backend/scripts/populate_db.py +++ b/backend/scripts/populate_db.py @@ -1,62 +1,62 @@ -import os -import sys -import argparse -from pathlib import Path +import os +import sys +import argparse +from pathlib import Path -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +sys .path 
.append (os .path .join (os .path .dirname (__file__ ),'..')) -from dotenv import load_dotenv -load_dotenv() +from dotenv import load_dotenv +load_dotenv () -from src.rag.ingest import process_file -from src.rag.store import ingest_documents -from src.mongo.metadata import is_file_processed, log_processed_file +from src .rag .ingest import process_file +from src .rag .store import ingest_documents +from src .mongo .metadata import is_file_processed ,log_processed_file -def populate_from_dataset(dataset_dir, category=None): - dataset_path = Path(dataset_dir) - if not dataset_path.exists(): - print(f"Dataset directory not found: {dataset_dir}") - return +def populate_from_dataset (dataset_dir ,category =None ): + dataset_path =Path (dataset_dir ) + if not dataset_path .exists (): + print (f"Dataset directory not found: {dataset_dir }") + return - print(f"Scanning {dataset_dir}...") - if category: - print(f"Category: {category}") - - total_chunks = 0 - files_processed = 0 + print (f"Scanning {dataset_dir }...") + if category : + print (f"Category: {category }") - for file_path in dataset_path.glob('*'): - if file_path.is_file() and file_path.suffix.lower() in ['.csv', '.pdf', '.txt', '.xlsx']: - if is_file_processed(file_path.name): - print(f"Skipping {file_path.name} (already processed)") - continue + total_chunks =0 + files_processed =0 - print(f"Processing {file_path.name}...") - try: - chunks = process_file(str(file_path)) - if chunks: - count = ingest_documents(chunks, source_file=file_path.name, category=category) - print(f" Ingested {count} chunks.") - if count > 0: - log_processed_file(file_path.name, category=category, chunk_count=count) - total_chunks += count - files_processed += 1 - else: - print(" No text found/extracted.") - except Exception as e: - print(f" Error processing file: {e}") + for file_path in dataset_path .glob ('*'): + if file_path .is_file ()and file_path .suffix .lower ()in ['.csv','.pdf','.txt','.xlsx']: + if is_file_processed (file_path 
.name ): + print (f"Skipping {file_path .name } (already processed)") + continue - print(f"\nFinished! Processed {files_processed} files. Total chunks ingested: {total_chunks}") + print (f"Processing {file_path .name }...") + try : + chunks =process_file (str (file_path )) + if chunks : + count =ingest_documents (chunks ,source_file =file_path .name ,category =category ) + print (f" Ingested {count } chunks.") + if count >0 : + log_processed_file (file_path .name ,category =category ,chunk_count =count ) + total_chunks +=count + files_processed +=1 + else : + print (" No text found/extracted.") + except Exception as e : + print (f" Error processing file: {e }") -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Populate vector database from dataset files") - parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents") - parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path") - args = parser.parse_args() - - if args.dir: - dataset_dir = args.dir - else: - dataset_dir = os.path.join(os.path.dirname(__file__), '../dataset') - - populate_from_dataset(dataset_dir, category=args.category) + print (f"\nFinished! Processed {files_processed } files. 
Total chunks ingested: {total_chunks }") + +if __name__ =="__main__": + parser =argparse .ArgumentParser (description ="Populate vector database from dataset files") + parser .add_argument ("--category","-c",type =str ,help ="Category to assign to ingested documents") + parser .add_argument ("--dir","-d",type =str ,default =None ,help ="Dataset directory path") + args =parser .parse_args () + + if args .dir : + dataset_dir =args .dir + else : + dataset_dir =os .path .join (os .path .dirname (__file__ ),'../dataset') + + populate_from_dataset (dataset_dir ,category =args .category ) diff --git a/backend/src/__init__.py b/backend/src/__init__.py index 0eeb055..736235a 100644 --- a/backend/src/__init__.py +++ b/backend/src/__init__.py @@ -1,20 +1,20 @@ -from flask import Flask -from flask_cors import CORS -from .routes.main import main_bp -from .routes.rag import rag_bp -from .routes.gemini import gemini_bp +from flask import Flask +from flask_cors import CORS +from .routes .main import main_bp +from .routes .rag import rag_bp +from .routes .gemini import gemini_bp -def create_app(): - app = Flask(__name__) - CORS(app) - - app.register_blueprint(main_bp) - app.register_blueprint(rag_bp, url_prefix='/api/rag') - app.register_blueprint(gemini_bp, url_prefix='/api/gemini') - from .routes.reports import reports_bp - app.register_blueprint(reports_bp, url_prefix='/api/reports') - from .routes.incidents import incidents_bp - app.register_blueprint(incidents_bp, url_prefix='/api/incidents') - - return app +def create_app (): + app =Flask (__name__ ) + CORS (app ) + + app .register_blueprint (main_bp ) + app .register_blueprint (rag_bp ,url_prefix ='/api/rag') + app .register_blueprint (gemini_bp ,url_prefix ='/api/gemini') + from .routes .reports import reports_bp + app .register_blueprint (reports_bp ,url_prefix ='/api/reports') + from .routes .incidents import incidents_bp + app .register_blueprint (incidents_bp ,url_prefix ='/api/incidents') + + return app diff --git 
a/backend/src/chroma/vector_store.py b/backend/src/chroma/vector_store.py index 032914f..5b0f24b 100644 --- a/backend/src/chroma/vector_store.py +++ b/backend/src/chroma/vector_store.py @@ -1,80 +1,80 @@ -import chromadb +import chromadb -CHROMA_HOST = "http://chroma.sirblob.co" -COLLECTION_NAME = "rag_documents" +CHROMA_HOST ="http://chroma.sirblob.co" +COLLECTION_NAME ="rag_documents" -_client = None +_client =None -def get_chroma_client(): - global _client - if _client is None: - _client = chromadb.HttpClient(host=CHROMA_HOST) - return _client +def get_chroma_client (): + global _client + if _client is None : + _client =chromadb .HttpClient (host =CHROMA_HOST ) + return _client -def get_collection(collection_name=COLLECTION_NAME): - client = get_chroma_client() - return client.get_or_create_collection(name=collection_name) +def get_collection (collection_name =COLLECTION_NAME ): + client =get_chroma_client () + return client .get_or_create_collection (name =collection_name ) -def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None): - collection = get_collection(collection_name) - - ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)] - - if metadata_list: - collection.add( - ids=ids, - embeddings=embeddings, - documents=texts, - metadatas=metadata_list +def insert_documents (texts ,embeddings ,collection_name =COLLECTION_NAME ,metadata_list =None ): + collection =get_collection (collection_name ) + + ids =[f"doc_{i }_{hash (text )}"for i ,text in enumerate (texts )] + + if metadata_list : + collection .add ( + ids =ids , + embeddings =embeddings , + documents =texts , + metadatas =metadata_list ) - else: - collection.add( - ids=ids, - embeddings=embeddings, - documents=texts + else : + collection .add ( + ids =ids , + embeddings =embeddings , + documents =texts ) - - return len(texts) -def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None): - collection = 
get_collection(collection_name) - - query_params = { - "query_embeddings": [query_embedding], - "n_results": num_results + return len (texts ) + +def search_documents (query_embedding ,collection_name =COLLECTION_NAME ,num_results =5 ,filter_metadata =None ): + collection =get_collection (collection_name ) + + query_params ={ + "query_embeddings":[query_embedding ], + "n_results":num_results } - - if filter_metadata: - query_params["where"] = filter_metadata - - results = collection.query(**query_params) - - output = [] - if results and results["documents"]: - for i, doc in enumerate(results["documents"][0]): - score = results["distances"][0][i] if "distances" in results else None - meta = results["metadatas"][0][i] if "metadatas" in results else {} - output.append({ - "text": doc, - "score": score, - "metadata": meta + + if filter_metadata : + query_params ["where"]=filter_metadata + + results =collection .query (**query_params ) + + output =[] + if results and results ["documents"]: + for i ,doc in enumerate (results ["documents"][0 ]): + score =results ["distances"][0 ][i ]if "distances"in results else None + meta =results ["metadatas"][0 ][i ]if "metadatas"in results else {} + output .append ({ + "text":doc , + "score":score , + "metadata":meta }) - - return output -def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME): - collection = get_collection(collection_name) - results = collection.get(where={"source": source_file}) - if results["ids"]: - collection.delete(ids=results["ids"]) - return len(results["ids"]) - return 0 + return output -def get_all_metadatas(collection_name=COLLECTION_NAME, limit=None): - collection = get_collection(collection_name) - # Only fetch metadatas to be lightweight - if limit: - results = collection.get(include=["metadatas"], limit=limit) - else: - results = collection.get(include=["metadatas"]) - return results["metadatas"] if results and "metadatas" in results else [] +def delete_documents_by_source 
(source_file ,collection_name =COLLECTION_NAME ): + collection =get_collection (collection_name ) + results =collection .get (where ={"source":source_file }) + if results ["ids"]: + collection .delete (ids =results ["ids"]) + return len (results ["ids"]) + return 0 + +def get_all_metadatas (collection_name =COLLECTION_NAME ,limit =None ): + collection =get_collection (collection_name ) + + if limit : + results =collection .get (include =["metadatas"],limit =limit ) + else : + results =collection .get (include =["metadatas"]) + return results ["metadatas"]if results and "metadatas"in results else [] diff --git a/backend/src/cv/__init__.py b/backend/src/cv/__init__.py index f563a57..e5e4371 100644 --- a/backend/src/cv/__init__.py +++ b/backend/src/cv/__init__.py @@ -1,47 +1,47 @@ from .config import ( - CV_DIR, - DATA_DIR, - MODELS_DIR, - ULTRALYTICS_AVAILABLE, - YOLO26_MODELS, - SUPER_CATEGORIES, - COMMON_BRANDS, - COLORS, - DEFAULT_CONF_THRESHOLD, - DEFAULT_IOU_THRESHOLD, - DEFAULT_IMG_SIZE, +CV_DIR , +DATA_DIR , +MODELS_DIR , +ULTRALYTICS_AVAILABLE , +YOLO26_MODELS , +SUPER_CATEGORIES , +COMMON_BRANDS , +COLORS , +DEFAULT_CONF_THRESHOLD , +DEFAULT_IOU_THRESHOLD , +DEFAULT_IMG_SIZE , ) from .detectors import ( - YOLO26Detector, - HybridLogoDetector, +YOLO26Detector , +HybridLogoDetector , ) from .yolo_scanner import ( - start_scanner as start_yolo_scanner, - detect_objects as detect_yolo_objects, +start_scanner as start_yolo_scanner , +detect_objects as detect_yolo_objects , ) from .scanner import ( - start_interactive_capture as start_ollama_scanner, - capture_and_analyze as capture_ollama_once, +start_interactive_capture as start_ollama_scanner , +capture_and_analyze as capture_ollama_once , ) -__all__ = [ - "CV_DIR", - "DATA_DIR", - "MODELS_DIR", - "ULTRALYTICS_AVAILABLE", - "YOLO26_MODELS", - "SUPER_CATEGORIES", - "COMMON_BRANDS", - "COLORS", - "DEFAULT_CONF_THRESHOLD", - "DEFAULT_IOU_THRESHOLD", - "DEFAULT_IMG_SIZE", - "YOLO26Detector", - "HybridLogoDetector", 
- "start_yolo_scanner", - "detect_yolo_objects", - "start_ollama_scanner", - "capture_ollama_once", +__all__ =[ +"CV_DIR", +"DATA_DIR", +"MODELS_DIR", +"ULTRALYTICS_AVAILABLE", +"YOLO26_MODELS", +"SUPER_CATEGORIES", +"COMMON_BRANDS", +"COLORS", +"DEFAULT_CONF_THRESHOLD", +"DEFAULT_IOU_THRESHOLD", +"DEFAULT_IMG_SIZE", +"YOLO26Detector", +"HybridLogoDetector", +"start_yolo_scanner", +"detect_yolo_objects", +"start_ollama_scanner", +"capture_ollama_once", ] -__version__ = "2.0.0" +__version__ ="2.0.0" diff --git a/backend/src/cv/__main__.py b/backend/src/cv/__main__.py index 9ae637f..4e6e9b5 100644 --- a/backend/src/cv/__main__.py +++ b/backend/src/cv/__main__.py @@ -1,4 +1,4 @@ -from .cli import main +from .cli import main -if __name__ == "__main__": - main() +if __name__ =="__main__": + main () diff --git a/backend/src/cv/api_chain.py b/backend/src/cv/api_chain.py index 4719d64..e48d916 100644 --- a/backend/src/cv/api_chain.py +++ b/backend/src/cv/api_chain.py @@ -1,2 +1,2 @@ -print('API chain') +print ('API chain') diff --git a/backend/src/cv/cli.py b/backend/src/cv/cli.py index e2c7ec1..04e4621 100644 --- a/backend/src/cv/cli.py +++ b/backend/src/cv/cli.py @@ -1,47 +1,47 @@ -#!/usr/bin/env python3 -import argparse -from .config import YOLO26_MODELS -from .yolo_scanner import start_scanner as start_yolo, detect_objects -from .scanner import start_interactive_capture as start_ollama +import argparse -def main(): - parser = argparse.ArgumentParser( - description="Ollama and YOLO Logo Detection Scanner" +from .config import YOLO26_MODELS +from .yolo_scanner import start_scanner as start_yolo ,detect_objects +from .scanner import start_interactive_capture as start_ollama + +def main (): + parser =argparse .ArgumentParser ( + description ="Ollama and YOLO Logo Detection Scanner" ) - - parser.add_argument("--model", "-m", type=str) - parser.add_argument("--size", "-s", type=str, default="nano", - choices=["nano", "small", "medium", "large", "xlarge"]) - 
parser.add_argument("--logo-model", type=str) - parser.add_argument("--yolo", action="store_true") - parser.add_argument("--no-gui", action="store_true") - parser.add_argument("--track", "-t", action="store_true") - parser.add_argument("--hybrid", action="store_true") - parser.add_argument("--image", "-i", type=str) - - args = parser.parse_args() - - if args.image: - detections = detect_objects( - args.image, model_size=args.size, hybrid_mode=args.hybrid - ) - print(f"Found {len(detections)} detections:") - for det in detections: - print(f" {det['label']}: {det['confidence']:.2%}") - - elif args.yolo: - start_yolo( - model_path=args.model, - model_size=args.size, - logo_model_path=args.logo_model, - use_gui=not args.no_gui, - use_tracking=args.track, - hybrid_mode=args.hybrid - ) - - else: - start_ollama() -if __name__ == "__main__": - main() + parser .add_argument ("--model","-m",type =str ) + parser .add_argument ("--size","-s",type =str ,default ="nano", + choices =["nano","small","medium","large","xlarge"]) + parser .add_argument ("--logo-model",type =str ) + parser .add_argument ("--yolo",action ="store_true") + parser .add_argument ("--no-gui",action ="store_true") + parser .add_argument ("--track","-t",action ="store_true") + parser .add_argument ("--hybrid",action ="store_true") + parser .add_argument ("--image","-i",type =str ) + + args =parser .parse_args () + + if args .image : + detections =detect_objects ( + args .image ,model_size =args .size ,hybrid_mode =args .hybrid + ) + print (f"Found {len (detections )} detections:") + for det in detections : + print (f" {det ['label']}: {det ['confidence']:.2%}") + + elif args .yolo : + start_yolo ( + model_path =args .model , + model_size =args .size , + logo_model_path =args .logo_model , + use_gui =not args .no_gui , + use_tracking =args .track , + hybrid_mode =args .hybrid + ) + + else : + start_ollama () + +if __name__ =="__main__": + main () diff --git a/backend/src/cv/config.py b/backend/src/cv/config.py 
index e167391..5df0411 100644 --- a/backend/src/cv/config.py +++ b/backend/src/cv/config.py @@ -1,61 +1,61 @@ -import os -from pathlib import Path -from typing import Dict +import os +from pathlib import Path +from typing import Dict -CV_DIR = Path(__file__).parent -DATA_DIR = CV_DIR / "data" -MODELS_DIR = CV_DIR / "models" +CV_DIR =Path (__file__ ).parent +DATA_DIR =CV_DIR /"data" +MODELS_DIR =CV_DIR /"models" -DATA_DIR.mkdir(parents=True, exist_ok=True) -MODELS_DIR.mkdir(parents=True, exist_ok=True) +DATA_DIR .mkdir (parents =True ,exist_ok =True ) +MODELS_DIR .mkdir (parents =True ,exist_ok =True ) -try: - from ultralytics import YOLO - ULTRALYTICS_AVAILABLE = True -except ImportError: - ULTRALYTICS_AVAILABLE = False - YOLO = None +try : + from ultralytics import YOLO + ULTRALYTICS_AVAILABLE =True +except ImportError : + ULTRALYTICS_AVAILABLE =False + YOLO =None -YOLO26_MODELS: Dict[str, str] = { - "nano": "yolo26n.pt", - "small": "yolo26s.pt", - "medium": "yolo26m.pt", - "large": "yolo26l.pt", - "xlarge": "yolo26x.pt", +YOLO26_MODELS :Dict [str ,str ]={ +"nano":"yolo26n.pt", +"small":"yolo26s.pt", +"medium":"yolo26m.pt", +"large":"yolo26l.pt", +"xlarge":"yolo26x.pt", } -SUPER_CATEGORIES: Dict[str, int] = { - "Food": 932, - "Clothes": 604, - "Necessities": 432, - "Others": 371, - "Electronic": 224, - "Transportation": 213, - "Leisure": 111, - "Sports": 66, - "Medical": 47 +SUPER_CATEGORIES :Dict [str ,int ]={ +"Food":932 , +"Clothes":604 , +"Necessities":432 , +"Others":371 , +"Electronic":224 , +"Transportation":213 , +"Leisure":111 , +"Sports":66 , +"Medical":47 } -COMMON_BRANDS = [ - "McDonalds", "Starbucks", "CocaCola", "Pepsi", "KFC", "BurgerKing", - "Subway", "DunkinDonuts", "PizzaHut", "Dominos", "Nestle", "Heineken", - "Nike", "Adidas", "Puma", "UnderArmour", "Levis", "HM", "Zara", "Gap", - "Gucci", "LouisVuitton", "Chanel", "Versace", "Prada", "Armani", - "Apple", "Samsung", "HP", "Dell", "Intel", "AMD", "Nvidia", "Microsoft", - "Sony", "LG", "Huawei", 
"Xiaomi", "Lenovo", "Asus", "Acer", - "BMW", "Mercedes", "Audi", "Toyota", "Honda", "Ford", "Chevrolet", - "Volkswagen", "Tesla", "Porsche", "Ferrari", "Lamborghini", "Nissan", - "Google", "Facebook", "Twitter", "Instagram", "YouTube", "Amazon", - "Netflix", "Spotify", "Uber", "Airbnb", "PayPal", "Visa", "Mastercard" +COMMON_BRANDS =[ +"McDonalds","Starbucks","CocaCola","Pepsi","KFC","BurgerKing", +"Subway","DunkinDonuts","PizzaHut","Dominos","Nestle","Heineken", +"Nike","Adidas","Puma","UnderArmour","Levis","HM","Zara","Gap", +"Gucci","LouisVuitton","Chanel","Versace","Prada","Armani", +"Apple","Samsung","HP","Dell","Intel","AMD","Nvidia","Microsoft", +"Sony","LG","Huawei","Xiaomi","Lenovo","Asus","Acer", +"BMW","Mercedes","Audi","Toyota","Honda","Ford","Chevrolet", +"Volkswagen","Tesla","Porsche","Ferrari","Lamborghini","Nissan", +"Google","Facebook","Twitter","Instagram","YouTube","Amazon", +"Netflix","Spotify","Uber","Airbnb","PayPal","Visa","Mastercard" ] -COLORS = { - "high_conf": (0, 255, 0), - "medium_conf": (0, 255, 255), - "low_conf": (0, 165, 255), - "logo": (255, 0, 255), +COLORS ={ +"high_conf":(0 ,255 ,0 ), +"medium_conf":(0 ,255 ,255 ), +"low_conf":(0 ,165 ,255 ), +"logo":(255 ,0 ,255 ), } -DEFAULT_CONF_THRESHOLD = 0.25 -DEFAULT_IOU_THRESHOLD = 0.45 -DEFAULT_IMG_SIZE = 640 +DEFAULT_CONF_THRESHOLD =0.25 +DEFAULT_IOU_THRESHOLD =0.45 +DEFAULT_IMG_SIZE =640 diff --git a/backend/src/cv/detectors/__init__.py b/backend/src/cv/detectors/__init__.py index 6681f8f..f497120 100644 --- a/backend/src/cv/detectors/__init__.py +++ b/backend/src/cv/detectors/__init__.py @@ -1,7 +1,7 @@ -from .yolo26 import YOLO26Detector -from .hybrid import HybridLogoDetector +from .yolo26 import YOLO26Detector +from .hybrid import HybridLogoDetector -__all__ = [ - "YOLO26Detector", - "HybridLogoDetector", +__all__ =[ +"YOLO26Detector", +"HybridLogoDetector", ] diff --git a/backend/src/cv/detectors/hybrid.py b/backend/src/cv/detectors/hybrid.py index e4beed0..676a3c7 100644 --- 
a/backend/src/cv/detectors/hybrid.py +++ b/backend/src/cv/detectors/hybrid.py @@ -1,154 +1,154 @@ -import cv2 -import numpy as np -import os -from typing import List, Dict, Optional +import cv2 +import numpy as np +import os +from typing import List ,Dict ,Optional from ..config import ( - ULTRALYTICS_AVAILABLE, - MODELS_DIR, - COLORS, - DEFAULT_CONF_THRESHOLD, +ULTRALYTICS_AVAILABLE , +MODELS_DIR , +COLORS , +DEFAULT_CONF_THRESHOLD , ) -from .yolo26 import YOLO26Detector +from .yolo26 import YOLO26Detector -if ULTRALYTICS_AVAILABLE: - from ultralytics import YOLO +if ULTRALYTICS_AVAILABLE : + from ultralytics import YOLO -class HybridLogoDetector: - def __init__(self, - coco_model_size: str = "nano", - logo_model_path: Optional[str] = None, - conf_threshold: float = DEFAULT_CONF_THRESHOLD, - device: str = "auto"): - self.conf_threshold = conf_threshold - self.device = device - self.coco_detector = None - self.logo_model = None - - if not ULTRALYTICS_AVAILABLE: - raise RuntimeError("Ultralytics not installed. Run: pip install ultralytics") - - print("Loading YOLO26 COCO base model...") - self.coco_detector = YOLO26Detector( - model_size=coco_model_size, - conf_threshold=conf_threshold, - device=device +class HybridLogoDetector : + def __init__ (self , + coco_model_size :str ="nano", + logo_model_path :Optional [str ]=None , + conf_threshold :float =DEFAULT_CONF_THRESHOLD , + device :str ="auto"): + self .conf_threshold =conf_threshold + self .device =device + self .coco_detector =None + self .logo_model =None + + if not ULTRALYTICS_AVAILABLE : + raise RuntimeError ("Ultralytics not installed. 
Run: pip install ultralytics") + + print ("Loading YOLO26 COCO base model...") + self .coco_detector =YOLO26Detector ( + model_size =coco_model_size , + conf_threshold =conf_threshold , + device =device ) - - if logo_model_path and os.path.exists(logo_model_path): - print(f"Loading logo model: {logo_model_path}") - self.logo_model = YOLO(logo_model_path) - print("Logo model loaded!") - else: - default_logo_model = MODELS_DIR / "logo_detector.pt" - if default_logo_model.exists(): - print(f"Loading default logo model: {default_logo_model}") - self.logo_model = YOLO(str(default_logo_model)) - print("Logo model loaded!") - else: - print("No logo model found.") - - print("Hybrid detector ready!") - - def detect(self, - frame: np.ndarray, - detect_objects: bool = True, - detect_logos: bool = True, - conf_threshold: Optional[float] = None) -> List[Dict]: - conf = conf_threshold if conf_threshold is not None else self.conf_threshold - all_detections = [] - - if detect_objects and self.coco_detector: - object_detections = self.coco_detector.detect(frame, conf_threshold=conf) - for det in object_detections: - det["type"] = "object" - all_detections.extend(object_detections) - - if detect_logos and self.logo_model: - logo_detections = self._detect_logos(frame, conf) - for det in logo_detections: - det["type"] = "logo" - all_detections.extend(logo_detections) - - return all_detections - - def _detect_logos(self, frame: np.ndarray, conf_threshold: float) -> List[Dict]: - if self.logo_model is None: + + if logo_model_path and os .path .exists (logo_model_path ): + print (f"Loading logo model: {logo_model_path }") + self .logo_model =YOLO (logo_model_path ) + print ("Logo model loaded!") + else : + default_logo_model =MODELS_DIR /"logo_detector.pt" + if default_logo_model .exists (): + print (f"Loading default logo model: {default_logo_model }") + self .logo_model =YOLO (str (default_logo_model )) + print ("Logo model loaded!") + else : + print ("No logo model found.") + + print 
("Hybrid detector ready!") + + def detect (self , + frame :np .ndarray , + detect_objects :bool =True , + detect_logos :bool =True , + conf_threshold :Optional [float ]=None )->List [Dict ]: + conf =conf_threshold if conf_threshold is not None else self .conf_threshold + all_detections =[] + + if detect_objects and self .coco_detector : + object_detections =self .coco_detector .detect (frame ,conf_threshold =conf ) + for det in object_detections : + det ["type"]="object" + all_detections .extend (object_detections ) + + if detect_logos and self .logo_model : + logo_detections =self ._detect_logos (frame ,conf ) + for det in logo_detections : + det ["type"]="logo" + all_detections .extend (logo_detections ) + + return all_detections + + def _detect_logos (self ,frame :np .ndarray ,conf_threshold :float )->List [Dict ]: + if self .logo_model is None : return [] - - results = self.logo_model( - frame, - conf=conf_threshold, - device=self.device if self.device != "auto" else None, - verbose=False + + results =self .logo_model ( + frame , + conf =conf_threshold , + device =self .device if self .device !="auto"else None , + verbose =False ) - - detections = [] - for result in results: - boxes = result.boxes - if boxes is None: - continue - - for i in range(len(boxes)): - xyxy = boxes.xyxy[i].cpu().numpy() - x1, y1, x2, y2 = map(int, xyxy) - conf_val = float(boxes.conf[i].cpu().numpy()) - class_id = int(boxes.cls[i].cpu().numpy()) - label = self.logo_model.names[class_id] - - detections.append({ - "bbox": (x1, y1, x2, y2), - "label": label, - "confidence": conf_val, - "class_id": class_id, - "brand": label + + detections =[] + for result in results : + boxes =result .boxes + if boxes is None : + continue + + for i in range (len (boxes )): + xyxy =boxes .xyxy [i ].cpu ().numpy () + x1 ,y1 ,x2 ,y2 =map (int ,xyxy ) + conf_val =float (boxes .conf [i ].cpu ().numpy ()) + class_id =int (boxes .cls [i ].cpu ().numpy ()) + label =self .logo_model .names [class_id ] + + 
detections .append ({ + "bbox":(x1 ,y1 ,x2 ,y2 ), + "label":label , + "confidence":conf_val , + "class_id":class_id , + "brand":label }) - - return detections - - def draw_detections(self, - frame: np.ndarray, - detections: List[Dict], - show_labels: bool = True) -> np.ndarray: - result = frame.copy() - - for det in detections: - x1, y1, x2, y2 = det["bbox"] - label = det["label"] - conf = det["confidence"] - det_type = det.get("type", "object") - - if det_type == "logo": - color = COLORS["logo"] - elif conf > 0.7: - color = COLORS["high_conf"] - elif conf > 0.5: - color = COLORS["medium_conf"] - else: - color = COLORS["low_conf"] - - cv2.rectangle(result, (x1, y1), (x2, y2), color, 2) - - if show_labels: - label_text = f"{label}: {conf:.2f}" - (text_w, text_h), _ = cv2.getTextSize( - label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1 + + return detections + + def draw_detections (self , + frame :np .ndarray , + detections :List [Dict ], + show_labels :bool =True )->np .ndarray : + result =frame .copy () + + for det in detections : + x1 ,y1 ,x2 ,y2 =det ["bbox"] + label =det ["label"] + conf =det ["confidence"] + det_type =det .get ("type","object") + + if det_type =="logo": + color =COLORS ["logo"] + elif conf >0.7 : + color =COLORS ["high_conf"] + elif conf >0.5 : + color =COLORS ["medium_conf"] + else : + color =COLORS ["low_conf"] + + cv2 .rectangle (result ,(x1 ,y1 ),(x2 ,y2 ),color ,2 ) + + if show_labels : + label_text =f"{label }: {conf :.2f}" + (text_w ,text_h ),_ =cv2 .getTextSize ( + label_text ,cv2 .FONT_HERSHEY_SIMPLEX ,0.5 ,1 ) - cv2.rectangle( - result, - (x1, y1 - text_h - 8), - (x1 + text_w + 4, y1), - color, - -1 + cv2 .rectangle ( + result , + (x1 ,y1 -text_h -8 ), + (x1 +text_w +4 ,y1 ), + color , + -1 ) - cv2.putText( - result, - label_text, - (x1 + 2, y1 - 4), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255) if det_type == "logo" else (0, 0, 0), - 1 + cv2 .putText ( + result , + label_text , + (x1 +2 ,y1 -4 ), + cv2 .FONT_HERSHEY_SIMPLEX , + 0.5 
, + (255 ,255 ,255 )if det_type =="logo"else (0 ,0 ,0 ), + 1 ) - - return result + + return result diff --git a/backend/src/cv/detectors/yolo26.py b/backend/src/cv/detectors/yolo26.py index 3d57700..372d992 100644 --- a/backend/src/cv/detectors/yolo26.py +++ b/backend/src/cv/detectors/yolo26.py @@ -1,186 +1,186 @@ -import cv2 -import numpy as np -import os -from typing import List, Dict, Optional +import cv2 +import numpy as np +import os +from typing import List ,Dict ,Optional from ..config import ( - ULTRALYTICS_AVAILABLE, - YOLO26_MODELS, - COLORS, - DEFAULT_CONF_THRESHOLD, - DEFAULT_IOU_THRESHOLD, +ULTRALYTICS_AVAILABLE , +YOLO26_MODELS , +COLORS , +DEFAULT_CONF_THRESHOLD , +DEFAULT_IOU_THRESHOLD , ) -if ULTRALYTICS_AVAILABLE: - from ultralytics import YOLO +if ULTRALYTICS_AVAILABLE : + from ultralytics import YOLO -class YOLO26Detector: - def __init__(self, - model_size: str = "nano", - model_path: Optional[str] = None, - conf_threshold: float = DEFAULT_CONF_THRESHOLD, - iou_threshold: float = DEFAULT_IOU_THRESHOLD, - device: str = "auto"): - self.conf_threshold = conf_threshold - self.iou_threshold = iou_threshold - self.device = device - self.model = None - - if not ULTRALYTICS_AVAILABLE: - raise RuntimeError("Ultralytics not installed. 
class YOLO26Detector:
    """Thin wrapper around an Ultralytics YOLO model.

    Provides per-frame detection, ByteTrack-based tracking and a drawing
    helper. Detections are plain dicts with keys "bbox", "label",
    "confidence", "class_id" (plus "track_id" when tracking).
    """

    def __init__(self,
                 model_size: str = "nano",
                 model_path: Optional[str] = None,
                 conf_threshold: float = DEFAULT_CONF_THRESHOLD,
                 iou_threshold: float = DEFAULT_IOU_THRESHOLD,
                 device: str = "auto"):
        """Load a YOLO26 model.

        Args:
            model_size: key into YOLO26_MODELS ("nano", ...) used when no
                explicit model_path is given; unknown sizes fall back to "nano".
            model_path: optional path to a weights file; takes precedence
                over model_size when the file exists.
            conf_threshold: default confidence threshold for detections.
            iou_threshold: IoU threshold passed to NMS.
            device: inference device; "auto" lets Ultralytics pick.

        Raises:
            RuntimeError: if the ultralytics package is not installed.
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.device = device
        self.model = None

        if not ULTRALYTICS_AVAILABLE:
            raise RuntimeError("Ultralytics not installed. Run: pip install ultralytics")

        if model_path and os.path.exists(model_path):
            model_name = model_path
        elif model_size in YOLO26_MODELS:
            model_name = YOLO26_MODELS[model_size]
        else:
            print(f"Unknown model size '{model_size}', defaulting to 'nano'")
            model_name = YOLO26_MODELS["nano"]

        print(f"Loading YOLO26 model: {model_name}")
        self.model = YOLO(model_name)
        print("YOLO26 model loaded successfully!")
        print(f"Classes: {len(self.model.names)} | Device: {device}")

    def _boxes_to_detections(self, results, include_track_id: bool = False) -> List[Dict]:
        """Convert Ultralytics result objects into plain detection dicts.

        Shared by detect() and detect_and_track(); when include_track_id is
        True a "track_id" key is added (None if the tracker assigned no id).
        """
        detections = []
        for result in results:
            boxes = result.boxes
            if boxes is None:
                continue

            for i in range(len(boxes)):
                x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
                class_id = int(boxes.cls[i].cpu().numpy())
                det = {
                    "bbox": (x1, y1, x2, y2),
                    "label": self.model.names[class_id],
                    "confidence": float(boxes.conf[i].cpu().numpy()),
                    "class_id": class_id,
                }
                if include_track_id:
                    det["track_id"] = (int(boxes.id[i].cpu().numpy())
                                       if boxes.id is not None else None)
                detections.append(det)
        return detections

    def detect(self,
               frame: np.ndarray,
               conf_threshold: Optional[float] = None,
               classes: Optional[List[int]] = None) -> List[Dict]:
        """Run a single detection pass on one frame.

        Args:
            frame: BGR image as a numpy array.
            conf_threshold: per-call override of the default threshold.
            classes: optional list of class ids to restrict detection to.

        Returns:
            List of detection dicts (empty if the model is not loaded).
        """
        if self.model is None:
            return []

        conf = conf_threshold if conf_threshold is not None else self.conf_threshold

        results = self.model(
            frame,
            conf=conf,
            iou=self.iou_threshold,
            device=self.device if self.device != "auto" else None,
            classes=classes,
            verbose=False
        )
        return self._boxes_to_detections(results)

    def detect_and_track(self,
                         frame: np.ndarray,
                         conf_threshold: Optional[float] = None,
                         tracker: str = "bytetrack.yaml") -> List[Dict]:
        """Run detection with persistent multi-object tracking.

        Same as detect() but each dict also carries "track_id" (may be None
        when the tracker has not assigned an id yet).
        """
        if self.model is None:
            return []

        conf = conf_threshold if conf_threshold is not None else self.conf_threshold

        results = self.model.track(
            frame,
            conf=conf,
            iou=self.iou_threshold,
            device=self.device if self.device != "auto" else None,
            tracker=tracker,
            persist=True,  # keep track ids stable across successive frames
            verbose=False
        )
        return self._boxes_to_detections(results, include_track_id=True)

    def draw_detections(self,
                        frame: np.ndarray,
                        detections: List[Dict],
                        show_labels: bool = True,
                        show_conf: bool = True) -> np.ndarray:
        """Return a copy of frame with boxes and labels drawn.

        Box color encodes confidence (>0.7 high, >0.5 medium, else low).
        """
        result = frame.copy()

        for det in detections:
            x1, y1, x2, y2 = det["bbox"]
            label = det["label"]
            conf = det["confidence"]
            track_id = det.get("track_id")

            if conf > 0.7:
                color = COLORS["high_conf"]
            elif conf > 0.5:
                color = COLORS["medium_conf"]
            else:
                color = COLORS["low_conf"]

            cv2.rectangle(result, (x1, y1), (x2, y2), color, 2)

            if show_labels:
                label_parts = [label]
                if track_id is not None:
                    label_parts.append(f"ID:{track_id}")
                if show_conf:
                    label_parts.append(f"{conf:.2f}")
                label_text = " | ".join(label_parts)

                (text_w, text_h), baseline = cv2.getTextSize(
                    label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
                )
                # Filled background strip above the box so text stays legible.
                cv2.rectangle(
                    result,
                    (x1, y1 - text_h - 8),
                    (x1 + text_w + 4, y1),
                    color,
                    -1
                )
                cv2.putText(
                    result,
                    label_text,
                    (x1 + 2, y1 - 4),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 0, 0),
                    1
                )

        return result

    def get_class_names(self) -> Dict[int, str]:
        """Return the model's id->name class map ({} if no model loaded)."""
        return self.model.names if self.model else {}
Press SPACE to capture, Q to quit.") - - result = None - - while True: - ret, frame = cap.read() - if not ret: - break - - display = frame.copy() - cv2.putText(display, "Press SPACE to capture | Q to quit", - (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) - cv2.imshow("Capture", display) - - key = cv2.waitKey(1) & 0xFF - - if key == ord(' '): - print("Analyzing image...") - - if save_image: - if output_dir is None: - output_dir = "./captures" - Path(output_dir).mkdir(parents=True, exist_ok=True) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - img_path = Path(output_dir) / f"capture_{timestamp}.jpg" - cv2.imwrite(str(img_path), frame) - print(f"Saved: {img_path}") - - detector = OllamaLogoDetector(model=model) - result = detector.detect_from_numpy(frame) - - _display_results(result) - break - - elif key == ord('q'): - break - - cap.release() - cv2.destroyAllWindows() - - return result if result else {"logos_detected": [], "total_count": 0} +def capture_and_analyze (model :str ="ministral-3:latest", +save_image :bool =True , +output_dir :Optional [str ]=None )->Dict : + cap =cv2 .VideoCapture (0 ) + if not cap .isOpened (): + raise RuntimeError ("Could not access camera") -def start_interactive_capture(model: str = "ministral-3:latest", - save_images: bool = True, - output_dir: Optional[str] = None): - cap = cv2.VideoCapture(0) - if not cap.isOpened(): - raise RuntimeError("Could not access camera") - - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - - print("=" * 60) - print("Ollama Logo Detection - Interactive Mode") - print("=" * 60) - print(f"Camera: {width}x{height}") - print(f"Model: {model}") - print("\nControls:") - print(" SPACE - Capture and analyze") - print(" S - Save frame only") - print(" R - Show last results") - print(" Q - Quit") - print("=" * 60) - - detector = OllamaLogoDetector(model=model) - last_result = None - analyzing = False - status_message = "Ready - Press SPACE to 
capture" - - if output_dir is None: - output_dir = "./captures" - Path(output_dir).mkdir(parents=True, exist_ok=True) - - while True: - ret, frame = cap.read() - if not ret: - break - - display = frame.copy() - - cv2.rectangle(display, (0, 0), (width, 40), (40, 40, 40), -1) - cv2.putText(display, status_message, (10, 28), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) - - if last_result and last_result.get("logos_detected"): - brands = [l.get("brand", "?") for l in last_result["logos_detected"]] - brand_text = f"Detected: {', '.join(brands[:3])}" - if len(brands) > 3: - brand_text += f" +{len(brands)-3} more" - cv2.rectangle(display, (0, height-35), (width, height), (40, 40, 40), -1) - cv2.putText(display, brand_text, (10, height-10), - cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2) - - cv2.imshow("Ollama Logo Detection", display) - - key = cv2.waitKey(1) & 0xFF - - if key == ord(' ') and not analyzing: - analyzing = True - status_message = "Analyzing with Ollama..." - cv2.imshow("Ollama Logo Detection", display) - cv2.waitKey(1) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - img_path = Path(output_dir) / f"capture_{timestamp}.jpg" - - if save_images: - cv2.imwrite(str(img_path), frame) - - last_result = detector.detect_from_numpy(frame) - - json_path = Path(output_dir) / f"result_{timestamp}.json" - with open(json_path, 'w') as f: - json.dump(last_result, f, indent=2) - - count = last_result.get("total_count", 0) - if count > 0: - status_message = f"Found {count} logo(s)! Press R for details" - else: - status_message = "No logos detected. Try again!" 
- - print(f"\nCaptured: {img_path}") - print(f"Results: {json_path}") - _display_results(last_result) - - analyzing = False - - elif key == ord('s'): - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - img_path = Path(output_dir) / f"capture_{timestamp}.jpg" - cv2.imwrite(str(img_path), frame) - status_message = f"Saved: {img_path.name}" - print(f"Saved: {img_path}") - - elif key == ord('r') and last_result: - print("\n" + "=" * 40) - print("Last Detection Results:") - print("=" * 40) - _display_results(last_result) - - elif key == ord('q'): - break - - cap.release() - cv2.destroyAllWindows() - - print("\nGoodbye!") - return last_result + print ("Camera ready. Press SPACE to capture, Q to quit.") -def _display_results(result: Dict): - print("\n" + "-" * 40) - - logos = result.get("logos_detected", []) - count = result.get("total_count", len(logos)) - - if count == 0: - print("No logos or brands detected") - if "description" in result: - print(f"Description: {result['description']}") - else: - print(f"Detected {count} logo(s)/brand(s):\n") - for i, logo in enumerate(logos, 1): - brand = logo.get("brand", "Unknown") - conf = logo.get("confidence", "unknown") - loc = logo.get("location", "unknown") - cat = logo.get("category", "") - - print(f" {i}. 
{brand}") - print(f" Confidence: {conf}") - print(f" Location: {loc}") - if cat: - print(f" Category: {cat}") - print() - - if "error" in result: - print(f"Error: {result['error']}") - - print("-" * 40) - - print("\nJSON Output:") - print(json.dumps(result, indent=2)) + result =None + + while True : + ret ,frame =cap .read () + if not ret : + break + + display =frame .copy () + cv2 .putText (display ,"Press SPACE to capture | Q to quit", + (10 ,30 ),cv2 .FONT_HERSHEY_SIMPLEX ,0.7 ,(0 ,255 ,0 ),2 ) + cv2 .imshow ("Capture",display ) + + key =cv2 .waitKey (1 )&0xFF + + if key ==ord (' '): + print ("Analyzing image...") + + if save_image : + if output_dir is None : + output_dir ="./captures" + Path (output_dir ).mkdir (parents =True ,exist_ok =True ) + + timestamp =datetime .now ().strftime ("%Y%m%d_%H%M%S") + img_path =Path (output_dir )/f"capture_{timestamp }.jpg" + cv2 .imwrite (str (img_path ),frame ) + print (f"Saved: {img_path }") + + detector =OllamaLogoDetector (model =model ) + result =detector .detect_from_numpy (frame ) + + _display_results (result ) + break + + elif key ==ord ('q'): + break + + cap .release () + cv2 .destroyAllWindows () + + return result if result else {"logos_detected":[],"total_count":0 } + +def start_interactive_capture (model :str ="ministral-3:latest", +save_images :bool =True , +output_dir :Optional [str ]=None ): + cap =cv2 .VideoCapture (0 ) + if not cap .isOpened (): + raise RuntimeError ("Could not access camera") + + width =int (cap .get (cv2 .CAP_PROP_FRAME_WIDTH )) + height =int (cap .get (cv2 .CAP_PROP_FRAME_HEIGHT )) + + print ("="*60 ) + print ("Ollama Logo Detection - Interactive Mode") + print ("="*60 ) + print (f"Camera: {width }x{height }") + print (f"Model: {model }") + print ("\nControls:") + print (" SPACE - Capture and analyze") + print (" S - Save frame only") + print (" R - Show last results") + print (" Q - Quit") + print ("="*60 ) + + detector =OllamaLogoDetector (model =model ) + last_result =None + analyzing 
=False + status_message ="Ready - Press SPACE to capture" + + if output_dir is None : + output_dir ="./captures" + Path (output_dir ).mkdir (parents =True ,exist_ok =True ) + + while True : + ret ,frame =cap .read () + if not ret : + break + + display =frame .copy () + + cv2 .rectangle (display ,(0 ,0 ),(width ,40 ),(40 ,40 ,40 ),-1 ) + cv2 .putText (display ,status_message ,(10 ,28 ), + cv2 .FONT_HERSHEY_SIMPLEX ,0.7 ,(0 ,255 ,0 ),2 ) + + if last_result and last_result .get ("logos_detected"): + brands =[l .get ("brand","?")for l in last_result ["logos_detected"]] + brand_text =f"Detected: {', '.join (brands [:3 ])}" + if len (brands )>3 : + brand_text +=f" +{len (brands )-3 } more" + cv2 .rectangle (display ,(0 ,height -35 ),(width ,height ),(40 ,40 ,40 ),-1 ) + cv2 .putText (display ,brand_text ,(10 ,height -10 ), + cv2 .FONT_HERSHEY_SIMPLEX ,0.6 ,(255 ,255 ,0 ),2 ) + + cv2 .imshow ("Ollama Logo Detection",display ) + + key =cv2 .waitKey (1 )&0xFF + + if key ==ord (' ')and not analyzing : + analyzing =True + status_message ="Analyzing with Ollama..." + cv2 .imshow ("Ollama Logo Detection",display ) + cv2 .waitKey (1 ) + + timestamp =datetime .now ().strftime ("%Y%m%d_%H%M%S") + img_path =Path (output_dir )/f"capture_{timestamp }.jpg" + + if save_images : + cv2 .imwrite (str (img_path ),frame ) + + last_result =detector .detect_from_numpy (frame ) + + json_path =Path (output_dir )/f"result_{timestamp }.json" + with open (json_path ,'w')as f : + json .dump (last_result ,f ,indent =2 ) + + count =last_result .get ("total_count",0 ) + if count >0 : + status_message =f"Found {count } logo(s)! Press R for details" + else : + status_message ="No logos detected. Try again!" 
+ + print (f"\nCaptured: {img_path }") + print (f"Results: {json_path }") + _display_results (last_result ) + + analyzing =False + + elif key ==ord ('s'): + timestamp =datetime .now ().strftime ("%Y%m%d_%H%M%S") + img_path =Path (output_dir )/f"capture_{timestamp }.jpg" + cv2 .imwrite (str (img_path ),frame ) + status_message =f"Saved: {img_path .name }" + print (f"Saved: {img_path }") + + elif key ==ord ('r')and last_result : + print ("\n"+"="*40 ) + print ("Last Detection Results:") + print ("="*40 ) + _display_results (last_result ) + + elif key ==ord ('q'): + break + + cap .release () + cv2 .destroyAllWindows () + + print ("\nGoodbye!") + return last_result + +def _display_results (result :Dict ): + print ("\n"+"-"*40 ) + + logos =result .get ("logos_detected",[]) + count =result .get ("total_count",len (logos )) + + if count ==0 : + print ("No logos or brands detected") + if "description"in result : + print (f"Description: {result ['description']}") + else : + print (f"Detected {count } logo(s)/brand(s):\n") + for i ,logo in enumerate (logos ,1 ): + brand =logo .get ("brand","Unknown") + conf =logo .get ("confidence","unknown") + loc =logo .get ("location","unknown") + cat =logo .get ("category","") + + print (f" {i }. 
{brand }") + print (f" Confidence: {conf }") + print (f" Location: {loc }") + if cat : + print (f" Category: {cat }") + print () + + if "error"in result : + print (f"Error: {result ['error']}") + + print ("-"*40 ) + + print ("\nJSON Output:") + print (json .dumps (result ,indent =2 )) diff --git a/backend/src/cv/vision.py b/backend/src/cv/vision.py index 4bbedd3..a1d0522 100644 --- a/backend/src/cv/vision.py +++ b/backend/src/cv/vision.py @@ -1,28 +1,28 @@ from .config import ( - CV_DIR, - DATA_DIR, - MODELS_DIR, - ULTRALYTICS_AVAILABLE, - YOLO26_MODELS, - SUPER_CATEGORIES, - COMMON_BRANDS, - COLORS, - DEFAULT_CONF_THRESHOLD, - DEFAULT_IOU_THRESHOLD, - DEFAULT_IMG_SIZE, +CV_DIR , +DATA_DIR , +MODELS_DIR , +ULTRALYTICS_AVAILABLE , +YOLO26_MODELS , +SUPER_CATEGORIES , +COMMON_BRANDS , +COLORS , +DEFAULT_CONF_THRESHOLD , +DEFAULT_IOU_THRESHOLD , +DEFAULT_IMG_SIZE , ) from .detectors import ( - YOLO26Detector, - HybridLogoDetector, +YOLO26Detector , +HybridLogoDetector , ) from .yolo_scanner import ( - start_scanner as start_yolo_scanner, - detect_objects as detect_yolo_objects, +start_scanner as start_yolo_scanner , +detect_objects as detect_yolo_objects , ) from .scanner import ( - start_interactive_capture as start_ollama_scanner, +start_interactive_capture as start_ollama_scanner , ) -if __name__ == "__main__": - from .cli import main - main() \ No newline at end of file +if __name__ =="__main__": + from .cli import main + main () \ No newline at end of file diff --git a/backend/src/cv/yolo_scanner.py b/backend/src/cv/yolo_scanner.py index 56ad225..5e26ef9 100644 --- a/backend/src/cv/yolo_scanner.py +++ b/backend/src/cv/yolo_scanner.py @@ -1,166 +1,166 @@ -import cv2 -from pathlib import Path -from typing import List, Dict, Optional +import cv2 +from pathlib import Path +from typing import List ,Dict ,Optional from .config import ( - CV_DIR, - ULTRALYTICS_AVAILABLE, +CV_DIR , +ULTRALYTICS_AVAILABLE , ) -from .detectors import YOLO26Detector, HybridLogoDetector 
def start_scanner(model_path: Optional[str] = None,
                  model_size: str = "nano",
                  logo_model_path: Optional[str] = None,
                  use_gui: bool = True,
                  use_tracking: bool = False,
                  hybrid_mode: bool = False):
    """Run a live webcam scanner with YOLO26 (optionally hybrid COCO+logos).

    Falls back from HybridLogoDetector to plain YOLO26Detector if hybrid
    construction fails, and from GUI display to writing output.mp4 if the
    OpenCV build is headless.

    Keys (GUI mode): q quit, s screenshot, t toggle tracking;
    hybrid only: o objects, l logos, b both.
    """
    print("=" * 60)
    if hybrid_mode:
        print("YOLO26 Hybrid Scanner (COCO + Logos)")
    else:
        print("YOLO26 Object Detection Scanner")
    print("=" * 60)

    detector = None

    if hybrid_mode and ULTRALYTICS_AVAILABLE:
        try:
            detector = HybridLogoDetector(
                coco_model_size=model_size,
                logo_model_path=logo_model_path,
                conf_threshold=0.25,
                device="auto"
            )
        except Exception as e:
            print(f"Hybrid detector failed: {e}")
            hybrid_mode = False

    if detector is None and ULTRALYTICS_AVAILABLE:
        try:
            detector = YOLO26Detector(
                model_size=model_size,
                model_path=model_path,
                conf_threshold=0.25,
                device="auto"
            )
        except Exception as e:
            print(f"YOLO26 failed: {e}")

    if detector is None:
        print("Error: No detector available.")
        return

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not access camera.")
        return

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # some drivers report 0

    writer = None
    output_path = CV_DIR / "output.mp4"

    print(f"Camera: {width}x{height} @ {fps:.1f}fps")
    print("Controls: q=quit | s=screenshot | t=tracking")
    if hybrid_mode:
        print("          o=objects | l=logos | b=both")

    frame_count = 0
    detect_objects_flag = True
    detect_logos_flag = True

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            if hybrid_mode and isinstance(detector, HybridLogoDetector):
                detections = detector.detect(
                    frame,
                    detect_objects=detect_objects_flag,
                    detect_logos=detect_logos_flag
                )
            elif use_tracking and isinstance(detector, YOLO26Detector):
                detections = detector.detect_and_track(frame)
            else:
                detections = detector.detect(frame)

            result_frame = detector.draw_detections(frame, detections)

            mode_str = "HYBRID" if hybrid_mode else ("TRACK" if use_tracking else "DETECT")
            cv2.putText(result_frame, f"{mode_str} | {len(detections)} objects",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            if use_gui:
                try:
                    cv2.imshow('YOLO26 Scanner', result_frame)
                    key = cv2.waitKey(1) & 0xFF
                    if key == ord('q'):
                        break
                    elif key == ord('s'):
                        path = CV_DIR / f"screenshot_{frame_count}.jpg"
                        cv2.imwrite(str(path), result_frame)
                        print(f"Saved: {path}")
                    elif key == ord('t') and isinstance(detector, YOLO26Detector):
                        use_tracking = not use_tracking
                    elif key == ord('o') and hybrid_mode:
                        detect_objects_flag, detect_logos_flag = True, False
                    elif key == ord('l') and hybrid_mode:
                        detect_objects_flag, detect_logos_flag = False, True
                    elif key == ord('b') and hybrid_mode:
                        detect_objects_flag, detect_logos_flag = True, True
                except cv2.error:
                    # Headless OpenCV build: switch to recording frames instead.
                    use_gui = False
                    writer = cv2.VideoWriter(
                        str(output_path), cv2.VideoWriter_fourcc(*'mp4v'),
                        fps, (width, height)
                    )

            if not use_gui and writer:
                writer.write(result_frame)

    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        cap.release()
        if writer:
            writer.release()
        cv2.destroyAllWindows()


def detect_objects(image_path: str,
                   model_size: str = "nano",
                   conf_threshold: float = 0.25,
                   save_output: bool = True,
                   hybrid_mode: bool = False) -> List[Dict]:
    """Detect objects (and optionally logos) in a single image file.

    Args:
        image_path: path to the image to analyze.
        model_size: YOLO26 model size key.
        conf_threshold: confidence threshold for detections.
        save_output: when True, write "<stem>_detected.jpg" next to the CWD.
        hybrid_mode: use HybridLogoDetector instead of plain YOLO26Detector.

    Returns:
        List of detection dicts.

    Raises:
        RuntimeError: if ultralytics is not installed.
        ValueError: if the image cannot be read.
    """
    if not ULTRALYTICS_AVAILABLE:
        raise RuntimeError("Ultralytics not installed")

    if hybrid_mode:
        detector = HybridLogoDetector(
            coco_model_size=model_size,
            conf_threshold=conf_threshold
        )
    else:
        detector = YOLO26Detector(
            model_size=model_size,
            conf_threshold=conf_threshold
        )

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load: {image_path}")

    detections = detector.detect(image)

    if save_output:
        result = detector.draw_detections(image, detections)
        output = Path(image_path).stem + "_detected.jpg"
        cv2.imwrite(output, result)
        print(f"Saved: {output}")

    return detections
@@ -21,123 +21,123 @@ Based on the context provided, give a final verdict: - EVIDENCE: [Quote specific parts of the context if possible] """ -def ask(prompt): - client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY")) - return client.models.generate_content(model="gemini-3-flash-preview", contents=prompt).text +def ask (prompt ): + client =genai .Client (api_key =os .environ .get ("GOOGLE_API_KEY")) + return client .models .generate_content (model ="gemini-3-pro-preview",contents =prompt ).text -def ask_gemini_with_rag(prompt, category=None): +def ask_gemini_with_rag (prompt ,category =None ): """Ask Gemini with RAG context from the vector database.""" - # Get embedding for the prompt - query_embedding = get_embedding(prompt) - - # Search for relevant documents - results = search_documents(query_embedding, num_results=5) - - # Build context from results - context = "" - for res in results: - context += f"--- Document ---\n{res['text']}\n\n" - - # Create full prompt with context - full_prompt = f"""You are a helpful sustainability assistant. Use the following context to answer the user's question. + + query_embedding =get_embedding (prompt ) + + + results =search_documents (query_embedding ,num_results =5 ) + + + context ="" + for res in results : + context +=f"--- Document ---\n{res ['text']}\n\n" + + + full_prompt =f"""You are a helpful sustainability assistant. Use the following context to answer the user's question. If the context doesn't contain relevant information, you can use your general knowledge but mention that. 
CONTEXT: -{context} +{context } -USER QUESTION: {prompt} +USER QUESTION: {prompt } Please provide a helpful and concise response.""" - return ask(full_prompt) + return ask (full_prompt ) -def analyze(query, query_embedding, num_results=5, num_alternatives=3): - try: - results = search_documents(query_embedding, num_results=num_results + num_alternatives + 5) - except Exception as e: - print(f"Chroma error: {e}") - results = [] - - if not results: - context = "No data found in database for this brand." - else: - context = "--- START OF REPORT CONTEXT ---\n" - for res in results[:num_results]: - context += f"RELEVANT DATA CHUNK: {res['text']}\n\n" - context += "--- END OF REPORT CONTEXT ---\n" - - full_prompt = f"{GREENWASHING_ANALYSIS_PROMPT}\n\n{context}\n\nUSER QUERY/COMPANY FOCUS: {query}" - analysis_text = ask(full_prompt) - - alternatives = [] - seen_texts = set() - for res in results[num_results:]: - text_preview = res['text'][:200] - if text_preview not in seen_texts: - seen_texts.add(text_preview) - alternatives.append({"text": res['text'], "score": res.get('score'), "summary": text_preview}) - if len(alternatives) >= num_alternatives: - break - - return {"analysis": analysis_text, "alternatives": alternatives} +def analyze (query ,query_embedding ,num_results =5 ,num_alternatives =3 ): + try : + results =search_documents (query_embedding ,num_results =num_results +num_alternatives +5 ) + except Exception as e : + print (f"Chroma error: {e }") + results =[] -def ask_gemini_with_rag(query, category=None, num_results=5): - embedding = get_embedding(query) - result = analyze(query, embedding, num_results=num_results) - return result["analysis"] + if not results : + context ="No data found in database for this brand." 
+ else : + context ="--- START OF REPORT CONTEXT ---\n" + for res in results [:num_results ]: + context +=f"RELEVANT DATA CHUNK: {res ['text']}\n\n" + context +="--- END OF REPORT CONTEXT ---\n" -def analyze_brand(brand_name): - print(f"\n{'='*60}") - print(f"Analyzing brand: {brand_name}") - print('='*60) - - try: - print("\n[1/3] Getting embedding for brand...") - embedding = get_embedding(brand_name) - - print("[2/3] Querying Chroma database...") - result = analyze(brand_name, embedding) - - print("[3/3] Gemini Analysis Complete!\n") - print("-"*60) - print("ANALYSIS:") - print("-"*60) - print(result["analysis"]) - - print("\n" + "-"*60) - print("ALTERNATIVES FROM DATABASE:") - print("-"*60) - if result["alternatives"]: - for i, alt in enumerate(result["alternatives"], 1): - print(f"\n{i}. {alt['summary']}...") - else: - print("No alternatives found in database.") - - print("\n" + "="*60) - return result - except Exception as e: - print(f"\nError during analysis: {e}") - return None + full_prompt =f"{GREENWASHING_ANALYSIS_PROMPT }\n\n{context }\n\nUSER QUERY/COMPANY FOCUS: {query }" + analysis_text =ask (full_prompt ) -def scan_and_analyze(): - from src.cv.scanner import capture_and_analyze as cv_capture - - print("\n" + "="*60) - print("CV + Gemini Greenwashing Scanner") - print("="*60) - print("Using camera to detect brands...") - - cv_result = cv_capture() - - logos = cv_result.get("logos_detected", []) - if not logos: - print("No brands detected. 
Try again!") - return None - - brand = logos[0].get("brand", "Unknown") - print(f"\nDetected brand: {brand}") - - return analyze_brand(brand) + alternatives =[] + seen_texts =set () + for res in results [num_results :]: + text_preview =res ['text'][:200 ] + if text_preview not in seen_texts : + seen_texts .add (text_preview ) + alternatives .append ({"text":res ['text'],"score":res .get ('score'),"summary":text_preview }) + if len (alternatives )>=num_alternatives : + break -if __name__ == "__main__": - scan_and_analyze() + return {"analysis":analysis_text ,"alternatives":alternatives } + +def ask_gemini_with_rag (query ,category =None ,num_results =5 ): + embedding =get_embedding (query ) + result =analyze (query ,embedding ,num_results =num_results ) + return result ["analysis"] + +def analyze_brand (brand_name ): + print (f"\n{'='*60 }") + print (f"Analyzing brand: {brand_name }") + print ('='*60 ) + + try : + print ("\n[1/3] Getting embedding for brand...") + embedding =get_embedding (brand_name ) + + print ("[2/3] Querying Chroma database...") + result =analyze (brand_name ,embedding ) + + print ("[3/3] Gemini Analysis Complete!\n") + print ("-"*60 ) + print ("ANALYSIS:") + print ("-"*60 ) + print (result ["analysis"]) + + print ("\n"+"-"*60 ) + print ("ALTERNATIVES FROM DATABASE:") + print ("-"*60 ) + if result ["alternatives"]: + for i ,alt in enumerate (result ["alternatives"],1 ): + print (f"\n{i }. {alt ['summary']}...") + else : + print ("No alternatives found in database.") + + print ("\n"+"="*60 ) + return result + except Exception as e : + print (f"\nError during analysis: {e }") + return None + +def scan_and_analyze (): + from src .cv .scanner import capture_and_analyze as cv_capture + + print ("\n"+"="*60 ) + print ("CV + Gemini Greenwashing Scanner") + print ("="*60 ) + print ("Using camera to detect brands...") + + cv_result =cv_capture () + + logos =cv_result .get ("logos_detected",[]) + if not logos : + print ("No brands detected. 
Try again!") + return None + + brand =logos [0 ].get ("brand","Unknown") + print (f"\nDetected brand: {brand }") + + return analyze_brand (brand ) + +if __name__ =="__main__": + scan_and_analyze () diff --git a/backend/src/gemini/client.py b/backend/src/gemini/client.py index 29193a4..4cbbe24 100644 --- a/backend/src/gemini/client.py +++ b/backend/src/gemini/client.py @@ -1,11 +1,11 @@ -from google import genai -import os +from google import genai +import os -def generate_content(prompt, model_name="gemini-2.0-flash-exp"): - api_key = os.environ.get("GOOGLE_API_KEY") - client = genai.Client(api_key=api_key) - response = client.models.generate_content( - model=model_name, - contents=prompt, +def generate_content (prompt ,model_name ="gemini-2.0-flash-exp"): + api_key =os .environ .get ("GOOGLE_API_KEY") + client =genai .Client (api_key =api_key ) + response =client .models .generate_content ( + model =model_name , + contents =prompt , ) - return response.text + return response .text diff --git a/backend/src/mongo/__init__.py b/backend/src/mongo/__init__.py index cefbf29..fceb486 100644 --- a/backend/src/mongo/__init__.py +++ b/backend/src/mongo/__init__.py @@ -1,21 +1,21 @@ -import os -from pymongo import MongoClient -from dotenv import load_dotenv +import os +from pymongo import MongoClient +from dotenv import load_dotenv -script_dir = os.path.dirname(os.path.abspath(__file__)) -env_path = os.path.join(script_dir, '..', 'rag', '.env') -load_dotenv(env_path) +script_dir =os .path .dirname (os .path .abspath (__file__ )) +env_path =os .path .join (script_dir ,'..','rag','.env') +load_dotenv (env_path ) -def get_database(): - uri = os.getenv("MONGO_URI") - try: - client = MongoClient(uri) - db = client["my_rag_app"] - print("SUCCESS: Connected to MongoDB Atlas!") - return db - except Exception as e: - print(f"ERROR: Could not connect to MongoDB: {e}") - return None +def get_database (): + uri =os .getenv ("MONGO_URI") + try : + client =MongoClient (uri ) + db =client 
["my_rag_app"] + print ("SUCCESS: Connected to MongoDB Atlas!") + return db + except Exception as e : + print (f"ERROR: Could not connect to MongoDB: {e }") + return None -if __name__ == "__main__": - get_database() \ No newline at end of file +if __name__ =="__main__": + get_database () \ No newline at end of file diff --git a/backend/src/mongo/connection.py b/backend/src/mongo/connection.py index d868d23..63fecf0 100644 --- a/backend/src/mongo/connection.py +++ b/backend/src/mongo/connection.py @@ -1,8 +1,8 @@ -import os -from pymongo import MongoClient +import os +from pymongo import MongoClient -def get_mongo_client(): - uri = os.environ.get("MONGO_URI") - if not uri: - raise ValueError("MONGO_URI environment variable not set") - return MongoClient(uri) +def get_mongo_client (): + uri =os .environ .get ("MONGO_URI") + if not uri : + raise ValueError ("MONGO_URI environment variable not set") + return MongoClient (uri ) diff --git a/backend/src/mongo/metadata.py b/backend/src/mongo/metadata.py index 0a04e52..c121003 100644 --- a/backend/src/mongo/metadata.py +++ b/backend/src/mongo/metadata.py @@ -1,62 +1,62 @@ -from .connection import get_mongo_client -from datetime import datetime +from .connection import get_mongo_client +from datetime import datetime -DB_NAME = "hoya_metadata" +DB_NAME ="hoya_metadata" -def get_datasets_collection(): - client = get_mongo_client() - db = client.get_database(DB_NAME) - return db["datasets"] +def get_datasets_collection (): + client =get_mongo_client () + db =client .get_database (DB_NAME ) + return db ["datasets"] -def get_categories_collection(): - client = get_mongo_client() - db = client.get_database(DB_NAME) - return db["categories"] +def get_categories_collection (): + client =get_mongo_client () + db =client .get_database (DB_NAME ) + return db ["categories"] -def is_file_processed(filename): - collection = get_datasets_collection() - return collection.find_one({"filename": filename}) is not None +def is_file_processed 
(filename ): + collection =get_datasets_collection () + return collection .find_one ({"filename":filename })is not None -def log_processed_file(filename, category=None, chunk_count=0): - collection = get_datasets_collection() - doc = { - "filename": filename, - "category": category, - "chunk_count": chunk_count, - "processed_at": datetime.utcnow(), - "status": "processed" +def log_processed_file (filename ,category =None ,chunk_count =0 ): + collection =get_datasets_collection () + doc ={ + "filename":filename , + "category":category , + "chunk_count":chunk_count , + "processed_at":datetime .utcnow (), + "status":"processed" } - collection.insert_one(doc) + collection .insert_one (doc ) -def get_all_datasets(): - collection = get_datasets_collection() - return list(collection.find({}, {"_id": 0})) +def get_all_datasets (): + collection =get_datasets_collection () + return list (collection .find ({},{"_id":0 })) -def get_datasets_by_category(category): - collection = get_datasets_collection() - return list(collection.find({"category": category}, {"_id": 0})) +def get_datasets_by_category (category ): + collection =get_datasets_collection () + return list (collection .find ({"category":category },{"_id":0 })) -def delete_dataset_record(filename): - collection = get_datasets_collection() - result = collection.delete_one({"filename": filename}) - return result.deleted_count > 0 +def delete_dataset_record (filename ): + collection =get_datasets_collection () + result =collection .delete_one ({"filename":filename }) + return result .deleted_count >0 -def create_category(name, description=""): - collection = get_categories_collection() - if collection.find_one({"name": name}): - return False - collection.insert_one({ - "name": name, - "description": description, - "created_at": datetime.utcnow() +def create_category (name ,description =""): + collection =get_categories_collection () + if collection .find_one ({"name":name }): + return False + collection .insert_one ({ + 
"name":name , + "description":description , + "created_at":datetime .utcnow () }) - return True + return True -def get_all_categories(): - collection = get_categories_collection() - return list(collection.find({}, {"_id": 0})) +def get_all_categories (): + collection =get_categories_collection () + return list (collection .find ({},{"_id":0 })) -def delete_category(name): - collection = get_categories_collection() - result = collection.delete_one({"name": name}) - return result.deleted_count > 0 +def delete_category (name ): + collection =get_categories_collection () + result =collection .delete_one ({"name":name }) + return result .deleted_count >0 diff --git a/backend/src/mongo/vector_store.py b/backend/src/mongo/vector_store.py index b495a86..2919678 100644 --- a/backend/src/mongo/vector_store.py +++ b/backend/src/mongo/vector_store.py @@ -1,49 +1,49 @@ -from .connection import get_mongo_client +from .connection import get_mongo_client -def insert_rag_documents(documents, collection_name="rag_documents", db_name="vectors_db"): - client = get_mongo_client() - db = client.get_database(db_name) - collection = db[collection_name] - - if documents: - result = collection.insert_many(documents) - return len(result.inserted_ids) - return 0 +def insert_rag_documents (documents ,collection_name ="rag_documents",db_name ="vectors_db"): + client =get_mongo_client () + db =client .get_database (db_name ) + collection =db [collection_name ] -def search_rag_documents(query_embedding, collection_name="rag_documents", db_name="vectors_db", num_results=5): - client = get_mongo_client() - db = client.get_database(db_name) - collection = db[collection_name] - - pipeline = [ - { - "$vectorSearch": { - "index": "vector_index", - "path": "embedding", - "queryVector": query_embedding, - "numCandidates": num_results * 10, - "limit": num_results - } - }, - { - "$project": { - "_id": 0, - "text": 1, - "score": { "$meta": "vectorSearchScore" } - } - } + if documents : + result =collection 
.insert_many (documents ) + return len (result .inserted_ids ) + return 0 + +def search_rag_documents (query_embedding ,collection_name ="rag_documents",db_name ="vectors_db",num_results =5 ): + client =get_mongo_client () + db =client .get_database (db_name ) + collection =db [collection_name ] + + pipeline =[ + { + "$vectorSearch":{ + "index":"vector_index", + "path":"embedding", + "queryVector":query_embedding , + "numCandidates":num_results *10 , + "limit":num_results + } + }, + { + "$project":{ + "_id":0 , + "text":1 , + "score":{"$meta":"vectorSearchScore"} + } + } ] - - return list(collection.aggregate(pipeline)) -def is_file_processed(filename, log_collection="ingested_files", db_name="vectors_db"): - client = get_mongo_client() - db = client.get_database(db_name) - collection = db[log_collection] - return collection.find_one({"filename": filename}) is not None + return list (collection .aggregate (pipeline )) -def log_processed_file(filename, log_collection="ingested_files", db_name="vectors_db"): - client = get_mongo_client() - db = client.get_database(db_name) - collection = db[log_collection] - collection.insert_one({"filename": filename, "processed_at": 1}) +def is_file_processed (filename ,log_collection ="ingested_files",db_name ="vectors_db"): + client =get_mongo_client () + db =client .get_database (db_name ) + collection =db [log_collection ] + return collection .find_one ({"filename":filename })is not None + +def log_processed_file (filename ,log_collection ="ingested_files",db_name ="vectors_db"): + client =get_mongo_client () + db =client .get_database (db_name ) + collection =db [log_collection ] + collection .insert_one ({"filename":filename ,"processed_at":1 }) diff --git a/backend/src/ollama/__init__.py b/backend/src/ollama/__init__.py index 14c1c76..dfac33f 100644 --- a/backend/src/ollama/__init__.py +++ b/backend/src/ollama/__init__.py @@ -1,5 +1,5 @@ -from .detector import OllamaLogoDetector +from .detector import OllamaLogoDetector 
-__all__ = [ - "OllamaLogoDetector", +__all__ =[ +"OllamaLogoDetector", ] diff --git a/backend/src/ollama/__main__.py b/backend/src/ollama/__main__.py index 9ae637f..4e6e9b5 100644 --- a/backend/src/ollama/__main__.py +++ b/backend/src/ollama/__main__.py @@ -1,4 +1,4 @@ -from .cli import main +from .cli import main -if __name__ == "__main__": - main() +if __name__ =="__main__": + main () diff --git a/backend/src/ollama/cli.py b/backend/src/ollama/cli.py index 0fbe4eb..c01a777 100644 --- a/backend/src/ollama/cli.py +++ b/backend/src/ollama/cli.py @@ -1,103 +1,103 @@ -#!/usr/bin/env python3 -import argparse -import json -import sys -from .detector import OllamaLogoDetector -from .camera import capture_and_analyze, start_interactive_capture +import argparse +import json +import sys -def main(): - parser = argparse.ArgumentParser( - description="Detect logos and companies using Ollama vision models" +from .detector import OllamaLogoDetector +from .camera import capture_and_analyze ,start_interactive_capture + +def main (): + parser =argparse .ArgumentParser ( + description ="Detect logos and companies using Ollama vision models" ) - - parser.add_argument("--image", "-i", type=str) - parser.add_argument("--model", "-m", type=str, default="ministral-3:latest") - parser.add_argument("--output", "-o", type=str) - parser.add_argument("--host", type=str) - parser.add_argument("--single", "-s", action="store_true") - parser.add_argument("--no-save", action="store_true") - parser.add_argument("--output-dir", type=str, default="./captures") - - args = parser.parse_args() - - try: - if args.image: - print(f"Analyzing: {args.image}") - print(f"Model: {args.model}") - - detector = OllamaLogoDetector(model=args.model, host=args.host) - result = detector.detect_from_file(args.image) - - _print_results(result) - - if args.output: - with open(args.output, 'w') as f: - json.dump(result, f, indent=2) - print(f"Results saved to: {args.output}") - - elif args.single: - result = 
capture_and_analyze( - model=args.model, - save_image=not args.no_save, - output_dir=args.output_dir - ) - - if args.output and result: - with open(args.output, 'w') as f: - json.dump(result, f, indent=2) - print(f"Results saved to: {args.output}") - - else: - start_interactive_capture( - model=args.model, - save_images=not args.no_save, - output_dir=args.output_dir - ) - - except KeyboardInterrupt: - sys.exit(0) - except Exception as e: - print(f"Error: {e}") - sys.exit(1) -def _print_results(result: dict): - print("\n" + "=" * 50) - print("DETECTION RESULTS") - print("=" * 50) - - logos = result.get("logos_detected", []) - count = result.get("total_count", len(logos)) - - if count == 0: - print("\nNo logos or companies detected") - if desc := result.get("description"): - print(f"\nImage description: {desc}") - else: - print(f"\nFound {count} logo(s)/company(s):\n") - - for i, logo in enumerate(logos, 1): - brand = logo.get("brand", "Unknown") - conf = logo.get("confidence", "unknown") - loc = logo.get("location", "unknown") - cat = logo.get("category", "N/A") - - print(f" {i}. 
{brand}") - print(f" Confidence: {conf}") - print(f" Location: {loc}") - print(f" Category: {cat}") - print() - - if "error" in result: - print(f"\nError occurred: {result['error']}") - - if "raw_response" in result and result.get("parse_error"): - print(f"\nParse error: {result['parse_error']}") - print(f"Raw response:\n{result['raw_response'][:500]}...") - - print("=" * 50) - print("\nRaw JSON:") - print(json.dumps(result, indent=2)) + parser .add_argument ("--image","-i",type =str ) + parser .add_argument ("--model","-m",type =str ,default ="ministral-3:latest") + parser .add_argument ("--output","-o",type =str ) + parser .add_argument ("--host",type =str ) + parser .add_argument ("--single","-s",action ="store_true") + parser .add_argument ("--no-save",action ="store_true") + parser .add_argument ("--output-dir",type =str ,default ="./captures") -if __name__ == "__main__": - main() + args =parser .parse_args () + + try : + if args .image : + print (f"Analyzing: {args .image }") + print (f"Model: {args .model }") + + detector =OllamaLogoDetector (model =args .model ,host =args .host ) + result =detector .detect_from_file (args .image ) + + _print_results (result ) + + if args .output : + with open (args .output ,'w')as f : + json .dump (result ,f ,indent =2 ) + print (f"Results saved to: {args .output }") + + elif args .single : + result =capture_and_analyze ( + model =args .model , + save_image =not args .no_save , + output_dir =args .output_dir + ) + + if args .output and result : + with open (args .output ,'w')as f : + json .dump (result ,f ,indent =2 ) + print (f"Results saved to: {args .output }") + + else : + start_interactive_capture ( + model =args .model , + save_images =not args .no_save , + output_dir =args .output_dir + ) + + except KeyboardInterrupt : + sys .exit (0 ) + except Exception as e : + print (f"Error: {e }") + sys .exit (1 ) + +def _print_results (result :dict ): + print ("\n"+"="*50 ) + print ("DETECTION RESULTS") + print ("="*50 ) + + 
logos =result .get ("logos_detected",[]) + count =result .get ("total_count",len (logos )) + + if count ==0 : + print ("\nNo logos or companies detected") + if desc :=result .get ("description"): + print (f"\nImage description: {desc }") + else : + print (f"\nFound {count } logo(s)/company(s):\n") + + for i ,logo in enumerate (logos ,1 ): + brand =logo .get ("brand","Unknown") + conf =logo .get ("confidence","unknown") + loc =logo .get ("location","unknown") + cat =logo .get ("category","N/A") + + print (f" {i }. {brand }") + print (f" Confidence: {conf }") + print (f" Location: {loc }") + print (f" Category: {cat }") + print () + + if "error"in result : + print (f"\nError occurred: {result ['error']}") + + if "raw_response"in result and result .get ("parse_error"): + print (f"\nParse error: {result ['parse_error']}") + print (f"Raw response:\n{result ['raw_response'][:500 ]}...") + + print ("="*50 ) + print ("\nRaw JSON:") + print (json .dumps (result ,indent =2 )) + +if __name__ =="__main__": + main () diff --git a/backend/src/ollama/detector.py b/backend/src/ollama/detector.py index 045b9f0..33105e5 100644 --- a/backend/src/ollama/detector.py +++ b/backend/src/ollama/detector.py @@ -1,20 +1,20 @@ -import base64 -import json -import re -from pathlib import Path -from typing import Dict, List, Optional, Union +import base64 +import json +import re +from pathlib import Path +from typing import Dict ,List ,Optional ,Union -try: - import ollama - OLLAMA_AVAILABLE = True -except ImportError: - OLLAMA_AVAILABLE = False - print("Ollama not installed. Run: pip install ollama") +try : + import ollama + OLLAMA_AVAILABLE =True +except ImportError : + OLLAMA_AVAILABLE =False + print ("Ollama not installed. 
Run: pip install ollama") -DEFAULT_HOST = "https://ollama.sirblob.co" -DEFAULT_MODEL = "ministral-3:latest" +DEFAULT_HOST ="https://ollama.sirblob.co" +DEFAULT_MODEL ="ministral-3:latest" -DEFAULT_PROMPT = """Analyze this image and identify ALL logos, brand names, and company names visible. +DEFAULT_PROMPT ="""Analyze this image and identify ALL logos, brand names, and company names visible. For each logo or brand you detect, provide: 1. The company/brand name @@ -45,120 +45,120 @@ If no logos are found, return: IMPORTANT: Return ONLY the JSON object, no other text.""" -class OllamaLogoDetector: - def __init__(self, - model: str = DEFAULT_MODEL, - host: str = DEFAULT_HOST): - if not OLLAMA_AVAILABLE: - raise RuntimeError("Ollama not installed. Run: pip install ollama") - - self.model = model - self.host = host - self.client = ollama.Client(host=host) - - try: - models = self.client.list() - model_names = [m['name'] for m in models.get('models', [])] - - model_base = model.split(':')[0] - if not any(model_base in name for name in model_names): - print(f"Model '{model}' not found. 
Available models: {model_names}") - print(f"Pulling {model}...") - self.client.pull(model) - print(f"Model {model} ready!") - else: - print(f"Using Ollama model: {model}") - except Exception as e: - print(f"Could not verify model: {e}") - print("Make sure Ollama is running: ollama serve") - - def detect_from_file(self, - image_path: str, - prompt: Optional[str] = None) -> Dict: - path = Path(image_path) - if not path.exists(): - raise FileNotFoundError(f"Image not found: {image_path}") - - with open(path, 'rb') as f: - image_data = base64.b64encode(f.read()).decode('utf-8') - - return self._analyze_image(image_data, prompt) - - def detect_from_bytes(self, - image_bytes: bytes, - prompt: Optional[str] = None) -> Dict: - image_data = base64.b64encode(image_bytes).decode('utf-8') - return self._analyze_image(image_data, prompt) - - def detect_from_numpy(self, - image_array, - prompt: Optional[str] = None) -> Dict: - import cv2 - - success, buffer = cv2.imencode('.jpg', image_array) - if not success: - raise ValueError("Failed to encode image") - - return self.detect_from_bytes(buffer.tobytes(), prompt) - - def _analyze_image(self, - image_base64: str, - prompt: Optional[str] = None) -> Dict: - if prompt is None: - prompt = DEFAULT_PROMPT - - try: - response = self.client.chat( - model=self.model, - messages=[{ - 'role': 'user', - 'content': prompt, - 'images': [image_base64] - }], - options={ - 'temperature': 0.1, - } - ) - - content = response['message']['content'] - return self._parse_response(content) - - except Exception as e: - return { - "logos_detected": [], - "total_count": 0, - "error": str(e), - "raw_response": None +class OllamaLogoDetector : + def __init__ (self , + model :str =DEFAULT_MODEL , + host :str =DEFAULT_HOST ): + if not OLLAMA_AVAILABLE : + raise RuntimeError ("Ollama not installed. 
Run: pip install ollama") + + self .model =model + self .host =host + self .client =ollama .Client (host =host ) + + try : + models =self .client .list () + model_names =[m ['name']for m in models .get ('models',[])] + + model_base =model .split (':')[0 ] + if not any (model_base in name for name in model_names ): + print (f"Model '{model }' not found. Available models: {model_names }") + print (f"Pulling {model }...") + self .client .pull (model ) + print (f"Model {model } ready!") + else : + print (f"Using Ollama model: {model }") + except Exception as e : + print (f"Could not verify model: {e }") + print ("Make sure Ollama is running: ollama serve") + + def detect_from_file (self , + image_path :str , + prompt :Optional [str ]=None )->Dict : + path =Path (image_path ) + if not path .exists (): + raise FileNotFoundError (f"Image not found: {image_path }") + + with open (path ,'rb')as f : + image_data =base64 .b64encode (f .read ()).decode ('utf-8') + + return self ._analyze_image (image_data ,prompt ) + + def detect_from_bytes (self , + image_bytes :bytes , + prompt :Optional [str ]=None )->Dict : + image_data =base64 .b64encode (image_bytes ).decode ('utf-8') + return self ._analyze_image (image_data ,prompt ) + + def detect_from_numpy (self , + image_array , + prompt :Optional [str ]=None )->Dict : + import cv2 + + success ,buffer =cv2 .imencode ('.jpg',image_array ) + if not success : + raise ValueError ("Failed to encode image") + + return self .detect_from_bytes (buffer .tobytes (),prompt ) + + def _analyze_image (self , + image_base64 :str , + prompt :Optional [str ]=None )->Dict : + if prompt is None : + prompt =DEFAULT_PROMPT + + try : + response =self .client .chat ( + model =self .model , + messages =[{ + 'role':'user', + 'content':prompt , + 'images':[image_base64 ] + }], + options ={ + 'temperature':0.1 , } - - def _parse_response(self, content: str) -> Dict: - try: - return json.loads(content) - except json.JSONDecodeError: - pass - - json_patterns = 
[ - r'```json\s*([\s\S]*?)\s*```', - r'```\s*([\s\S]*?)\s*```', - r'\{[\s\S]*\}' + ) + + content =response ['message']['content'] + return self ._parse_response (content ) + + except Exception as e : + return { + "logos_detected":[], + "total_count":0 , + "error":str (e ), + "raw_response":None + } + + def _parse_response (self ,content :str )->Dict : + try : + return json .loads (content ) + except json .JSONDecodeError : + pass + + json_patterns =[ + r'```json\s*([\s\S]*?)\s*```', + r'```\s*([\s\S]*?)\s*```', + r'\{[\s\S]*\}' ] - - for pattern in json_patterns: - match = re.search(pattern, content) - if match: - try: - json_str = match.group(1) if '```' in pattern else match.group(0) - return json.loads(json_str) - except json.JSONDecodeError: - continue - + + for pattern in json_patterns : + match =re .search (pattern ,content ) + if match : + try : + json_str =match .group (1 )if '```'in pattern else match .group (0 ) + return json .loads (json_str ) + except json .JSONDecodeError : + continue + return { - "logos_detected": [], - "total_count": 0, - "raw_response": content, - "parse_error": "Could not extract valid JSON from response" + "logos_detected":[], + "total_count":0 , + "raw_response":content , + "parse_error":"Could not extract valid JSON from response" } - - def get_brands_list(self, result: Dict) -> List[str]: - logos = result.get("logos_detected", []) - return [logo.get("brand", "Unknown") for logo in logos] + + def get_brands_list (self ,result :Dict )->List [str ]: + logos =result .get ("logos_detected",[]) + return [logo .get ("brand","Unknown")for logo in logos ] diff --git a/backend/src/rag/embeddings.py b/backend/src/rag/embeddings.py index 63824c4..77cb6b6 100644 --- a/backend/src/rag/embeddings.py +++ b/backend/src/rag/embeddings.py @@ -1,32 +1,32 @@ -import ollama -import os +import ollama +import os -client = ollama.Client(host="https://ollama.sirblob.co") -DEFAULT_MODEL = "nomic-embed-text:latest" +client =ollama .Client (host 
="https://ollama.sirblob.co") +DEFAULT_MODEL ="nomic-embed-text:latest" -def get_embedding(text, model=DEFAULT_MODEL): - try: - response = client.embeddings(model=model, prompt=text) - return response["embedding"] - except Exception as e: - print(f"Error getting embedding from Ollama: {e}") - raise e +def get_embedding (text ,model =DEFAULT_MODEL ): + try : + response =client .embeddings (model =model ,prompt =text ) + return response ["embedding"] + except Exception as e : + print (f"Error getting embedding from Ollama: {e }") + raise e -def get_embeddings_batch(texts, model=DEFAULT_MODEL, batch_size=50): - all_embeddings = [] - - for i in range(0, len(texts), batch_size): - batch = texts[i:i + batch_size] - try: - response = client.embed(model=model, input=batch) - - if "embeddings" in response: - all_embeddings.extend(response["embeddings"]) - else: - raise ValueError("Unexpected response format from client.embed") - - except Exception as e: - print(f"Error embedding batch {i}-{i+batch_size}: {e}") - raise e - - return all_embeddings +def get_embeddings_batch (texts ,model =DEFAULT_MODEL ,batch_size =50 ): + all_embeddings =[] + + for i in range (0 ,len (texts ),batch_size ): + batch =texts [i :i +batch_size ] + try : + response =client .embed (model =model ,input =batch ) + + if "embeddings"in response : + all_embeddings .extend (response ["embeddings"]) + else : + raise ValueError ("Unexpected response format from client.embed") + + except Exception as e : + print (f"Error embedding batch {i }-{i +batch_size }: {e }") + raise e + + return all_embeddings diff --git a/backend/src/rag/gemeni.py b/backend/src/rag/gemeni.py index 6566b69..b0870ae 100644 --- a/backend/src/rag/gemeni.py +++ b/backend/src/rag/gemeni.py @@ -1,43 +1,43 @@ -import os -from google import genai -from dotenv import load_dotenv +import os +from google import genai +from dotenv import load_dotenv -script_dir = os.path.dirname(os.path.abspath(__file__)) -load_dotenv(os.path.join(script_dir, 
'.env')) +script_dir =os .path .dirname (os .path .abspath (__file__ )) +load_dotenv (os .path .join (script_dir ,'.env')) -class GeminiClient: - def __init__(self): - self.api_key = os.getenv("GOOGLE_API_KEY") - - if not self.api_key: - raise ValueError("No GOOGLE_API_KEY found in .env file!") +class GeminiClient : + def __init__ (self ): + self .api_key =os .getenv ("GOOGLE_API_KEY") - self.client = genai.Client(api_key=self.api_key) - self.model_name = "gemini-2.0-flash" + if not self .api_key : + raise ValueError ("No GOOGLE_API_KEY found in .env file!") - def ask(self, prompt, context=""): - try: - if context: - full_message = f"Use this information to answer: {context}\n\nQuestion: {prompt}" - else: - full_message = prompt + self .client =genai .Client (api_key =self .api_key ) + self .model_name ="gemini-2.0-flash" - response = self.client.models.generate_content( - model=self.model_name, - contents=full_message, - config={ - 'system_instruction': 'You are a concise sustainability assistant. Your responses must be a single short paragraph, maximum 6 sentences long. Do not use bullet points or multiple sections.' - } + def ask (self ,prompt ,context =""): + try : + if context : + full_message =f"Use this information to answer: {context }\n\nQuestion: {prompt }" + else : + full_message =prompt + + response =self .client .models .generate_content ( + model =self .model_name , + contents =full_message , + config ={ + 'system_instruction':'You are a concise sustainability assistant. Your responses must be a single short paragraph, maximum 6 sentences long. Do not use bullet points or multiple sections.' + } ) - return response.text - - except Exception as e: - return f"Error talking to Gemini: {str(e)}" + return response .text -if __name__ == "__main__": - try: - brain = GeminiClient() - print("--- Testing Class Connection ---") - print(brain.ask("Hello! 
Give me a 1-sentence coding tip.")) - except Exception as e: - print(f"Failed to start Gemini: {e}") \ No newline at end of file + except Exception as e : + return f"Error talking to Gemini: {str (e )}" + +if __name__ =="__main__": + try : + brain =GeminiClient () + print ("--- Testing Class Connection ---") + print (brain .ask ("Hello! Give me a 1-sentence coding tip.")) + except Exception as e : + print (f"Failed to start Gemini: {e }") \ No newline at end of file diff --git a/backend/src/rag/ingest.py b/backend/src/rag/ingest.py index 364310f..094ef93 100644 --- a/backend/src/rag/ingest.py +++ b/backend/src/rag/ingest.py @@ -1,92 +1,92 @@ -import pandas as pd -from pypdf import PdfReader -import io -import os +import pandas as pd +from pypdf import PdfReader +import io +import os -def chunk_text(text, target_length=2000, overlap=100): - if not text: +def chunk_text (text ,target_length =2000 ,overlap =100 ): + if not text : return [] - - chunks = [] - paragraphs = text.split('\n\n') - current_chunk = "" - - for para in paragraphs: - if len(current_chunk) + len(para) > target_length: - if current_chunk: - chunks.append(current_chunk.strip()) - - if len(para) > target_length: - start = 0 - while start < len(para): - end = start + target_length - chunks.append(para[start:end].strip()) - start += (target_length - overlap) - current_chunk = "" - else: - current_chunk = para - else: - if current_chunk: - current_chunk += "\n\n" + para - else: - current_chunk = para - - if current_chunk: - chunks.append(current_chunk.strip()) - - return chunks -def load_csv(file_path): - df = pd.read_csv(file_path) - return df.apply(lambda x: ' | '.join(x.astype(str)), axis=1).tolist() + chunks =[] + paragraphs =text .split ('\n\n') + current_chunk ="" -def load_pdf(file_path): - reader = PdfReader(file_path) - text_chunks = [] - for page in reader.pages: - text = page.extract_text() - if text: - if len(text) > 4000: - text_chunks.extend(chunk_text(text)) - else: - 
text_chunks.append(text) - return text_chunks + for para in paragraphs : + if len (current_chunk )+len (para )>target_length : + if current_chunk : + chunks .append (current_chunk .strip ()) -def load_txt(file_path): - with open(file_path, 'r', encoding='utf-8') as f: - content = f.read() - return chunk_text(content) + if len (para )>target_length : + start =0 + while start 8000: - all_rows.extend(chunk_text(row_str)) - else: - all_rows.append(row_str) - return all_rows + if current_chunk : + chunks .append (current_chunk .strip ()) -def process_file(file_path): - ext = os.path.splitext(file_path)[1].lower() - if ext == '.csv': - return load_csv(file_path) - elif ext == '.pdf': - return load_pdf(file_path) - elif ext == '.txt': - return load_txt(file_path) - elif ext == '.xlsx': - return load_xlsx(file_path) - else: - raise ValueError(f"Unsupported file type: {ext}") + return chunks + +def load_csv (file_path ): + df =pd .read_csv (file_path ) + return df .apply (lambda x :' | '.join (x .astype (str )),axis =1 ).tolist () + +def load_pdf (file_path ): + reader =PdfReader (file_path ) + text_chunks =[] + for page in reader .pages : + text =page .extract_text () + if text : + if len (text )>4000 : + text_chunks .extend (chunk_text (text )) + else : + text_chunks .append (text ) + return text_chunks + +def load_txt (file_path ): + with open (file_path ,'r',encoding ='utf-8')as f : + content =f .read () + return chunk_text (content ) + +def load_xlsx (file_path ): + all_rows =[] + try : + sheets =pd .read_excel (file_path ,sheet_name =None ) + except Exception as e : + raise ValueError (f"Pandas read_excel failed: {e }") + + for sheet_name ,df in sheets .items (): + if df .empty : + continue + df =df .fillna ("") + for row in df .values : + row_items =[str (x )for x in row if str (x ).strip ()!=""] + if row_items : + row_str =f"Sheet: {str (sheet_name )} | "+" | ".join (row_items ) + if len (row_str )>8000 : + all_rows .extend (chunk_text (row_str )) + else : + 
all_rows .append (row_str ) + return all_rows + +def process_file (file_path ): + ext =os .path .splitext (file_path )[1 ].lower () + if ext =='.csv': + return load_csv (file_path ) + elif ext =='.pdf': + return load_pdf (file_path ) + elif ext =='.txt': + return load_txt (file_path ) + elif ext =='.xlsx': + return load_xlsx (file_path ) + else : + raise ValueError (f"Unsupported file type: {ext }") diff --git a/backend/src/rag/store.py b/backend/src/rag/store.py index a71d0ac..8ef9f23 100644 --- a/backend/src/rag/store.py +++ b/backend/src/rag/store.py @@ -1,27 +1,27 @@ -from .embeddings import get_embeddings_batch, get_embedding -from ..chroma.vector_store import insert_documents, search_documents +from .embeddings import get_embeddings_batch ,get_embedding +from ..chroma .vector_store import insert_documents ,search_documents -def ingest_documents(text_chunks, collection_name="rag_documents", source_file=None, category=None): - embeddings = get_embeddings_batch(text_chunks) - - metadata_list = None - if source_file or category: - metadata_list = [] - for _ in text_chunks: - meta = {} - if source_file: - meta["source"] = source_file - if category: - meta["category"] = category - metadata_list.append(meta) - - return insert_documents(text_chunks, embeddings, collection_name=collection_name, metadata_list=metadata_list) +def ingest_documents (text_chunks ,collection_name ="rag_documents",source_file =None ,category =None ): + embeddings =get_embeddings_batch (text_chunks ) -def vector_search(query_text, collection_name="rag_documents", num_results=5, category=None): - query_embedding = get_embedding(query_text) - - filter_metadata = None - if category: - filter_metadata = {"category": category} - - return search_documents(query_embedding, collection_name=collection_name, num_results=num_results, filter_metadata=filter_metadata) + metadata_list =None + if source_file or category : + metadata_list =[] + for _ in text_chunks : + meta ={} + if source_file : + meta 
["source"]=source_file + if category : + meta ["category"]=category + metadata_list .append (meta ) + + return insert_documents (text_chunks ,embeddings ,collection_name =collection_name ,metadata_list =metadata_list ) + +def vector_search (query_text ,collection_name ="rag_documents",num_results =5 ,category =None ): + query_embedding =get_embedding (query_text ) + + filter_metadata =None + if category : + filter_metadata ={"category":category } + + return search_documents (query_embedding ,collection_name =collection_name ,num_results =num_results ,filter_metadata =filter_metadata ) diff --git a/backend/src/routes/gemini.py b/backend/src/routes/gemini.py index c662ec8..9f66a40 100644 --- a/backend/src/routes/gemini.py +++ b/backend/src/routes/gemini.py @@ -1,62 +1,62 @@ -from flask import Blueprint, request, jsonify -from src.rag.gemeni import GeminiClient -from src.gemini import ask_gemini_with_rag +from flask import Blueprint ,request ,jsonify +from src .rag .gemeni import GeminiClient +from src .gemini import ask_gemini_with_rag -gemini_bp = Blueprint('gemini', __name__) -brain = None +gemini_bp =Blueprint ('gemini',__name__ ) +brain =None -def get_brain(): - global brain - if brain is None: - brain = GeminiClient() - return brain +def get_brain (): + global brain + if brain is None : + brain =GeminiClient () + return brain -@gemini_bp.route('/ask', methods=['POST']) -def ask(): - data = request.json - prompt = data.get("prompt") - context = data.get("context", "") +@gemini_bp .route ('/ask',methods =['POST']) +def ask (): + data =request .json + prompt =data .get ("prompt") + context =data .get ("context","") - if not prompt: - return jsonify({"error": "No prompt provided"}), 400 + if not prompt : + return jsonify ({"error":"No prompt provided"}),400 - try: - client = get_brain() - response = client.ask(prompt, context) - return jsonify({ - "status": "success", - "reply": response + try : + client =get_brain () + response =client .ask (prompt ,context ) + 
return jsonify ({ + "status":"success", + "reply":response }) - except Exception as e: - return jsonify({ - "status": "error", - "message": str(e) - }), 500 + except Exception as e : + return jsonify ({ + "status":"error", + "message":str (e ) + }),500 -@gemini_bp.route('/rag', methods=['POST']) -def rag(): - data = request.json - prompt = data.get("prompt") - category = data.get("category") - - if not prompt: - return jsonify({"error": "No prompt provided"}), 400 - - try: - response = ask_gemini_with_rag(prompt, category=category) - return jsonify({ - "status": "success", - "reply": response +@gemini_bp .route ('/rag',methods =['POST']) +def rag (): + data =request .json + prompt =data .get ("prompt") + category =data .get ("category") + + if not prompt : + return jsonify ({"error":"No prompt provided"}),400 + + try : + response =ask_gemini_with_rag (prompt ,category =category ) + return jsonify ({ + "status":"success", + "reply":response }) - except Exception as e: - return jsonify({ - "status": "error", - "message": str(e) - }), 500 + except Exception as e : + return jsonify ({ + "status":"error", + "message":str (e ) + }),500 -@gemini_bp.route('/vision', methods=['POST']) -def vision(): - return jsonify({ - "status": "error", - "message": "Vision endpoint not yet implemented" - }), 501 +@gemini_bp .route ('/vision',methods =['POST']) +def vision (): + return jsonify ({ + "status":"error", + "message":"Vision endpoint not yet implemented" + }),501 diff --git a/backend/src/routes/incidents.py b/backend/src/routes/incidents.py index bbb1f48..b307df4 100644 --- a/backend/src/routes/incidents.py +++ b/backend/src/routes/incidents.py @@ -2,65 +2,112 @@ Incident Report API - Handles greenwashing report submissions Uses structured outputs with Pydantic for reliable JSON responses """ -import base64 -import os -from datetime import datetime -from flask import Blueprint, request, jsonify -from google import genai -from pydantic import BaseModel, Field -from typing import 
List, Optional, Literal +import base64 +import os +import cv2 +import numpy as np +from datetime import datetime +from flask import Blueprint ,request ,jsonify +from google import genai +from pydantic import BaseModel ,Field +from typing import List ,Optional ,Literal -from src.ollama.detector import OllamaLogoDetector -from src.chroma.vector_store import search_documents, insert_documents -from src.rag.embeddings import get_embedding -from src.mongo.connection import get_mongo_client +from src .ollama .detector import OllamaLogoDetector +from src .chroma .vector_store import search_documents ,insert_documents +from src .rag .embeddings import get_embedding +from src .mongo .connection import get_mongo_client -incidents_bp = Blueprint('incidents', __name__) - -# Initialize detector lazily -_detector = None - -def get_detector(): - global _detector - if _detector is None: - _detector = OllamaLogoDetector() - return _detector +incidents_bp =Blueprint ('incidents',__name__ ) -# ============= Pydantic Models for Structured Outputs ============= +_detector =None -class GreenwashingAnalysis(BaseModel): +def get_detector (): + global _detector + if _detector is None : + _detector =OllamaLogoDetector () + return _detector + + +def compress_image (image_bytes :bytes ,max_width :int =800 ,quality :int =85 )->str : + """ + Compress image using OpenCV and return Base64 string + + Args: + image_bytes: Original image bytes + max_width: Maximum width for resized image + quality: JPEG quality (1-100) + + Returns: + Base64 encoded compressed image + """ + try : + + nparr =np .frombuffer (image_bytes ,np .uint8 ) + img =cv2 .imdecode (nparr ,cv2 .IMREAD_COLOR ) + + if img is None : + raise ValueError ("Failed to decode image") + + + height ,width =img .shape [:2 ] + + + if width >max_width : + ratio =max_width /width + new_width =max_width + new_height =int (height *ratio ) + img =cv2 .resize (img ,(new_width ,new_height ),interpolation =cv2 .INTER_AREA ) + + + encode_param =[int 
(cv2 .IMWRITE_JPEG_QUALITY ),quality ] + _ ,buffer =cv2 .imencode ('.jpg',img ,encode_param ) + + + compressed_base64 =base64 .b64encode (buffer ).decode ('utf-8') + + return compressed_base64 + + except Exception as e : + print (f"Image compression error: {e }") + + return base64 .b64encode (image_bytes ).decode ('utf-8') + + + + +class GreenwashingAnalysis (BaseModel ): """Structured output for greenwashing analysis""" - is_greenwashing: bool = Field(description="Whether this is a case of greenwashing") - confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of the analysis") - verdict: str = Field(description="Brief one-sentence verdict") - reasoning: str = Field(description="Detailed explanation of why this is or isn't greenwashing") - severity: Literal["high", "medium", "low"] = Field(description="Severity of the greenwashing if detected") - recommendations: str = Field(description="What consumers should know about this case") - key_claims: List[str] = Field(description="List of specific environmental claims made by the company") - red_flags: List[str] = Field(description="List of red flags or concerning practices identified") + is_greenwashing :bool =Field (description ="Whether this is a case of greenwashing") + confidence :Literal ["high","medium","low"]=Field (description ="Confidence level of the analysis") + verdict :str =Field (description ="Brief one-sentence verdict") + reasoning :str =Field (description ="Detailed explanation of why this is or isn't greenwashing") + severity :Literal ["high","medium","low"]=Field (description ="Severity of the greenwashing if detected") + recommendations :str =Field (description ="What consumers should know about this case") + key_claims :List [str ]=Field (description ="List of specific environmental claims made by the company") + red_flags :List [str ]=Field (description ="List of red flags or concerning practices identified") -class LogoDetection(BaseModel): +class LogoDetection 
(BaseModel ): """Structured output for logo detection from Ollama""" - brand: str = Field(description="The company or brand name detected") - confidence: Literal["high", "medium", "low"] = Field(description="Confidence level of detection") - location: str = Field(description="Location in image (e.g., center, top-left)") - category: str = Field(description="Product category if identifiable") + brand :str =Field (description ="The company or brand name detected") + confidence :Literal ["high","medium","low"]=Field (description ="Confidence level of detection") + location :str =Field (description ="Location in image (e.g., center, top-left)") + category :str =Field (description ="Product category if identifiable") -class ImageAnalysis(BaseModel): +class ImageAnalysis (BaseModel ): """Structured output for full image analysis""" - logos_detected: List[LogoDetection] = Field(description="List of logos/brands detected in the image") - total_count: int = Field(description="Total number of logos detected") - description: str = Field(description="Brief description of what's in the image") - environmental_claims: List[str] = Field(description="Any environmental or eco-friendly claims visible in the image") - packaging_description: str = Field(description="Description of the product packaging and design") + logos_detected :List [LogoDetection ]=Field (description ="List of logos/brands detected in the image") + total_count :int =Field (description ="Total number of logos detected") + description :str =Field (description ="Brief description of what's in the image") + environmental_claims :List [str ]=Field (description ="Any environmental or eco-friendly claims visible in the image") + packaging_description :str =Field (description ="Description of the product packaging and design") -# ============= Analysis Functions ============= -GREENWASHING_ANALYSIS_PROMPT = """You are an expert at detecting greenwashing - misleading environmental claims by companies. 
+ +GREENWASHING_ANALYSIS_PROMPT ="""You are an expert at detecting greenwashing - misleading environmental claims by companies. Analyze the following user-submitted report about a potential greenwashing incident: @@ -81,48 +128,48 @@ Based on this information, determine if this is a valid case of greenwashing. Co Provide your analysis in the structured format requested.""" -def analyze_with_gemini(product_name: str, user_description: str, detected_brand: str, - image_description: str, context: str) -> GreenwashingAnalysis: +def analyze_with_gemini (product_name :str ,user_description :str ,detected_brand :str , +image_description :str ,context :str )->GreenwashingAnalysis : """Send analysis request to Gemini with structured output""" - api_key = os.environ.get("GOOGLE_API_KEY") - if not api_key: - raise ValueError("GOOGLE_API_KEY not set") - - prompt = GREENWASHING_ANALYSIS_PROMPT.format( - product_name=product_name, - user_description=user_description, - detected_brand=detected_brand, - image_description=image_description, - context=context + api_key =os .environ .get ("GOOGLE_API_KEY") + if not api_key : + raise ValueError ("GOOGLE_API_KEY not set") + + prompt =GREENWASHING_ANALYSIS_PROMPT .format ( + product_name =product_name , + user_description =user_description , + detected_brand =detected_brand , + image_description =image_description , + context =context ) - - client = genai.Client(api_key=api_key) - - # Use structured output with Pydantic schema - response = client.models.generate_content( - model="gemini-3-flash-preview", - contents=prompt, - config={ - "response_mime_type": "application/json", - "response_json_schema": GreenwashingAnalysis.model_json_schema(), - } - ) - - # Validate and parse the response - analysis = GreenwashingAnalysis.model_validate_json(response.text) - return analysis + + client =genai .Client (api_key =api_key ) -def analyze_image_with_ollama(image_bytes: bytes) -> ImageAnalysis: + response =client .models .generate_content ( + 
model ="gemini-3-pro-preview", + contents =prompt , + config ={ + "response_mime_type":"application/json", + "response_json_schema":GreenwashingAnalysis .model_json_schema (), + } + ) + + + analysis =GreenwashingAnalysis .model_validate_json (response .text ) + return analysis + + +def analyze_image_with_ollama (image_bytes :bytes )->ImageAnalysis : """Analyze image using Ollama with structured output""" - try: - import ollama - - client = ollama.Client(host="https://ollama.sirblob.co") - - image_base64 = base64.b64encode(image_bytes).decode('utf-8') - - prompt = """Analyze this image for a greenwashing detection system. + try : + import ollama + + client =ollama .Client (host ="https://ollama.sirblob.co") + + image_base64 =base64 .b64encode (image_bytes ).decode ('utf-8') + + prompt ="""Analyze this image for a greenwashing detection system. Identify: 1. All visible logos, brand names, and company names @@ -131,275 +178,318 @@ Identify: Respond with structured JSON matching the schema provided.""" - response = client.chat( - model="ministral-3:latest", - messages=[{ - 'role': 'user', - 'content': prompt, - 'images': [image_base64], - }], - format=ImageAnalysis.model_json_schema(), - options={'temperature': 0.1} + response =client .chat ( + model ="ministral-3:latest", + messages =[{ + 'role':'user', + 'content':prompt , + 'images':[image_base64 ], + }], + format =ImageAnalysis .model_json_schema (), + options ={'temperature':0.1 } ) - - # Validate and parse - analysis = ImageAnalysis.model_validate_json(response['message']['content']) - return analysis - - except Exception as e: - print(f"Ollama structured analysis failed: {e}") - # Fall back to basic detection - detector = get_detector() - result = detector.detect_from_bytes(image_bytes) - - # Convert to structured format - logos = [] - for logo in result.get('logos_detected', []): - logos.append(LogoDetection( - brand=logo.get('brand', 'Unknown'), - confidence=logo.get('confidence', 'low'), - 
location=logo.get('location', 'unknown'), - category=logo.get('category', 'unknown') + + + analysis =ImageAnalysis .model_validate_json (response ['message']['content']) + return analysis + + except Exception as e : + print (f"Ollama structured analysis failed: {e }") + + detector =get_detector () + result =detector .detect_from_bytes (image_bytes ) + + + logos =[] + for logo in result .get ('logos_detected',[]): + logos .append (LogoDetection ( + brand =logo .get ('brand','Unknown'), + confidence =logo .get ('confidence','low'), + location =logo .get ('location','unknown'), + category =logo .get ('category','unknown') )) - - return ImageAnalysis( - logos_detected=logos, - total_count=result.get('total_count', 0), - description=result.get('description', 'No description available'), - environmental_claims=[], - packaging_description="" + + return ImageAnalysis ( + logos_detected =logos , + total_count =result .get ('total_count',0 ), + description =result .get ('description','No description available'), + environmental_claims =[], + packaging_description ="" ) -def save_to_mongodb(incident_data: dict) -> str: +def save_to_mongodb (incident_data :dict )->str : """Save incident to MongoDB and return the ID""" - client = get_mongo_client() - db = client["ethix"] - collection = db["incidents"] - - result = collection.insert_one(incident_data) - return str(result.inserted_id) + client =get_mongo_client () + db =client ["ethix"] + collection =db ["incidents"] + + result =collection .insert_one (incident_data ) + return str (result .inserted_id ) -def save_to_chromadb(incident_data: dict, incident_id: str): - """Save incident as context for the chatbot""" - analysis = incident_data['analysis'] - - # Create a rich text representation of the incident - red_flags = "\n".join(f"- {flag}" for flag in analysis.get('red_flags', [])) - key_claims = "\n".join(f"- {claim}" for claim in analysis.get('key_claims', [])) - - text = f"""GREENWASHING INCIDENT REPORT #{incident_id} -Date: 
{incident_data['created_at']} -Company/Product: {incident_data['product_name']} ({incident_data.get('detected_brand', 'Unknown brand')}) +def save_to_chromadb (incident_data :dict ,incident_id :str ): + """ + Save incident as context for the chatbot + Includes verdict, full analysis, and environmental impact information + """ + analysis =incident_data ['analysis'] -USER REPORT: {incident_data['user_description']} -ANALYSIS VERDICT: {analysis['verdict']} -Confidence: {analysis['confidence']} -Severity: {analysis['severity']} + red_flags ="\n".join (f"- {flag }"for flag in analysis .get ('red_flags',[])) + key_claims ="\n".join (f"- {claim }"for claim in analysis .get ('key_claims',[])) + env_claims ="\n".join (f"- {claim }"for claim in incident_data .get ('environmental_claims',[])) -DETAILED REASONING: -{analysis['reasoning']} -KEY ENVIRONMENTAL CLAIMS MADE: -{key_claims} + text =f"""GREENWASHING INCIDENT REPORT #{incident_id } +Report Date: {incident_data ['created_at']} +Company/Product: {incident_data ['product_name']} +Detected Brand: {incident_data .get ('detected_brand','Unknown brand')} +Status: {incident_data ['status']} -RED FLAGS IDENTIFIED: -{red_flags} +=== VERDICT === +{analysis ['verdict']} -CONSUMER RECOMMENDATIONS: -{analysis['recommendations']} +Greenwashing Detected: {'YES'if analysis ['is_greenwashing']else 'NO'} +Confidence Level: {analysis ['confidence']} +Severity Assessment: {analysis ['severity']} + +=== USER COMPLAINT === +{incident_data ['user_description']} + +=== IMAGE ANALYSIS === +{incident_data .get ('image_description','No image analysis available')} + +=== ENVIRONMENTAL CLAIMS IDENTIFIED === +{env_claims if env_claims else 'No specific environmental claims identified'} + +=== DETAILED ANALYSIS & REASONING === +{analysis ['reasoning']} + +=== KEY MARKETING CLAIMS === +{key_claims if key_claims else 'No key claims identified'} + +=== RED FLAGS IDENTIFIED === +{red_flags if red_flags else 'No specific red flags identified'} + +=== 
CONSUMER RECOMMENDATIONS === +{analysis ['recommendations']} + +=== ENVIRONMENTAL IMPACT ASSESSMENT === +This report highlights potential misleading environmental claims by {incident_data .get ('detected_brand','the company')}. +Consumers should be aware that {analysis ['severity']} severity greenwashing has been identified with {analysis ['confidence']} confidence. +This incident has been documented for future reference and to help inform sustainable purchasing decisions. """ - - # Get embedding for the incident - embedding = get_embedding(text) - - # Store in ChromaDB with metadata - metadata = { - "type": "incident_report", - "source": f"incident_{incident_id}", - "product_name": incident_data['product_name'], - "brand": incident_data.get('detected_brand', 'Unknown'), - "severity": analysis['severity'], - "confidence": analysis['confidence'], - "is_greenwashing": True, - "created_at": incident_data['created_at'] + + + embedding =get_embedding (text ) + + + metadata ={ + "type":"incident_report", + "source":f"incident_{incident_id }", + "product_name":incident_data ['product_name'], + "brand":incident_data .get ('detected_brand','Unknown'), + "severity":analysis ['severity'], + "confidence":analysis ['confidence'], + "is_greenwashing":True , + "verdict":analysis ['verdict'], + "status":incident_data ['status'], + "created_at":incident_data ['created_at'], + "num_red_flags":len (analysis .get ('red_flags',[])), + "num_claims":len (analysis .get ('key_claims',[])) } - - insert_documents( - texts=[text], - embeddings=[embedding], - metadata_list=[metadata] + + insert_documents ( + texts =[text ], + embeddings =[embedding ], + metadata_list =[metadata ] ) + print (f"✓ Incident #{incident_id } saved to ChromaDB for AI chat context") -# ============= API Endpoints ============= -@incidents_bp.route('/submit', methods=['POST']) -def submit_incident(): + + +@incidents_bp .route ('/submit',methods =['POST']) +def submit_incident (): """ Submit a greenwashing incident 
report Expects JSON with: - product_name: Name of the product/company - description: User's description of the misleading claim - - image: Base64 encoded image (optional, but recommended) + - report_type: 'product' or 'company' + - image: Base64 encoded image (for product reports) + - pdf_data: Base64 encoded PDF (for company reports) """ - data = request.json - - if not data: - return jsonify({"error": "No data provided"}), 400 - - product_name = data.get('product_name', '').strip() - user_description = data.get('description', '').strip() - image_base64 = data.get('image') # Base64 encoded image - - if not product_name: - return jsonify({"error": "Product name is required"}), 400 - - if not user_description: - return jsonify({"error": "Description is required"}), 400 - - try: - # Step 1: Analyze image with Ollama (structured output) - detected_brand = "Unknown" - image_description = "No image provided" - environmental_claims = [] - - if image_base64: - try: - # Remove data URL prefix if present - if ',' in image_base64: - image_base64 = image_base64.split(',')[1] - - image_bytes = base64.b64decode(image_base64) - - # Use structured image analysis - image_analysis = analyze_image_with_ollama(image_bytes) - - if image_analysis.logos_detected: - detected_brand = image_analysis.logos_detected[0].brand - - image_description = image_analysis.description - environmental_claims = image_analysis.environmental_claims - - except Exception as e: - print(f"Image analysis error: {e}") - # Continue without image analysis - - # Step 2: Get relevant context from vector database - search_query = f"{product_name} {detected_brand} environmental claims sustainability greenwashing" - query_embedding = get_embedding(search_query) - search_results = search_documents(query_embedding, num_results=5) - - context = "" - for res in search_results: - context += f"--- Document ---\n{res['text'][:500]}\n\n" - - if not context: - context = "No prior information found about this company in our 
database." - - # Add environmental claims from image to context - if environmental_claims: - context += "\n--- Claims visible in submitted image ---\n" - context += "\n".join(f"- {claim}" for claim in environmental_claims) - - # Step 3: Analyze with Gemini (structured output) - analysis = analyze_with_gemini( - product_name=product_name, - user_description=user_description, - detected_brand=detected_brand, - image_description=image_description, - context=context + data =request .json + + if not data : + return jsonify ({"error":"No data provided"}),400 + + product_name =data .get ('product_name','').strip () + user_description =data .get ('description','').strip () + report_type =data .get ('report_type','product') + image_base64 =data .get ('image') + + if not product_name : + return jsonify ({"error":"Product name is required"}),400 + + if not user_description : + return jsonify ({"error":"Description is required"}),400 + + try : + + detected_brand ="Unknown" + image_description ="No image provided" + environmental_claims =[] + compressed_image_base64 =None + + if report_type =='product'and image_base64 : + try : + + if ','in image_base64 : + image_base64 =image_base64 .split (',')[1 ] + + image_bytes =base64 .b64decode (image_base64 ) + + + print ("Compressing image with OpenCV...") + compressed_image_base64 =compress_image (image_bytes ,max_width =600 ,quality =75 ) + + + image_analysis =analyze_image_with_ollama (image_bytes ) + + if image_analysis .logos_detected : + detected_brand =image_analysis .logos_detected [0 ].brand + + image_description =image_analysis .description + environmental_claims =image_analysis .environmental_claims + + except Exception as e : + print (f"Image processing error: {e }") + + + + + search_query =f"{product_name } {detected_brand } environmental claims sustainability greenwashing" + query_embedding =get_embedding (search_query ) + search_results =search_documents (query_embedding ,num_results =5 ) + + context ="" + for res in 
search_results : + context +=f"--- Document ---\n{res ['text'][:500 ]}\n\n" + + if not context : + context ="No prior information found about this company in our database." + + + if environmental_claims : + context +="\n--- Claims visible in submitted image ---\n" + context +="\n".join (f"- {claim }"for claim in environmental_claims ) + + + analysis =analyze_with_gemini ( + product_name =product_name , + user_description =user_description , + detected_brand =detected_brand , + image_description =image_description , + context =context ) - - # Convert Pydantic model to dict - analysis_dict = analysis.model_dump() - - # Step 4: Prepare incident data - incident_data = { - "product_name": product_name, - "user_description": user_description, - "detected_brand": detected_brand, - "image_description": image_description, - "environmental_claims": environmental_claims, - "analysis": analysis_dict, - "is_greenwashing": analysis.is_greenwashing, - "created_at": datetime.utcnow().isoformat(), - "status": "confirmed" if analysis.is_greenwashing else "dismissed" + + + analysis_dict =analysis .model_dump () + + + incident_data ={ + "product_name":product_name , + "user_description":user_description , + "detected_brand":detected_brand , + "image_description":image_description , + "environmental_claims":environmental_claims , + "analysis":analysis_dict , + "is_greenwashing":analysis .is_greenwashing , + "created_at":datetime .utcnow ().isoformat (), + "status":"confirmed"if analysis .is_greenwashing else "dismissed", + "report_type":report_type } - - incident_id = None - - # Step 5: If greenwashing detected, save to databases - if analysis.is_greenwashing: - # Save to MongoDB - incident_id = save_to_mongodb(incident_data) - - # Save to ChromaDB for chatbot context - save_to_chromadb(incident_data, incident_id) - - return jsonify({ - "status": "success", - "is_greenwashing": analysis.is_greenwashing, - "incident_id": incident_id, - "analysis": analysis_dict, - "detected_brand": 
detected_brand, - "environmental_claims": environmental_claims + + + if compressed_image_base64 : + incident_data ["image_base64"]=compressed_image_base64 + + incident_id =None + + + if analysis .is_greenwashing : + + incident_id =save_to_mongodb (incident_data ) + + + save_to_chromadb (incident_data ,incident_id ) + + return jsonify ({ + "status":"success", + "is_greenwashing":analysis .is_greenwashing , + "incident_id":incident_id , + "analysis":analysis_dict , + "detected_brand":detected_brand , + "environmental_claims":environmental_claims }) - - except Exception as e: - import traceback - traceback.print_exc() - return jsonify({ - "status": "error", - "message": str(e) - }), 500 + + except Exception as e : + import traceback + traceback .print_exc () + return jsonify ({ + "status":"error", + "message":str (e ) + }),500 -@incidents_bp.route('/list', methods=['GET']) -def list_incidents(): +@incidents_bp .route ('/list',methods =['GET']) +def list_incidents (): """Get all confirmed greenwashing incidents""" - try: - client = get_mongo_client() - db = client["ethix"] - collection = db["incidents"] - - # Get recent incidents with full analysis details - incidents = list(collection.find( - {"is_greenwashing": True}, - {"_id": 1, "product_name": 1, "detected_brand": 1, - "user_description": 1, "analysis": 1, "created_at": 1} - ).sort("created_at", -1).limit(50)) - - # Convert ObjectId to string - for inc in incidents: - inc["_id"] = str(inc["_id"]) - - return jsonify(incidents) - - except Exception as e: - return jsonify({"error": str(e)}), 500 + try : + client =get_mongo_client () + db =client ["ethix"] + collection =db ["incidents"] -@incidents_bp.route('/', methods=['GET']) -def get_incident(incident_id): + incidents =list (collection .find ( + {"is_greenwashing":True }, + {"_id":1 ,"product_name":1 ,"detected_brand":1 , + "user_description":1 ,"analysis":1 ,"created_at":1 , + "image_base64":1 ,"report_type":1 } + ).sort ("created_at",-1 ).limit (50 )) + + + for 
inc in incidents : + inc ["_id"]=str (inc ["_id"]) + + return jsonify (incidents ) + + except Exception as e : + return jsonify ({"error":str (e )}),500 + + +@incidents_bp .route ('/',methods =['GET']) +def get_incident (incident_id ): """Get a specific incident by ID""" - try: - from bson import ObjectId - - client = get_mongo_client() - db = client["ethix"] - collection = db["incidents"] - - incident = collection.find_one({"_id": ObjectId(incident_id)}) - - if not incident: - return jsonify({"error": "Incident not found"}), 404 - - incident["_id"] = str(incident["_id"]) - return jsonify(incident) - - except Exception as e: - return jsonify({"error": str(e)}), 500 + try : + from bson import ObjectId + + client =get_mongo_client () + db =client ["ethix"] + collection =db ["incidents"] + + incident =collection .find_one ({"_id":ObjectId (incident_id )}) + + if not incident : + return jsonify ({"error":"Incident not found"}),404 + + incident ["_id"]=str (incident ["_id"]) + return jsonify (incident ) + + except Exception as e : + return jsonify ({"error":str (e )}),500 diff --git a/backend/src/routes/main.py b/backend/src/routes/main.py index d615f68..9f77a8f 100644 --- a/backend/src/routes/main.py +++ b/backend/src/routes/main.py @@ -1,7 +1,7 @@ -from flask import Blueprint +from flask import Blueprint -main_bp = Blueprint('main', __name__) +main_bp =Blueprint ('main',__name__ ) -@main_bp.route('/') -def index(): +@main_bp .route ('/') +def index (): return "Hello from the organized Flask App!" 
diff --git a/backend/src/routes/rag.py b/backend/src/routes/rag.py index dae504c..64b3127 100644 --- a/backend/src/routes/rag.py +++ b/backend/src/routes/rag.py @@ -1,24 +1,24 @@ -from flask import Blueprint, request, jsonify -from ..rag.store import vector_search, ingest_documents +from flask import Blueprint ,request ,jsonify +from ..rag .store import vector_search ,ingest_documents -rag_bp = Blueprint('rag', __name__) +rag_bp =Blueprint ('rag',__name__ ) -@rag_bp.route('/ingest', methods=['POST']) -def ingest(): - data = request.json - text_chunks = data.get('chunks', []) - if not text_chunks: - return jsonify({"error": "No chunks provided"}), 400 - - count = ingest_documents(text_chunks) - return jsonify({"message": f"Ingested {count} documents"}), 201 +@rag_bp .route ('/ingest',methods =['POST']) +def ingest (): + data =request .json + text_chunks =data .get ('chunks',[]) + if not text_chunks : + return jsonify ({"error":"No chunks provided"}),400 -@rag_bp.route('/search', methods=['POST']) -def search(): - data = request.json - query = data.get('query') - if not query: - return jsonify({"error": "No query provided"}), 400 - - results = vector_search(query) - return jsonify({"results": results}), 200 + count =ingest_documents (text_chunks ) + return jsonify ({"message":f"Ingested {count } documents"}),201 + +@rag_bp .route ('/search',methods =['POST']) +def search (): + data =request .json + query =data .get ('query') + if not query : + return jsonify ({"error":"No query provided"}),400 + + results =vector_search (query ) + return jsonify ({"results":results }),200 diff --git a/backend/src/routes/reports.py b/backend/src/routes/reports.py index 4b80f66..bdb22b7 100644 --- a/backend/src/routes/reports.py +++ b/backend/src/routes/reports.py @@ -1,233 +1,233 @@ -from flask import Blueprint, jsonify, request -from src.chroma.vector_store import get_all_metadatas, search_documents -from src.rag.embeddings import get_embedding +from flask import Blueprint ,jsonify 
,request +from src .chroma .vector_store import get_all_metadatas ,search_documents +from src .rag .embeddings import get_embedding -reports_bp = Blueprint('reports', __name__) +reports_bp =Blueprint ('reports',__name__ ) -@reports_bp.route('/', methods=['GET']) -def get_reports(): - try: - # Fetch all metadatas to ensure we get diversity. - # 60k items is manageable for metadata-only fetch. - metadatas = get_all_metadatas() - - unique_reports = {} - - for meta in metadatas: - filename = meta.get('source') or meta.get('filename') - if not filename: - continue - - # Skip incident reports - these are user-submitted greenwashing reports - if meta.get('type') == 'incident_report' or filename.startswith('incident_'): - continue - - - if filename not in unique_reports: - # Attempt to extract info from filename - # Common patterns: - # 2020-tesla-impact-report.pdf - # google-2023-environmental-report.pdf - # ghgp_data_2021.xlsx - - company_name = "Unknown" - year = "N/A" - sector = "Other" - - lower_name = filename.lower() - - # Extract Year - import re - year_match = re.search(r'20\d{2}', lower_name) - if year_match: - year = year_match.group(0) - - # Extract Company (heuristics) - if 'tesla' in lower_name: - company_name = "Tesla" - sector = "Automotive" - elif 'google' in lower_name: - company_name = "Google" - sector = "Tech" - elif 'apple' in lower_name: - company_name = "Apple" - sector = "Tech" - elif 'microsoft' in lower_name: - company_name = "Microsoft" - sector = "Tech" - elif 'amazon' in lower_name: - company_name = "Amazon" - sector = "Tech" - elif 'boeing' in lower_name: - company_name = "Boeing" - sector = "Aerospace" - elif 'ghgp' in lower_name: - company_name = "GHGP Data" - sector = "Data" - elif 'salesforce' in lower_name: - company_name = "Salesforce" - sector = "Tech" - elif 'hp ' in lower_name or 'hp-' in lower_name: - company_name = "HP" - sector = "Tech" - else: - # Fallback: capitalize first word of filename - parts = re.split(r'[-_.]', filename) 
- if parts: - company_name = parts[0].capitalize() - if company_name.isdigit(): # If starts with year - company_name = parts[1].capitalize() if len(parts) > 1 else "Unknown" +@reports_bp .route ('/',methods =['GET']) +def get_reports (): + try : - unique_reports[filename] = { - 'company_name': company_name, - 'year': year, - 'sector': sector, - 'greenwashing_score': meta.get('greenwashing_score', 0), # Likely 0 - 'filename': filename, - 'title': f"{company_name} {year} Report" + + metadatas =get_all_metadatas () + + unique_reports ={} + + for meta in metadatas : + filename =meta .get ('source')or meta .get ('filename') + if not filename : + continue + + + if meta .get ('type')=='incident_report'or filename .startswith ('incident_'): + continue + + + if filename not in unique_reports : + + + + + + + company_name ="Unknown" + year ="N/A" + sector ="Other" + + lower_name =filename .lower () + + + import re + year_match =re .search (r'20\d{2}',lower_name ) + if year_match : + year =year_match .group (0 ) + + + if 'tesla'in lower_name : + company_name ="Tesla" + sector ="Automotive" + elif 'google'in lower_name : + company_name ="Google" + sector ="Tech" + elif 'apple'in lower_name : + company_name ="Apple" + sector ="Tech" + elif 'microsoft'in lower_name : + company_name ="Microsoft" + sector ="Tech" + elif 'amazon'in lower_name : + company_name ="Amazon" + sector ="Tech" + elif 'boeing'in lower_name : + company_name ="Boeing" + sector ="Aerospace" + elif 'ghgp'in lower_name : + company_name ="GHGP Data" + sector ="Data" + elif 'salesforce'in lower_name : + company_name ="Salesforce" + sector ="Tech" + elif 'hp 'in lower_name or 'hp-'in lower_name : + company_name ="HP" + sector ="Tech" + else : + + parts =re .split (r'[-_.]',filename ) + if parts : + company_name =parts [0 ].capitalize () + if company_name .isdigit (): + company_name =parts [1 ].capitalize ()if len (parts )>1 else "Unknown" + + unique_reports [filename ]={ + 'company_name':company_name , + 'year':year 
, + 'sector':sector , + 'greenwashing_score':meta .get ('greenwashing_score',0 ), + 'filename':filename , + 'title':f"{company_name } {year } Report" } - - reports_list = list(unique_reports.values()) - return jsonify(reports_list) - - except Exception as e: - print(f"Error fetching reports: {e}") - import traceback - traceback.print_exc() - return jsonify({'error': str(e)}), 500 -@reports_bp.route('/search', methods=['POST']) -def search_reports(): - data = request.json - query = data.get('query', '') - - if not query: - return jsonify([]) - - try: - import re - - # Get embedding for the query - query_embedding = get_embedding(query) - - # Search in Chroma - get more results to filter - results = search_documents(query_embedding, num_results=50) - - query_lower = query.lower() - - # Helper function to extract company info - def extract_company_info(filename): - company_name = "Unknown" - year = "N/A" - sector = "Other" - - lower_name = filename.lower() - - # Extract Year - year_match = re.search(r'20\d{2}', lower_name) - if year_match: - year = year_match.group(0) - - # Extract Company (heuristics) - if 'tesla' in lower_name: - company_name = "Tesla" - sector = "Automotive" - elif 'google' in lower_name: - company_name = "Google" - sector = "Tech" - elif 'apple' in lower_name: - company_name = "Apple" - sector = "Tech" - elif 'microsoft' in lower_name: - company_name = "Microsoft" - sector = "Tech" - elif 'amazon' in lower_name: - company_name = "Amazon" - sector = "Tech" - elif 'boeing' in lower_name: - company_name = "Boeing" - sector = "Aerospace" - elif 'ghgp' in lower_name: - company_name = "GHGP Data" - sector = "Data" - elif 'salesforce' in lower_name: - company_name = "Salesforce" - sector = "Tech" - elif 'hp ' in lower_name or 'hp-' in lower_name or lower_name.startswith('hp'): - company_name = "HP" - sector = "Tech" - else: - parts = re.split(r'[-_.]', filename) - if parts: - company_name = parts[0].capitalize() - if company_name.isdigit(): - 
company_name = parts[1].capitalize() if len(parts) > 1 else "Unknown" - - return company_name, year, sector - - output = [] - seen_filenames = set() - - for item in results: - meta = item.get('metadata', {}) - text = item.get('text', '') - - filename = meta.get('source') or meta.get('filename', 'Unknown') - - # Skip duplicates - if filename in seen_filenames: - continue - seen_filenames.add(filename) - - company_name, year, sector = extract_company_info(filename) - - # Calculate match score - boost if query matches company/filename - match_boost = 0 - if query_lower in filename.lower(): - match_boost = 1000 # Strong boost for filename match - if query_lower in company_name.lower(): - match_boost = 1000 # Strong boost for company match - - # Semantic score (inverted distance, higher = better) - semantic_score = 1 / (item.get('score', 1) + 0.001) if item.get('score') else 0 - - combined_score = match_boost + semantic_score - - # Format snippet - snippet = text[:300] + "..." if len(text) > 300 else text - - output.append({ - 'company_name': company_name, - 'year': year, - 'filename': filename, - 'sector': sector, - 'greenwashing_score': meta.get('greenwashing_score', 0), - 'snippet': snippet, - 'relevance_score': item.get('score'), - '_combined_score': combined_score + reports_list =list (unique_reports .values ()) + return jsonify (reports_list ) + + except Exception as e : + print (f"Error fetching reports: {e }") + import traceback + traceback .print_exc () + return jsonify ({'error':str (e )}),500 + +@reports_bp .route ('/search',methods =['POST']) +def search_reports (): + data =request .json + query =data .get ('query','') + + if not query : + return jsonify ([]) + + try : + import re + + + query_embedding =get_embedding (query ) + + + results =search_documents (query_embedding ,num_results =50 ) + + query_lower =query .lower () + + + def extract_company_info (filename ): + company_name ="Unknown" + year ="N/A" + sector ="Other" + + lower_name =filename .lower 
() + + + year_match =re .search (r'20\d{2}',lower_name ) + if year_match : + year =year_match .group (0 ) + + + if 'tesla'in lower_name : + company_name ="Tesla" + sector ="Automotive" + elif 'google'in lower_name : + company_name ="Google" + sector ="Tech" + elif 'apple'in lower_name : + company_name ="Apple" + sector ="Tech" + elif 'microsoft'in lower_name : + company_name ="Microsoft" + sector ="Tech" + elif 'amazon'in lower_name : + company_name ="Amazon" + sector ="Tech" + elif 'boeing'in lower_name : + company_name ="Boeing" + sector ="Aerospace" + elif 'ghgp'in lower_name : + company_name ="GHGP Data" + sector ="Data" + elif 'salesforce'in lower_name : + company_name ="Salesforce" + sector ="Tech" + elif 'hp 'in lower_name or 'hp-'in lower_name or lower_name .startswith ('hp'): + company_name ="HP" + sector ="Tech" + else : + parts =re .split (r'[-_.]',filename ) + if parts : + company_name =parts [0 ].capitalize () + if company_name .isdigit (): + company_name =parts [1 ].capitalize ()if len (parts )>1 else "Unknown" + + return company_name ,year ,sector + + output =[] + seen_filenames =set () + + for item in results : + meta =item .get ('metadata',{}) + text =item .get ('text','') + + filename =meta .get ('source')or meta .get ('filename','Unknown') + + + if filename in seen_filenames : + continue + seen_filenames .add (filename ) + + company_name ,year ,sector =extract_company_info (filename ) + + + match_boost =0 + if query_lower in filename .lower (): + match_boost =1000 + if query_lower in company_name .lower (): + match_boost =1000 + + + semantic_score =1 /(item .get ('score',1 )+0.001 )if item .get ('score')else 0 + + combined_score =match_boost +semantic_score + + + snippet =text [:300 ]+"..."if len (text )>300 else text + + output .append ({ + 'company_name':company_name , + 'year':year , + 'filename':filename , + 'sector':sector , + 'greenwashing_score':meta .get ('greenwashing_score',0 ), + 'snippet':snippet , + 'relevance_score':item .get 
('score'), + '_combined_score':combined_score }) - - # Sort by combined score (descending - higher is better) - output.sort(key=lambda x: x.get('_combined_score', 0), reverse=True) - - # Remove internal score field and limit results - for item in output: - item.pop('_combined_score', None) - - return jsonify(output[:20]) - except Exception as e: - print(f"Error searching reports: {e}") - return jsonify({'error': str(e)}), 500 -@reports_bp.route('/view/', methods=['GET']) -def view_report_file(filename): - import os - from flask import send_from_directory - - # Dataset path relative to this file - # src/routes/reports.py -> src/routes -> src -> backend -> dataset - # So ../../../dataset - current_dir = os.path.dirname(os.path.abspath(__file__)) - dataset_dir = os.path.join(current_dir, '..', '..', 'dataset') - - return send_from_directory(dataset_dir, filename) + + output .sort (key =lambda x :x .get ('_combined_score',0 ),reverse =True ) + + + for item in output : + item .pop ('_combined_score',None ) + + return jsonify (output [:20 ]) + except Exception as e : + print (f"Error searching reports: {e }") + return jsonify ({'error':str (e )}),500 + +@reports_bp .route ('/view/',methods =['GET']) +def view_report_file (filename ): + import os + from flask import send_from_directory + + + + + current_dir =os .path .dirname (os .path .abspath (__file__ )) + dataset_dir =os .path .join (current_dir ,'..','..','dataset') + + return send_from_directory (dataset_dir ,filename ) diff --git a/frontend/package.json b/frontend/package.json index 3ff0c7d..09a2963 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -30,6 +30,7 @@ "@sveltejs/kit": "^2.50.1", "@sveltejs/vite-plugin-svelte": "^5.1.1", "@tauri-apps/cli": "^2.9.6", + "@types/node": "^25.0.10", "svelte": "^5.48.2", "svelte-check": "^4.3.5", "typescript": "~5.6.3", diff --git a/frontend/server/index.js b/frontend/server/index.js index eca6c12..22b0d12 100644 --- a/frontend/server/index.js +++ 
b/frontend/server/index.js @@ -9,14 +9,14 @@ const __dirname = path.dirname(__filename); const app = express(); const PORT = process.env.PORT || 3000; -// Enable gzip compression + app.use(compression()); -// Serve static files from the build directory (one level up from server folder) + const buildPath = path.join(__dirname, '../build'); app.use(express.static(buildPath)); -// Handle SPA routing: serve index.html for any unknown routes + app.get(/.*/, (req, res) => { res.sendFile(path.join(buildPath, 'index.html')); }); diff --git a/frontend/src-tauri/src/lib.rs b/frontend/src-tauri/src/lib.rs index 4a277ef..5df8c86 100644 --- a/frontend/src-tauri/src/lib.rs +++ b/frontend/src-tauri/src/lib.rs @@ -1,4 +1,4 @@ -// Learn more about Tauri commands at https://tauri.app/develop/calling-rust/ + #[tauri::command] fn greet(name: &str) -> String { format!("Hello, {}! You've been greeted from Rust!", name) diff --git a/frontend/src-tauri/src/main.rs b/frontend/src-tauri/src/main.rs index 034cf91..96ca189 100644 --- a/frontend/src-tauri/src/main.rs +++ b/frontend/src-tauri/src/main.rs @@ -1,4 +1,4 @@ -// Prevents additional console window on Windows in release, DO NOT REMOVE!! 
+ #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] fn main() { diff --git a/frontend/src/lib/components/ParallaxLandscape.svelte b/frontend/src/lib/components/ParallaxLandscape.svelte index 614c5a0..c7ca33a 100644 --- a/frontend/src/lib/components/ParallaxLandscape.svelte +++ b/frontend/src/lib/components/ParallaxLandscape.svelte @@ -201,7 +201,7 @@ const config = activeConfig(); if (!config.staticScene || config.scenes) { - // Always use window scroll now + scrollContainer = null; updateMeasurements(); } diff --git a/frontend/src/lib/components/catalogue/CatalogueHeader.svelte b/frontend/src/lib/components/catalogue/CatalogueHeader.svelte index 44477f3..db19c4b 100644 --- a/frontend/src/lib/components/catalogue/CatalogueHeader.svelte +++ b/frontend/src/lib/components/catalogue/CatalogueHeader.svelte @@ -1,5 +1,6 @@
- -
-

+ +
+

Sustainability Database

-

+

{#if viewMode === "company"} Search within verified company reports and impact assessments {:else} @@ -33,35 +42,35 @@

- -
+ +
Company User Reports @@ -69,33 +78,35 @@
{#if viewMode === "company"} -
+
- +
-
+
{#each categories as category} {/each}
+ + {/if}
diff --git a/frontend/src/lib/components/catalogue/CompanyModal.svelte b/frontend/src/lib/components/catalogue/CompanyModal.svelte index a5fd451..9ec5894 100644 --- a/frontend/src/lib/components/catalogue/CompanyModal.svelte +++ b/frontend/src/lib/components/catalogue/CompanyModal.svelte @@ -25,12 +25,13 @@ aria-label="Close modal" >
e.stopPropagation()} onkeydown={(e) => e.stopPropagation()} transition:scale={{ duration: 300, start: 0.95 }} - role="document" - tabindex="0" + role="dialog" + aria-modal="true" + tabindex="-1" >
diff --git a/frontend/src/lib/components/catalogue/IncidentModal.svelte b/frontend/src/lib/components/catalogue/IncidentModal.svelte index d8b2423..249e019 100644 --- a/frontend/src/lib/components/catalogue/IncidentModal.svelte +++ b/frontend/src/lib/components/catalogue/IncidentModal.svelte @@ -8,6 +8,7 @@ detected_brand: string; user_description?: string; created_at: string; + image_base64?: string; analysis: { verdict: string; confidence: string; @@ -25,7 +26,7 @@
e.key === "Escape" && onclose()} transition:fade={{ duration: 200 }} @@ -33,8 +34,10 @@ tabindex="0" aria-label="Close modal" > + +
e.stopPropagation()} onkeydown={(e) => e.stopPropagation()} transition:scale={{ duration: 300, start: 0.95 }} @@ -42,105 +45,138 @@ tabindex="0" >
-
+
-

+

{incident.product_name}

{#if incident.detected_brand && incident.detected_brand !== "Unknown"} - Brand: {incident.detected_brand} {/if}
-
- -
+
+ + {#if incident.image_base64} +
+ Evidence +
+ + Evidence of Greenwashing +
+
+ {/if} + +
{incident.analysis?.severity || "UNKNOWN"} SEVERITY {incident.analysis?.confidence || "UNKNOWN"} CONFIDENCE {new Date(incident.created_at).toLocaleDateString()}
- -
+ +

- + Verdict

{incident.analysis?.verdict || "Greenwashing detected"}

- -
+ +

- + Detailed Analysis

-

+

{incident.analysis?.reasoning || "No detailed analysis available."}

- + {#if incident.analysis?.red_flags && incident.analysis.red_flags.length > 0}

- + Red Flags Identified

-
    +
      {#each incident.analysis.red_flags as flag}
    • {/if} - + {#if incident.analysis?.key_claims && incident.analysis.key_claims.length > 0}

      - + Environmental Claims Made

      -
        +
          {#each incident.analysis.key_claims as claim}
        • {/if} - + {#if incident.analysis?.recommendations}

          - + Consumer Recommendations

          -

          +

          {incident.analysis.recommendations}

          {/if} - + {#if incident.user_description}

          - + Original User Report

          "{incident.user_description}"

          @@ -222,7 +260,7 @@
          diff --git a/frontend/src/lib/components/chat/LoadingBubble.svelte b/frontend/src/lib/components/chat/LoadingBubble.svelte new file mode 100644 index 0000000..5a19328 --- /dev/null +++ b/frontend/src/lib/components/chat/LoadingBubble.svelte @@ -0,0 +1,14 @@ +
          +
          + + + +
          +
          diff --git a/frontend/src/lib/components/chat/Mascot.svelte b/frontend/src/lib/components/chat/Mascot.svelte new file mode 100644 index 0000000..3b6eeaf --- /dev/null +++ b/frontend/src/lib/components/chat/Mascot.svelte @@ -0,0 +1,73 @@ + + +
          + +
          +
          diff --git a/frontend/src/routes/catalogue/+page.svelte b/frontend/src/routes/catalogue/+page.svelte index 0c8aa36..113b38b 100644 --- a/frontend/src/routes/catalogue/+page.svelte +++ b/frontend/src/routes/catalogue/+page.svelte @@ -8,13 +8,12 @@ import IncidentCard from "$lib/components/catalogue/IncidentCard.svelte"; import CompanyModal from "$lib/components/catalogue/CompanyModal.svelte"; import IncidentModal from "$lib/components/catalogue/IncidentModal.svelte"; - import Pagination from "$lib/components/catalogue/Pagination.svelte"; - // View mode toggle + type ViewMode = "company" | "user"; let viewMode = $state("company"); - // Data Types + interface Report { company_name: string; year: string | number; @@ -31,6 +30,8 @@ detected_brand: string; user_description?: string; created_at: string; + image_base64?: string; + report_type?: "product" | "company"; analysis: { verdict: string; confidence: string; @@ -48,7 +49,7 @@ let searchQuery = $state(""); let isLoading = $state(false); - // Predefined categories + const categories = [ "All", "Tech", @@ -61,7 +62,7 @@ ]; let selectedCategory = $state("All"); - // Fetching logic + async function fetchReports() { isLoading = true; try { @@ -93,7 +94,7 @@ fetchIncidents(); }); - // Search + async function handleSearch() { if (!searchQuery.trim()) { fetchReports(); @@ -124,7 +125,7 @@ debounceTimer = setTimeout(handleSearch, 600); } - // Pagination & Filtering + let currentPage = $state(1); const itemsPerPage = 10; @@ -154,11 +155,10 @@ function goToPage(page: number) { if (page >= 1 && page <= totalPages) { currentPage = page; - window.scrollTo({ top: 0, behavior: "smooth" }); } } - // Modals + let selectedReport = $state(null); let selectedIncident = $state(null); @@ -172,7 +172,7 @@
          - + {#if selectedReport}
          {#if isLoading} -
          +
          -

          +

          Syncing with database...

          {:else if viewMode === "company"} {#if filteredReports.length === 0}
          -

          +

          No reports found matching your criteria.

          {:else} -
          +
          {#each paginatedReports as report} {/each}
          - {/if} {:else if incidents.length === 0}
          -

          No user reports yet.

          -

          +

          + No user reports yet. +

          +

          Be the first to report greenwashing!

          {:else} -
          +
          {#each incidents as incident} :global(body) { - background-color: #051010; + background-color: #0c0c0c; color: white; } diff --git a/frontend/src/routes/chat/+page.svelte b/frontend/src/routes/chat/+page.svelte index 4240dd8..843835f 100644 --- a/frontend/src/routes/chat/+page.svelte +++ b/frontend/src/routes/chat/+page.svelte @@ -1,10 +1,18 @@ @@ -149,426 +95,68 @@ /> -
          -
          +
          + -
          -
          -
          -
          - -
          -
          -
          -

          Ethix Assistant

          -
          +
          +
          + +
          + +
          +

          + Ethix Assistant +

          +
          Powered by Gemini
          -
          -
          + +
          +
          {#each messages as msg (msg.id)} -
          -
          - {@html marked.parse(msg.text)} -
          -
          + {/each} {#if isLoading} -
          -
          - - - -
          -
          + {/if}
          -
          - - -
          +
          diff --git a/frontend/src/routes/report-details/+page.svelte b/frontend/src/routes/report-details/+page.svelte index 961beee..6bc1195 100644 --- a/frontend/src/routes/report-details/+page.svelte +++ b/frontend/src/routes/report-details/+page.svelte @@ -30,7 +30,11 @@ {#if item}
          -

{#if item.impact === "High"} - This item takes 450+ years to decompose. Consider switching - to sustainable alternatives immediately. + This item takes 450+ years to decompose. + Consider switching to sustainable + alternatives immediately. {:else} - This item is eco-friendly or easily recyclable. Good choice! + This item is eco-friendly or easily + recyclable. Good choice! {/if}

-

Recommended Alternatives

+

+ Recommended Alternatives +

@@ -142,7 +152,10 @@
{#if item.impact !== "Low"} - + +
+
+
@@ -238,35 +282,70 @@
- Evidence (Photo) - + + {reportType === "product" + ? "Evidence (Photo)" + : "Company Report (PDF)"} + + {#if reportType === "product"} + + {:else} + + {/if}
{#if error} @@ -410,6 +489,51 @@ gap: 16px; } + .report-type-toggle { + display: flex; + background: rgba(255, 255, 255, 0.05); + border-radius: 12px; + padding: 4px; + border: 1px solid rgba(255, 255, 255, 0.1); + } + + .toggle-option { + flex: 1; + background: transparent; + border: none; + color: rgba(255, 255, 255, 0.6); + padding: 10px; + border-radius: 8px; + font-weight: 600; + font-size: 13px; + cursor: pointer; + transition: all 0.2s; + } + + .toggle-option:hover { + color: white; + background: rgba(255, 255, 255, 0.05); + } + + .toggle-option.active { + background: rgba(34, 197, 94, 0.15); + color: #4ade80; + } + + .pdf-name { + color: white !important; + font-size: 14px; + text-align: center; + padding: 0 20px; + word-break: break-all; + } + + .change-text { + font-size: 12px; + color: rgba(255, 255, 255, 0.4); + margin-top: 4px; + } + .form-group { display: flex; flex-direction: column;