This commit is contained in:
KasaNick
2026-01-24 04:26:02 -05:00
parent 4298368b63
commit 9ac637cb41
9 changed files with 581 additions and 23 deletions

View File

@@ -1,9 +1,47 @@
from dotenv import load_dotenv import os
load_dotenv() from flask import Flask, request, jsonify
from flask_cors import CORS
from src import create_app from src.rag.gemeni import GeminiClient
from src.mongo import get_database
app = create_app() app = Flask(__name__)
CORS(app)
try:
brain = GeminiClient()
db = get_database()
print("--- Backend Components Initialized Successfully ---")
except Exception as e:
print(f"CRITICAL ERROR during initialization: {e}")
@app.route('/')
def health_check():
return {
"status": "online",
"message": "The Waiter is ready at the counter!"
}
@app.route('/chat', methods=['POST'])
def chat():
data = request.json
user_query = data.get("message")
if not user_query:
return jsonify({"error": "You didn't say anything!"}), 400
try:
context = ""
ai_reply = brain.ask(user_query, context)
return jsonify({
"status": "success",
"reply": ai_reply
})
except Exception as e:
return jsonify({
"status": "error",
"message": str(e)
}), 500
if __name__ == "__main__": if __name__ == "__main__":
app.run(host='0.0.0.0', port=5000) app.run(debug=True, port=5000)

View File

@@ -1,7 +1,7 @@
flask flask
gunicorn gunicorn
ultralytics ultralytics
opencv-python-headless opencv-python
transformers transformers
torch torch
pandas pandas

View File

@@ -50,8 +50,8 @@ def populate_from_dataset(dataset_dir, category=None):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Populate vector database from dataset files") parser = argparse.ArgumentParser(description="Populate vector database from dataset files")
parser.add_argument("--category", "-c", type=str, help="Category to assign to ingested documents") parser.add_argument("--category", "-c", type=str)
parser.add_argument("--dir", "-d", type=str, default=None, help="Dataset directory path") parser.add_argument("--dir", "-d", type=str, default=None)
args = parser.parse_args() args = parser.parse_args()
if args.dir: if args.dir:

View File

@@ -17,9 +17,7 @@ def get_collection(collection_name=COLLECTION_NAME):
def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None): def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadata_list=None):
collection = get_collection(collection_name) collection = get_collection(collection_name)
ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)] ids = [f"doc_{i}_{hash(text)}" for i, text in enumerate(texts)]
if metadata_list: if metadata_list:
collection.add( collection.add(
ids=ids, ids=ids,
@@ -33,22 +31,17 @@ def insert_documents(texts, embeddings, collection_name=COLLECTION_NAME, metadat
embeddings=embeddings, embeddings=embeddings,
documents=texts documents=texts
) )
return len(texts) return len(texts)
def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None): def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_results=5, filter_metadata=None):
collection = get_collection(collection_name) collection = get_collection(collection_name)
query_params = { query_params = {
"query_embeddings": [query_embedding], "query_embeddings": [query_embedding],
"n_results": num_results "n_results": num_results
} }
if filter_metadata: if filter_metadata:
query_params["where"] = filter_metadata query_params["where"] = filter_metadata
results = collection.query(**query_params) results = collection.query(**query_params)
output = [] output = []
if results and results["documents"]: if results and results["documents"]:
for i, doc in enumerate(results["documents"][0]): for i, doc in enumerate(results["documents"][0]):
@@ -57,7 +50,6 @@ def search_documents(query_embedding, collection_name=COLLECTION_NAME, num_resul
"text": doc, "text": doc,
"score": score "score": score
}) })
return output return output
def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME): def delete_documents_by_source(source_file, collection_name=COLLECTION_NAME):

View File

@@ -0,0 +1,474 @@
import cv2
import numpy as np
import os
import json
from pathlib import Path
from typing import List, Dict, Optional, Tuple
# Directory layout: this module lives in the CV package; dataset files and
# model weights are expected in sibling subdirectories.
CV_DIR = Path(__file__).parent
DATA_DIR = CV_DIR / "data"
MODELS_DIR = CV_DIR / "models"

# LogoDet-3K super-categories. The numbers presumably are per-category brand
# counts from the dataset description — TODO confirm; they are not read by
# any code in this module.
SUPER_CATEGORIES = {
    "Food": 932,
    "Clothes": 604,
    "Necessities": 432,
    "Others": 371,
    "Electronic": 224,
    "Transportation": 213,
    "Leisure": 111,
    "Sports": 66,
    "Medical": 47
}

# Well-known brands given priority when caching ORB template features
# (only brands present in both this list and the local dataset are cached).
COMMON_BRANDS = [
    "McDonalds", "Starbucks", "CocaCola", "Pepsi", "KFC", "BurgerKing",
    "Subway", "DunkinDonuts", "PizzaHut", "Dominos", "Nestle", "Heineken",
    "Nike", "Adidas", "Puma", "UnderArmour", "Levis", "HM", "Zara", "Gap",
    "Gucci", "LouisVuitton", "Chanel", "Versace", "Prada", "Armani",
    "Apple", "Samsung", "HP", "Dell", "Intel", "AMD", "Nvidia", "Microsoft",
    "Sony", "LG", "Huawei", "Xiaomi", "Lenovo", "Asus", "Acer",
    "BMW", "Mercedes", "Audi", "Toyota", "Honda", "Ford", "Chevrolet",
    "Volkswagen", "Tesla", "Porsche", "Ferrari", "Lamborghini", "Nissan",
    "Google", "Facebook", "Twitter", "Instagram", "YouTube", "Amazon",
    "Netflix", "Spotify", "Uber", "Airbnb", "PayPal", "Visa", "Mastercard"
]
class LogoDet3KDataset:
    """Index of a local LogoDet-3K dataset copy.

    Finds the dataset directory (explicit path or a handful of well-known
    locations), builds a brand -> {super_category, path, images} index, and
    serves template images per brand.
    """

    def __init__(self, dataset_path: Optional[str] = None):
        self.dataset_path = None
        self.categories = {}
        self.brand_templates = {}

        if dataset_path and os.path.exists(dataset_path):
            self.dataset_path = Path(dataset_path)
        else:
            # Probe the usual download/extract locations.
            candidates = (
                DATA_DIR / "LogoDet-3K",
                DATA_DIR / "logodet3k",
                Path.home() / "Downloads" / "LogoDet-3K",
                Path.home() / ".kaggle" / "datasets" / "lyly99" / "logodet3k",
            )
            self.dataset_path = next((c for c in candidates if c.exists()), None)

        if self.dataset_path:
            self._load_categories()
            print(f"LogoDet-3K dataset loaded from: {self.dataset_path}")
            print(f"Found {len(self.categories)} brand categories")
        else:
            print("LogoDet-3K dataset not found locally.")
            print("\nTo download the dataset:")
            print("1. Install kaggle CLI: pip install kaggle")
            print("2. Download: kaggle datasets download -d lyly99/logodet3k")
            print("3. Extract to:", DATA_DIR / "LogoDet-3K")

    def _load_categories(self):
        """Scan <dataset>/<super-category>/<brand>/ and index every brand dir."""
        if not self.dataset_path:
            return
        for super_dir in self.dataset_path.iterdir():
            if not super_dir.is_dir() or super_dir.name.startswith('.'):
                continue
            for brand_dir in super_dir.iterdir():
                if not brand_dir.is_dir():
                    continue
                self.categories[brand_dir.name] = {
                    "super_category": super_dir.name,
                    "path": brand_dir,
                    "images": list(brand_dir.glob("*.jpg")) + list(brand_dir.glob("*.png"))
                }

    def get_brand_templates(self, brand_name: str, max_templates: int = 5) -> List[np.ndarray]:
        """Load up to ``max_templates`` template images for ``brand_name``.

        Unknown brands and unreadable image files are silently skipped.
        """
        info = self.categories.get(brand_name)
        if info is None:
            return []
        loaded = (cv2.imread(str(p)) for p in info["images"][:max_templates])
        return [img for img in loaded if img is not None]

    def get_all_brands(self) -> List[str]:
        """Return every indexed brand name."""
        return [*self.categories]

    def get_brands_by_category(self, super_category: str) -> List[str]:
        """Return brand names under ``super_category`` (case-insensitive)."""
        wanted = super_category.lower()
        found = []
        for name, info in self.categories.items():
            if info["super_category"].lower() == wanted:
                found.append(name)
        return found
class LogoDetector:
    """Logo detector with three fallback strategies, tried in order:

    1. A cv2.dnn model (e.g. ONNX), when a model file was supplied.
    2. ORB feature matching against cached LogoDet-3K brand templates.
    3. A pure-heuristic contour/corner analysis that flags "logo-like" regions.

    ``detect()`` returns a list of dicts with at least ``bbox`` (x1, y1, x2, y2),
    ``label`` and ``confidence`` keys.
    """

    def __init__(self,
                 model_path: Optional[str] = None,
                 dataset_path: Optional[str] = None,
                 use_gpu: bool = True):
        self.model_path = model_path
        self.use_gpu = use_gpu
        # cv2.dnn network; stays None when no model file is given/loadable.
        self.net = None
        self.dataset = LogoDet3KDataset(dataset_path)
        self.conf_threshold = 0.3
        self.nms_threshold = 0.4
        # ORB + brute-force Hamming matcher: always-available feature pipeline.
        self.orb = cv2.ORB_create(nfeatures=1000)
        self.bf_matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        # SIFT is optional (older OpenCV builds lack it); degrade to None.
        # NOTE(review): self.sift / self.flann_matcher are never used below —
        # possibly reserved for future work.
        try:
            self.sift = cv2.SIFT_create()
            self.flann_matcher = cv2.FlannBasedMatcher(
                {"algorithm": 1, "trees": 5},
                {"checks": 50}
            )
        except:
            self.sift = None
            self.flann_matcher = None
        # brand name -> list of (keypoints, descriptors) template features.
        self.brand_features = {}
        self._load_model()
        self._cache_brand_features()

    def _load_model(self):
        """Load the DNN model from ``self.model_path`` into ``self.net``.

        Silently leaves ``self.net`` as None when no usable model exists.
        """
        if not self.model_path or not os.path.exists(self.model_path):
            return
        try:
            print(f"Loading model: {self.model_path}")
            if self.model_path.endswith('.onnx'):
                self.net = cv2.dnn.readNetFromONNX(self.model_path)
            else:
                self.net = cv2.dnn.readNet(self.model_path)
            if self.use_gpu:
                # OpenCL target; falls back to CPU if unavailable.
                try:
                    self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
                    self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
                    print("✅ Using OpenCL GPU acceleration")
                except:
                    print("⚠️ GPU not available, using CPU")
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Failed to load model: {e}")
            self.net = None

    def _cache_brand_features(self):
        """Precompute ORB features for up to 50 common brands found locally."""
        if not self.dataset.categories:
            return
        print("Caching brand features (this may take a moment)...")
        brands_to_cache = [b for b in COMMON_BRANDS if b in self.dataset.categories][:50]
        for brand in brands_to_cache:
            templates = self.dataset.get_brand_templates(brand, max_templates=3)
            if templates:
                features = []
                for tmpl in templates:
                    gray = cv2.cvtColor(tmpl, cv2.COLOR_BGR2GRAY)
                    kp, des = self.orb.detectAndCompute(gray, None)
                    if des is not None:
                        features.append((kp, des))
                if features:
                    self.brand_features[brand] = features
        print(f"Cached features for {len(self.brand_features)} brands")

    def detect(self, frame: np.ndarray, conf_threshold: Optional[float] = None) -> List[Dict]:
        """Detect logos in a BGR frame using the best available strategy.

        Each fallback only runs when the previous strategy produced nothing.
        """
        if conf_threshold is None:
            conf_threshold = self.conf_threshold
        detections = []
        if self.net is not None:
            detections = self._detect_with_model(frame, conf_threshold)
        if not detections and self.brand_features:
            detections = self._detect_with_features(frame, conf_threshold)
        if not detections:
            detections = self._detect_logo_regions(frame)
        return detections

    def _detect_with_model(self, frame: np.ndarray, conf_threshold: float) -> List[Dict]:
        """Run the DNN on a 640x640 blob and NMS-filter the raw detections.

        Assumes a YOLO-style output row layout: [cx, cy, w, h, score(s)...]
        in 640-pixel coordinates — TODO confirm against the actual model.
        """
        height, width = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(
            frame,
            scalefactor=1/255.0,
            size=(640, 640),
            swapRB=True,
            crop=False
        )
        self.net.setInput(blob)
        try:
            output_names = self.net.getUnconnectedOutLayersNames()
            outputs = self.net.forward(output_names)
        except:
            # Some backends fail on named outputs; fall back to default forward.
            outputs = [self.net.forward()]
        detections = []
        boxes = []
        confidences = []
        class_ids = []
        for output in outputs:
            if len(output.shape) == 3:
                # Drop the leading batch dimension.
                output = output[0]
            for detection in output:
                if len(detection) < 5:
                    continue
                # Multi-class rows carry per-class scores after the box;
                # single-score rows carry one objectness value at index 4.
                scores = detection[4:] if len(detection) > 5 else [detection[4]]
                class_id = np.argmax(scores) if len(scores) > 1 else 0
                confidence = float(scores[class_id]) if len(scores) > 1 else float(scores[0])
                if confidence > conf_threshold:
                    # Convert center/size in 640-space to corner box in frame-space.
                    cx, cy, w, h = detection[:4]
                    scale_x = width / 640
                    scale_y = height / 640
                    x1 = int((cx - w/2) * scale_x)
                    y1 = int((cy - h/2) * scale_y)
                    x2 = int((cx + w/2) * scale_x)
                    y2 = int((cy + h/2) * scale_y)
                    boxes.append([x1, y1, x2-x1, y2-y1])
                    confidences.append(confidence)
                    class_ids.append(class_id)
        if boxes:
            indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, self.nms_threshold)
            for i in indices:
                # NMSBoxes returns nested indices on some OpenCV versions.
                idx = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
                x, y, w, h = boxes[idx]
                detections.append({
                    "bbox": (x, y, x + w, y + h),
                    "label": f"Logo-{class_ids[idx]}",
                    "confidence": confidences[idx],
                    "class_id": class_ids[idx]
                })
        return detections

    def _detect_with_features(self, frame: np.ndarray, conf_threshold: float) -> List[Dict]:
        """Match cached brand templates against the frame via ORB features.

        The bounding box is the extent of matched keypoints in the frame; the
        confidence is a heuristic derived from the mean Hamming distance.
        Returns at most the 5 highest-confidence brand matches.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        kp_frame, des_frame = self.orb.detectAndCompute(gray, None)
        if des_frame is None or len(kp_frame) < 10:
            # Too little texture in the frame to match reliably.
            return []
        detections = []
        best_matches = []
        for brand, feature_list in self.brand_features.items():
            for kp_tmpl, des_tmpl in feature_list:
                try:
                    matches = self.bf_matcher.match(des_tmpl, des_frame)
                    matches = sorted(matches, key=lambda x: x.distance)
                    # Keep close matches only (distance < 60 of the best 50).
                    good_matches = [m for m in matches[:50] if m.distance < 60]
                    if len(good_matches) >= 8:
                        pts = np.float32([kp_frame[m.trainIdx].pt for m in good_matches])
                        if len(pts) > 0:
                            x_min, y_min = pts.min(axis=0).astype(int)
                            x_max, y_max = pts.max(axis=0).astype(int)
                            avg_dist = np.mean([m.distance for m in good_matches])
                            # Lower distance -> higher confidence, floored at 0.3.
                            confidence = max(0.3, 1.0 - (avg_dist / 100))
                            if confidence >= conf_threshold:
                                best_matches.append({
                                    "bbox": (x_min, y_min, x_max, y_max),
                                    "label": brand,
                                    "confidence": confidence,
                                    "match_count": len(good_matches)
                                })
                except Exception:
                    # A bad template must not abort the whole scan.
                    continue
        if best_matches:
            best_matches.sort(key=lambda x: x["confidence"], reverse=True)
            detections = best_matches[:5]
        return detections

    def _detect_logo_regions(self, frame: np.ndarray) -> List[Dict]:
        """Heuristic fallback: flag compact, feature-rich regions as logos.

        Filters contours by area (1%-15% of the frame), aspect ratio
        (0.5-2.0), solidity, corner count and edge density, then scores the
        survivors. Returns at most the 3 best candidates labeled
        "Potential Logo" with class_id -1.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 80, 200)
        # Morphological close (dilate then erode) to join broken edges.
        kernel = np.ones((3, 3), np.uint8)
        edges = cv2.dilate(edges, kernel, iterations=1)
        edges = cv2.erode(edges, kernel, iterations=1)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        detections = []
        height, width = frame.shape[:2]
        min_area = (width * height) * 0.01
        max_area = (width * height) * 0.15
        for contour in contours:
            area = cv2.contourArea(contour)
            if area < min_area or area > max_area:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            # Logos tend to be roughly square-ish.
            aspect_ratio = w / h if h > 0 else 0
            if aspect_ratio < 0.5 or aspect_ratio > 2.0:
                continue
            # Solidity = contour area / convex hull area; rejects spindly shapes.
            hull = cv2.convexHull(contour)
            hull_area = cv2.contourArea(hull)
            solidity = area / hull_area if hull_area > 0 else 0
            if solidity < 0.3:
                continue
            roi = gray[y:y+h, x:x+w]
            if roi.size == 0:
                continue
            # Require a minimum number of trackable corners in the region.
            corners = cv2.goodFeaturesToTrack(roi, 50, 0.01, 5)
            if corners is None or len(corners) < 15:
                continue
            # Edge density window: too low = flat area, too high = noise.
            roi_edges = edges[y:y+h, x:x+w]
            edge_density = np.sum(roi_edges > 0) / (w * h) if (w * h) > 0 else 0
            if edge_density < 0.05 or edge_density > 0.5:
                continue
            # Weighted score of the three heuristics.
            corner_score = min(1.0, len(corners) / 40)
            solidity_score = solidity
            aspect_score = 1.0 - abs(1.0 - aspect_ratio) / 2
            confidence = (corner_score * 0.4 + solidity_score * 0.3 + aspect_score * 0.3)
            if confidence >= 0.6:
                detections.append({
                    "bbox": (x, y, x + w, y + h),
                    "label": "Potential Logo",
                    "confidence": confidence,
                    "class_id": -1
                })
        detections.sort(key=lambda x: x["confidence"], reverse=True)
        return detections[:3]

    def draw_detections(self, frame: np.ndarray, detections: List[Dict]) -> np.ndarray:
        """Return a copy of ``frame`` with labeled boxes drawn on it.

        Box color encodes confidence: green > 0.7, yellow > 0.5, orange otherwise.
        """
        result = frame.copy()
        for det in detections:
            x1, y1, x2, y2 = det["bbox"]
            label = det["label"]
            conf = det["confidence"]
            if conf > 0.7:
                color = (0, 255, 0)
            elif conf > 0.5:
                color = (0, 255, 255)
            else:
                color = (0, 165, 255)
            cv2.rectangle(result, (x1, y1), (x2, y2), color, 2)
            # Filled background behind the label text for readability.
            label_text = f"{label}: {conf:.2f}"
            (text_w, text_h), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            cv2.rectangle(result, (x1, y1 - text_h - 6), (x1 + text_w + 4, y1), color, -1)
            cv2.putText(result, label_text, (x1 + 2, y1 - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
        return result
def start_scanner(model_path: Optional[str] = None,
                  dataset_path: Optional[str] = None,
                  use_gui: bool = True):
    """Run the webcam logo-scanning loop until 'q' is pressed or interrupted.

    Builds a LogoDetector, reads frames from camera index 0, draws detections
    and shows them in a window. If GUI display fails (headless environment),
    it switches to writing annotated frames to ``output.mp4`` instead.

    NOTE(review): when called with use_gui=False from the start, ``writer`` is
    never created (it is only set up after a GUI failure), so nothing is
    recorded — confirm whether that is intended.
    """
    print("=" * 60)
    print("LogoDet-3K Logo Scanner")
    print("3,000 logo categories | 9 super-categories | 200K+ objects")
    print("=" * 60)
    detector = LogoDetector(
        model_path=model_path,
        dataset_path=dataset_path,
        use_gpu=True
    )
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("\nError: Could not access camera.")
        return
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some cameras report 0 fps; default to 30 in that case.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    writer = None
    output_path = CV_DIR / "output.mp4"
    print(f"\n📷 Camera: {width}x{height} @ {fps:.1f}fps")
    print("Press 'q' to quit\n")
    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            detections = detector.detect(frame)
            result_frame = detector.draw_detections(frame, detections)
            info_text = f"Logos: {len(detections)} | Frame: {frame_count}"
            cv2.putText(result_frame, info_text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            if use_gui:
                try:
                    cv2.imshow('LogoDet-3K Scanner', result_frame)
                    key = cv2.waitKey(1) & 0xFF
                    if key == ord('q'):
                        break
                    elif key == ord('s'):
                        # 's' saves a snapshot of the current annotated frame.
                        cv2.imwrite(str(CV_DIR / f"screenshot_{frame_count}.jpg"), result_frame)
                except cv2.error:
                    # Headless environment: switch to recording frames to disk.
                    use_gui = False
                    writer = cv2.VideoWriter(
                        str(output_path),
                        cv2.VideoWriter_fourcc(*'mp4v'),
                        fps,
                        (width, height)
                    )
            if not use_gui and writer:
                writer.write(result_frame)
    except KeyboardInterrupt:
        pass
    finally:
        # Always release the camera and any writer, even on error/interrupt.
        cap.release()
        if writer:
            writer.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # CLI entry point: optional model file, dataset directory, and GUI toggle.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--model", "-m", type=str)
    cli.add_argument("--dataset", "-d", type=str)
    cli.add_argument("--no-gui", action="store_true")
    opts = cli.parse_args()
    start_scanner(model_path=opts.model, dataset_path=opts.dataset, use_gui=not opts.no_gui)

View File

@@ -5,7 +5,6 @@ def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
api_key = os.environ.get("GOOGLE_API_KEY") api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key: if not api_key:
return "Error: GOOGLE_API_KEY not found." return "Error: GOOGLE_API_KEY not found."
try: try:
client = genai.Client(api_key=api_key) client = genai.Client(api_key=api_key)
response = client.models.generate_content( response = client.models.generate_content(

View File

@@ -0,0 +1,21 @@
import os
from pymongo import MongoClient
from dotenv import load_dotenv

# The .env file lives in the sibling ``rag`` package; resolve it relative to
# this file so loading works regardless of the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
env_path = os.path.join(script_dir, '..', 'rag', '.env')
load_dotenv(env_path)
def get_database():
    """Return a handle to the ``my_rag_app`` MongoDB database, or None.

    Reads the connection string from the MONGO_URI environment variable
    (loaded from the rag package's .env at import time). Returns None and
    prints an error when the URI is missing or the server is unreachable.
    """
    uri = os.getenv("MONGO_URI")
    if not uri:
        # Fail fast instead of silently defaulting to localhost.
        print("ERROR: Could not connect to MongoDB: MONGO_URI is not set")
        return None
    try:
        # Bound server selection so an unreachable cluster fails quickly.
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
        # MongoClient connects lazily -- it does not touch the server until
        # the first operation. Issue a ping so the success message below is
        # only printed for a genuinely reachable deployment.
        client.admin.command("ping")
        db = client["my_rag_app"]
        print("SUCCESS: Connected to MongoDB Atlas!")
        return db
    except Exception as e:
        print(f"ERROR: Could not connect to MongoDB: {e}")
        return None
if __name__ == "__main__":
    # Running this module directly performs a one-off connection check.
    get_database()

40
backend/src/rag/gemeni.py Normal file
View File

@@ -0,0 +1,40 @@
import os
from google import genai
from dotenv import load_dotenv

# Load the .env sitting next to this module so GOOGLE_API_KEY is available
# regardless of the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
load_dotenv(os.path.join(script_dir, '.env'))
class GeminiClient:
    """Thin wrapper around the google-genai client for single-turn prompts."""

    def __init__(self):
        # Requires GOOGLE_API_KEY in the environment (loaded from .env above).
        self.api_key = os.getenv("GOOGLE_API_KEY")
        if not self.api_key:
            raise ValueError("No GOOGLE_API_KEY found in .env file!")
        self.client = genai.Client(api_key=self.api_key)
        self.model_name = "gemini-2.0-flash"

    def ask(self, prompt, context=""):
        """Send ``prompt`` to Gemini, optionally prefixed with ``context``.

        Returns the model's text reply; any failure is reported as an error
        string rather than raised.
        """
        full_message = (
            f"Use this information to answer: {context}\n\nQuestion: {prompt}"
            if context
            else prompt
        )
        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=full_message
            )
            return response.text
        except Exception as e:
            return f"Error talking to Gemini: {str(e)}"
if __name__ == "__main__":
    # Manual smoke test: needs GOOGLE_API_KEY and network access.
    try:
        client = GeminiClient()
        print("--- Testing Class Connection ---")
        print(client.ask("Hello! Give me a 1-sentence coding tip."))
    except Exception as err:
        print(f"Failed to start Gemini: {err}")

View File

@@ -8,7 +8,6 @@ def chunk_text(text, target_length=2000, overlap=100):
return [] return []
chunks = [] chunks = []
paragraphs = text.split('\n\n') paragraphs = text.split('\n\n')
current_chunk = "" current_chunk = ""
@@ -56,7 +55,6 @@ def load_pdf(file_path):
def load_txt(file_path): def load_txt(file_path):
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
content = f.read() content = f.read()
return chunk_text(content) return chunk_text(content)
def load_xlsx(file_path): def load_xlsx(file_path):
@@ -69,19 +67,15 @@ def load_xlsx(file_path):
for sheet_name, df in sheets.items(): for sheet_name, df in sheets.items():
if df.empty: if df.empty:
continue continue
df = df.fillna("") df = df.fillna("")
for row in df.values: for row in df.values:
row_items = [str(x) for x in row if str(x).strip() != ""] row_items = [str(x) for x in row if str(x).strip() != ""]
if row_items: if row_items:
row_str = f"Sheet: {str(sheet_name)} | " + " | ".join(row_items) row_str = f"Sheet: {str(sheet_name)} | " + " | ".join(row_items)
if len(row_str) > 8000: if len(row_str) > 8000:
all_rows.extend(chunk_text(row_str)) all_rows.extend(chunk_text(row_str))
else: else:
all_rows.append(row_str) all_rows.append(row_str)
return all_rows return all_rows
def process_file(file_path): def process_file(file_path):