Code Update

This commit is contained in:
2026-01-24 04:01:05 +00:00
parent c87279ed52
commit aad7e6e08d
13 changed files with 18 additions and 469 deletions

View File

@@ -5,9 +5,8 @@ from .routes.rag import rag_bp
def create_app():
app = Flask(__name__)
CORS(app) # Enable CORS for all routes
CORS(app)
# Register Blueprints
app.register_blueprint(main_bp)
app.register_blueprint(rag_bp, url_prefix='/api/rag')

View File

@@ -2,10 +2,6 @@ from google import genai
import os
def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
"""
Generates content using the Google GenAI SDK.
Defaults to gemini-2.0-flash-exp as per request (or similar).
"""
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
return "Error: GOOGLE_API_KEY not found."

View File

@@ -2,9 +2,6 @@ from google import genai
import os
def get_embedding(text, model="gemini-embedding-001"):
"""
Generates an embedding for the given text using the Gemini API.
"""
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
raise ValueError("GOOGLE_API_KEY environment variable not set")
@@ -17,9 +14,6 @@ def get_embedding(text, model="gemini-embedding-001"):
return result.embeddings[0].values
def get_embeddings_batch(texts, model="gemini-embedding-001"):
"""
Generates embeddings for a list of texts.
"""
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
raise ValueError("GOOGLE_API_KEY environment variable not set")
@@ -29,5 +23,4 @@ def get_embeddings_batch(texts, model="gemini-embedding-001"):
model=model,
contents=texts
)
# The SDK returns a list of embedding objects
return [emb.values for emb in result.embeddings]

View File

@@ -4,18 +4,10 @@ import io
import os
def load_csv(file_path):
    """Load a CSV file and return one ' | '-joined string per data row.

    Every cell is converted with ``str()`` and the cells of a row are joined
    with ``' | '``. Missing values are rendered as the literal text ``nan``.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to read.

    Returns:
        A list of strings, one per data row, in file order. A file that
        contains only a header row yields an empty list.
    """
    df = pd.read_csv(file_path)
    # Walk the rows with itertuples() instead of df.apply(..., axis=1):
    # apply() materialises each row as a single-dtype Series, so a frame with
    # only int and float columns upcasts its ints and renders 1 as "1.0".
    # itertuples() keeps each column's own dtype, so cells are rendered the
    # same way regardless of which other columns are present.
    return [
        ' | '.join(map(str, row))
        for row in df.itertuples(index=False, name=None)
    ]
def load_pdf(file_path):
"""
Loads a PDF file and returns a list of strings (one per page).
"""
reader = PdfReader(file_path)
text_chunks = []
for page in reader.pages:
@@ -25,9 +17,6 @@ def load_pdf(file_path):
return text_chunks
def process_file(file_path):
"""
Determines file type and returns text chunks.
"""
ext = os.path.splitext(file_path)[1].lower()
if ext == '.csv':
return load_csv(file_path)

View File

@@ -9,14 +9,10 @@ def get_mongo_client():
return MongoClient(uri)
def ingest_documents(text_chunks, collection_name="rag_documents"):
"""
Generates embeddings for text chunks and stores them in MongoDB.
"""
client = get_mongo_client()
db = client.get_database("vectors_db") # Default DB name
db = client.get_database("vectors_db")
collection = db[collection_name]
# Generate embeddings in batches (handling API limits might be needed for large sets)
embeddings = get_embeddings_batch(text_chunks)
documents = []
@@ -32,10 +28,6 @@ def ingest_documents(text_chunks, collection_name="rag_documents"):
return 0
def vector_search(query_text, collection_name="rag_documents", num_results=5):
"""
Performs a vector search in MongoDB.
"""
# 1. Get embedding for the query
from .embeddings import get_embedding
query_embedding = get_embedding(query_text)
@@ -43,7 +35,6 @@ def vector_search(query_text, collection_name="rag_documents", num_results=5):
db = client.get_database("vectors_db")
collection = db[collection_name]
# Note: You must have a vector search index defined in MongoDB Atlas for this to work.
pipeline = [
{
"$vectorSearch": {