mirror of
https://github.com/SirBlobby/Hoya26.git
synced 2026-02-03 19:24:34 -05:00
Code Update
This commit is contained in:
@@ -5,9 +5,8 @@ from .routes.rag import rag_bp
|
||||
|
||||
def create_app():
|
||||
app = Flask(__name__)
|
||||
CORS(app) # Enable CORS for all routes
|
||||
CORS(app)
|
||||
|
||||
# Register Blueprints
|
||||
app.register_blueprint(main_bp)
|
||||
app.register_blueprint(rag_bp, url_prefix='/api/rag')
|
||||
|
||||
|
||||
@@ -2,10 +2,6 @@ from google import genai
|
||||
import os
|
||||
|
||||
def generate_content(prompt, model_name="gemini-2.0-flash-exp"):
|
||||
"""
|
||||
Generates content using the Google GenAI SDK.
|
||||
Defaults to gemini-2.0-flash-exp as per request (or similar).
|
||||
"""
|
||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||
if not api_key:
|
||||
return "Error: GOOGLE_API_KEY not found."
|
||||
|
||||
@@ -2,9 +2,6 @@ from google import genai
|
||||
import os
|
||||
|
||||
def get_embedding(text, model="gemini-embedding-001"):
|
||||
"""
|
||||
Generates an embedding for the given text using the Gemini API.
|
||||
"""
|
||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("GOOGLE_API_KEY environment variable not set")
|
||||
@@ -17,9 +14,6 @@ def get_embedding(text, model="gemini-embedding-001"):
|
||||
return result.embeddings[0].values
|
||||
|
||||
def get_embeddings_batch(texts, model="gemini-embedding-001"):
|
||||
"""
|
||||
Generates embeddings for a list of texts.
|
||||
"""
|
||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("GOOGLE_API_KEY environment variable not set")
|
||||
@@ -29,5 +23,4 @@ def get_embeddings_batch(texts, model="gemini-embedding-001"):
|
||||
model=model,
|
||||
contents=texts
|
||||
)
|
||||
# The SDK returns a list of embedding objects
|
||||
return [emb.values for emb in result.embeddings]
|
||||
|
||||
@@ -4,18 +4,10 @@ import io
|
||||
import os
|
||||
|
||||
def load_csv(file_path):
|
||||
"""
|
||||
Loads a CSV file and returns a list of strings (one per row).
|
||||
This is a simplistic implementation - in production you might want specific columns.
|
||||
"""
|
||||
df = pd.read_csv(file_path)
|
||||
# Convert each row to a string representation
|
||||
return df.apply(lambda x: ' | '.join(x.astype(str)), axis=1).tolist()
|
||||
|
||||
def load_pdf(file_path):
|
||||
"""
|
||||
Loads a PDF file and returns a list of strings (one per page).
|
||||
"""
|
||||
reader = PdfReader(file_path)
|
||||
text_chunks = []
|
||||
for page in reader.pages:
|
||||
@@ -25,9 +17,6 @@ def load_pdf(file_path):
|
||||
return text_chunks
|
||||
|
||||
def process_file(file_path):
|
||||
"""
|
||||
Determines file type and returns text chunks.
|
||||
"""
|
||||
ext = os.path.splitext(file_path)[1].lower()
|
||||
if ext == '.csv':
|
||||
return load_csv(file_path)
|
||||
|
||||
@@ -9,14 +9,10 @@ def get_mongo_client():
|
||||
return MongoClient(uri)
|
||||
|
||||
def ingest_documents(text_chunks, collection_name="rag_documents"):
|
||||
"""
|
||||
Generates embeddings for text chunks and stores them in MongoDB.
|
||||
"""
|
||||
client = get_mongo_client()
|
||||
db = client.get_database("vectors_db") # Default DB name
|
||||
db = client.get_database("vectors_db")
|
||||
collection = db[collection_name]
|
||||
|
||||
# Generate embeddings in batches (handling API limits might be needed for large sets)
|
||||
embeddings = get_embeddings_batch(text_chunks)
|
||||
|
||||
documents = []
|
||||
@@ -32,10 +28,6 @@ def ingest_documents(text_chunks, collection_name="rag_documents"):
|
||||
return 0
|
||||
|
||||
def vector_search(query_text, collection_name="rag_documents", num_results=5):
|
||||
"""
|
||||
Performs a vector search in MongoDB.
|
||||
"""
|
||||
# 1. Get embedding for the query
|
||||
from .embeddings import get_embedding
|
||||
query_embedding = get_embedding(query_text)
|
||||
|
||||
@@ -43,7 +35,6 @@ def vector_search(query_text, collection_name="rag_documents", num_results=5):
|
||||
db = client.get_database("vectors_db")
|
||||
collection = db[collection_name]
|
||||
|
||||
# Note: You must have a vector search index defined in MongoDB Atlas for this to work.
|
||||
pipeline = [
|
||||
{
|
||||
"$vectorSearch": {
|
||||
|
||||
Reference in New Issue
Block a user