Populate DB Chromadb

This commit is contained in:
2026-01-24 07:52:48 +00:00
parent d145f7e94c
commit 4298368b63
10 changed files with 279 additions and 48 deletions

View File

@@ -1,26 +1,32 @@
from google import genai
import ollama
import os
def get_embedding(text, model="gemini-embedding-001"):
    """Embed a single text through the Google GenAI client.

    Args:
        text: Content to embed; forwarded to the API as ``contents``.
        model: Name of the embedding model to request.

    Returns:
        The ``values`` of the first embedding in the API result.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is
            unset or empty.
    """
    key = os.environ.get("GOOGLE_API_KEY")
    if not key:
        raise ValueError("GOOGLE_API_KEY environment variable not set")

    # A new client is built on every call, using the key read just above.
    gemini = genai.Client(api_key=key)
    response = gemini.models.embed_content(model=model, contents=text)
    first_embedding = response.embeddings[0]
    return first_embedding.values
# Model used by the embedding helpers when callers do not pass ``model``.
DEFAULT_MODEL = "nomic-embed-text:latest"

# Address of the Ollama server that embedding requests are sent to.
_OLLAMA_HOST = "https://ollama.sirblob.co"

# Module-level client, created once at import time and shared by the helpers.
client = ollama.Client(host=_OLLAMA_HOST)
def get_embeddings_batch(texts, model="gemini-embedding-001"):
    """Embed several texts in one Google GenAI ``embed_content`` request.

    Args:
        texts: Sequence of strings to embed; forwarded as ``contents``.
        model: Name of the embedding model to request.

    Returns:
        A list with one embedding (its ``values``) per entry of the API
        result, or an empty list when ``texts`` is empty.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is
            unset or empty.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY environment variable not set")

    # Nothing to embed: skip client construction and the network round-trip.
    # Checked after the key so a missing key is still reported for any input.
    if not texts:
        return []

    client = genai.Client(api_key=api_key)
    result = client.models.embed_content(model=model, contents=texts)
    return [emb.values for emb in result.embeddings]
def get_embedding(text, model=DEFAULT_MODEL):
    """Return the embedding vector for a single text via the Ollama client.

    Calls the client's ``embed`` method, the same one
    ``get_embeddings_batch`` uses, rather than the legacy ``embeddings``
    method, so single and batched vectors come from one code path.
    NOTE(review): if vectors were already stored using the legacy call,
    confirm they are comparable with ``embed`` output before mixing them.

    Args:
        text: The string to embed.
        model: Name of the Ollama embedding model.

    Returns:
        The embedding for ``text`` (first entry of the response's
        ``embeddings``).

    Raises:
        ValueError: If the response contains no embeddings.
        Exception: Any client error is printed and re-raised unchanged.
    """
    try:
        response = client.embed(model=model, input=text)
        if "embeddings" not in response or not response["embeddings"]:
            raise ValueError("Unexpected response format from client.embed")
        # A single input yields a one-element list of vectors.
        return response["embeddings"][0]
    except Exception as e:
        print(f"Error getting embedding from Ollama: {e}")
        # Bare raise keeps the original traceback intact.
        raise
def get_embeddings_batch(texts, model=DEFAULT_MODEL, batch_size=50):
    """Embed a list of texts with the Ollama client, ``batch_size`` at a time.

    Args:
        texts: Sequence of strings to embed (must support ``len`` and
            slicing).
        model: Name of the Ollama embedding model.
        batch_size: Maximum number of texts sent per ``client.embed`` call.

    Returns:
        A list of the embeddings returned for each batch, concatenated in
        batch order. Empty when ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is less than 1, or a response lacks
            the ``embeddings`` key.
        Exception: Any client error is printed and re-raised unchanged.
    """
    # A zero step makes range() fail with an unrelated message, and a
    # negative step would silently skip every text; reject both up front.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        try:
            response = client.embed(model=model, input=batch)
            if "embeddings" not in response:
                raise ValueError("Unexpected response format from client.embed")
            all_embeddings.extend(response["embeddings"])
        except Exception as e:
            # Report the range actually sent; the final batch may be short.
            print(f"Error embedding batch {start}-{start + len(batch)}: {e}")
            raise
    return all_embeddings