Demo Update 30

This commit is contained in:
2025-03-30 11:45:44 -04:00
parent 693a8d98e6
commit bb089c09d0

View File

@@ -541,10 +541,12 @@ def generate_ai_response(user_text, session_id):
inputs = llm_tokenizer(prompt, return_tensors="pt").to(device) inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
output = llm_model.generate( output = llm_model.generate(
inputs.input_ids, inputs.input_ids,
attention_mask=inputs.attention_mask, # Add attention mask
max_new_tokens=100, # Keep responses shorter for voice max_new_tokens=100, # Keep responses shorter for voice
temperature=0.7, temperature=0.7,
top_p=0.9, top_p=0.9,
do_sample=True do_sample=True,
pad_token_id=llm_tokenizer.eos_token_id # Explicitly set pad_token_id
) )
response = llm_tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True) response = llm_tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)