Files
HooHacks-12/Backend/server.py
2025-03-30 01:28:07 -04:00

53 lines
1.8 KiB
Python

import os
import logging
import torch
import eventlet
import base64
import tempfile
from io import BytesIO
from flask import Flask, render_template, request, jsonify
from flask_socketio import SocketIO, emit
import whisper
import torchaudio
from src.models.conversation import Segment
from src.services.tts_service import load_csm_1b
from src.llm.generator import generate_llm_response
from transformers import AutoTokenizer, AutoModelForCausalLM
from src.audio.streaming import AudioStreamer
from src.services.transcription_service import TranscriptionService
from src.services.tts_service import TextToSpeechService
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
app = Flask(__name__, static_folder='static', template_folder='templates')
app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key')
socketio = SocketIO(app)
# Initialize services
transcription_service = TranscriptionService()
tts_service = TextToSpeechService()
audio_streamer = AudioStreamer()
@socketio.on('audio_input')
def handle_audio_input(data):
audio_chunk = data['audio']
speaker_id = data['speaker']
# Process audio and convert to text
text = transcription_service.transcribe(audio_chunk)
logging.info(f"Transcribed text: {text}")
# Generate response using Llama 3.2
response_text = tts_service.generate_response(text, speaker_id)
logging.info(f"Generated response: {response_text}")
# Convert response text to audio
audio_response = tts_service.text_to_speech(response_text, speaker_id)
# Stream audio response back to client
socketio.emit('audio_response', {'audio': audio_response})
if __name__ == '__main__':
socketio.run(app, host='0.0.0.0', port=5000)