Complete Refactor

2025-03-30 01:28:07 -04:00
parent 158afc78c7
commit 46be33b10a
28 changed files with 723 additions and 3081 deletions
--- a/Backend/src/audio/processor.py
+++ b/Backend/src/audio/processor.py
@@ -0,0 +1,28 @@
+from scipy.io import wavfile
+import numpy as np
+import torchaudio
+
+def load_audio(file_path):
+    sample_rate, audio_data = wavfile.read(file_path)
+    return sample_rate, audio_data
+
+def normalize_audio(audio_data):
+    audio_data = audio_data.astype(np.float32)
+    max_val = np.max(np.abs(audio_data))
+    if max_val > 0:
+        audio_data /= max_val
+    return audio_data
+
+def reduce_noise(audio_data, noise_factor=0.1):
+    noise = np.random.randn(len(audio_data))
+    noisy_audio = audio_data + noise_factor * noise
+    return noisy_audio
+
+def save_audio(file_path, sample_rate, audio_data):
+    torchaudio.save(file_path, torch.tensor(audio_data).unsqueeze(0), sample_rate)
+
+def process_audio(file_path, output_path):
+    sample_rate, audio_data = load_audio(file_path)
+    normalized_audio = normalize_audio(audio_data)
+    denoised_audio = reduce_noise(normalized_audio)
+    save_audio(output_path, sample_rate, denoised_audio)
--- a/Backend/src/audio/streaming.py
+++ b/Backend/src/audio/streaming.py
@@ -0,0 +1,35 @@
+from flask import Blueprint, request
+from flask_socketio import SocketIO, emit
+from src.audio.processor import process_audio
+from src.services.transcription_service import TranscriptionService
+from src.services.tts_service import TextToSpeechService
+
+# Blueprint for this module; no routes are attached to it in the code shown here.
+streaming_bp = Blueprint('streaming', __name__)
+# Created without a Flask app argument.
+# NOTE(review): presumably bound to the app elsewhere (e.g. init_app) — confirm.
+socketio = SocketIO()
+
+# Service instances are created once at import time and used by the
+# 'audio_stream' handler below.
+transcription_service = TranscriptionService()
+tts_service = TextToSpeechService()
+
+@socketio.on('audio_stream')
+def handle_audio_stream(data):
+    """Handle an ``audio_stream`` event and reply with ``audio_response``.
+
+    Reads ``data['audio']`` and ``data['speaker']`` (a missing key raises
+    ``KeyError``), then passes the chunk through ``process_audio``,
+    ``transcription_service.transcribe``, ``generate_response`` and
+    ``tts_service.convert_text_to_speech``, and emits the result as
+    ``{'audio': response_audio}``.
+    """
+    audio_chunk = data['audio']
+    speaker_id = data['speaker']
+    
+    # Process the audio chunk
+    # NOTE(review): process_audio is called with a single argument and its
+    # return value is used; confirm both against the signature and return
+    # value of src.audio.processor.process_audio.
+    processed_audio = process_audio(audio_chunk)
+    
+    # Transcribe the audio to text
+    transcription = transcription_service.transcribe(processed_audio)
+    
+    # Generate a response using the LLM
+    # NOTE(review): generate_response in this file is a placeholder that only
+    # echoes the transcription; no LLM is called from here.
+    response_text = generate_response(transcription, speaker_id)
+    
+    # Convert the response text back to audio
+    response_audio = tts_service.convert_text_to_speech(response_text, speaker_id)
+    
+    # Emit the response audio back to the client
+    emit('audio_response', {'audio': response_audio})
+
+def generate_response(transcription, speaker_id):
+    # Placeholder for the actual response generation logic
+    return f"Response to: {transcription}"