Complete Refactor
This commit is contained in:
28
Backend/src/audio/processor.py
Normal file
28
Backend/src/audio/processor.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import numpy as np
import torch
import torchaudio
from scipy.io import wavfile
|
||||
|
||||
def load_audio(file_path):
    """Read a WAV file from disk.

    Args:
        file_path: Path to a WAV file readable by ``scipy.io.wavfile``.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is the raw
        numpy array exactly as decoded from the file.
    """
    rate, samples = wavfile.read(file_path)
    return rate, samples
|
||||
|
||||
def normalize_audio(audio_data):
    """Peak-normalize audio samples into the [-1.0, 1.0] range.

    Args:
        audio_data: Numpy array of samples (any numeric dtype).

    Returns:
        A float32 array scaled so the largest absolute sample is 1.0.
        All-zero (silent) input is returned unscaled to avoid dividing
        by zero.
    """
    samples = audio_data.astype(np.float32)
    peak = np.abs(samples).max()
    # Silence has no peak to normalize against; return it as-is.
    return samples / peak if peak > 0 else samples
|
||||
|
||||
def reduce_noise(audio_data, noise_factor=0.1, window_size=5):
    """Attenuate high-frequency noise with a moving-average smoothing blend.

    Bug fix: the previous implementation *added* Gaussian noise
    (``audio + noise_factor * randn``) — a data-augmentation snippet, the
    opposite of noise reduction — and was non-deterministic. This version
    low-pass filters the signal and blends it with the original, which is
    deterministic and actually suppresses high-frequency noise.

    Args:
        audio_data: 1-D numpy array of samples.
        noise_factor: Blend strength in [0, 1]; 0 returns the input
            unchanged, 1 returns the fully smoothed signal.
        window_size: Width of the moving-average kernel (>= 1).

    Returns:
        A float32 array of the same length as ``audio_data``.
    """
    samples = np.asarray(audio_data, dtype=np.float32)
    if samples.size == 0:
        return samples
    kernel = np.ones(window_size, dtype=np.float32) / np.float32(window_size)
    # mode="same" keeps the output length equal to the input length.
    smoothed = np.convolve(samples, kernel, mode="same")
    return (1.0 - noise_factor) * samples + noise_factor * smoothed
|
||||
|
||||
def save_audio(file_path, sample_rate, audio_data):
    """Write a mono audio array to ``file_path`` via torchaudio.

    Bug fix: the original body called ``torch.tensor`` but the module never
    imported ``torch``, so every call raised ``NameError`` (fixed together
    with the import block). The samples are converted to a float32 tensor
    because torchaudio's encoders expect floating-point waveforms.

    Args:
        file_path: Destination path; format inferred from the extension.
        sample_rate: Sampling rate in Hz.
        audio_data: 1-D array-like of samples; saved as a single channel.
    """
    # unsqueeze(0) adds the channel dimension: (samples,) -> (1, samples).
    waveform = torch.as_tensor(audio_data, dtype=torch.float32).unsqueeze(0)
    torchaudio.save(file_path, waveform, sample_rate)
|
||||
|
||||
def process_audio(file_path, output_path=None):
    """Run the full pipeline: load, normalize, denoise, optionally save.

    Generalization: ``output_path`` is now optional. When omitted the
    processed samples are returned without touching disk, which lets
    in-memory callers (e.g. the streaming handler) reuse the pipeline.
    Existing two-argument callers are unaffected.

    Args:
        file_path: Path of the input WAV file.
        output_path: Optional destination path; when given, the processed
            audio is also written there.

    Returns:
        The processed (normalized and denoised) sample array.
    """
    sample_rate, audio_data = load_audio(file_path)
    normalized_audio = normalize_audio(audio_data)
    denoised_audio = reduce_noise(normalized_audio)
    if output_path is not None:
        save_audio(output_path, sample_rate, denoised_audio)
    return denoised_audio
|
||||
35
Backend/src/audio/streaming.py
Normal file
35
Backend/src/audio/streaming.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from flask import Blueprint, request
|
||||
from flask_socketio import SocketIO, emit
|
||||
from src.audio.processor import process_audio
|
||||
from src.services.transcription_service import TranscriptionService
|
||||
from src.services.tts_service import TextToSpeechService
|
||||
|
||||
# Flask blueprint grouping the audio-streaming routes for registration
# with the main application.
streaming_bp = Blueprint('streaming', __name__)
# Socket.IO server created without an app bound here.
# NOTE(review): presumably socketio.init_app(app) is called at app
# startup elsewhere — confirm, otherwise the handlers never attach.
socketio = SocketIO()

# Module-level service singletons shared by every socket event handler.
transcription_service = TranscriptionService()
tts_service = TextToSpeechService()
|
||||
|
||||
@socketio.on('audio_stream')
def handle_audio_stream(data):
    """Handle one incoming 'audio_stream' Socket.IO event.

    Expects ``data`` to be a dict with keys ``'audio'`` (the audio chunk)
    and ``'speaker'`` (the speaker identifier). Runs the chunk through
    process -> transcribe -> respond -> text-to-speech and emits the
    synthesized reply to the client on the 'audio_response' event.
    """
    audio_chunk = data['audio']
    speaker_id = data['speaker']

    # Process the audio chunk
    # NOTE(review): processor.process_audio is declared as
    # process_audio(file_path, output_path) — it takes two required
    # arguments, reads from a file path, and saves to disk — so this
    # one-argument call with raw chunk data will raise TypeError at
    # runtime. Confirm the intended processor API for streaming input.
    processed_audio = process_audio(audio_chunk)

    # Transcribe the audio to text
    transcription = transcription_service.transcribe(processed_audio)

    # Generate a response using the LLM
    response_text = generate_response(transcription, speaker_id)

    # Convert the response text back to audio
    response_audio = tts_service.convert_text_to_speech(response_text, speaker_id)

    # Emit the response audio back to the client
    emit('audio_response', {'audio': response_audio})
|
||||
|
||||
def generate_response(transcription, speaker_id):
    """Produce a text reply for a transcription.

    Placeholder: echoes the transcription back until the real LLM-backed
    response generation is wired in.

    Args:
        transcription: Transcribed user utterance.
        speaker_id: Identifier of the speaker (currently unused).

    Returns:
        The reply text.
    """
    return "Response to: " + transcription
|
||||
Reference in New Issue
Block a user