From bb5e0c4765f010d4bd313d1b4d7198e43c764ac5 Mon Sep 17 00:00:00 2001
From: GamerBoss101
Date: Sun, 30 Mar 2025 00:17:39 -0400
Subject: [PATCH] Demo Fixes 1

---
 Backend/server.py | 60 +++++++++++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 20 deletions(-)

diff --git a/Backend/server.py b/Backend/server.py
index a6b70a3..d0dee80 100644
--- a/Backend/server.py
+++ b/Backend/server.py
@@ -75,32 +75,51 @@ except Exception as model_error:
         # Already tried CPU and it failed
         raise RuntimeError("Failed to load speech synthesis model on any device")
 
+# Replace the WhisperX model loading section
+
 # Initialize WhisperX for ASR with robust error handling
 print("Loading WhisperX model...")
+asr_model = None  # Initialize to None first to avoid scope issues
+
 try:
-    # First try the smallest model ("tiny") to avoid memory issues
-    asr_model = whisperx.load_model("tiny", device, compute_type=compute_type)
-    print("WhisperX 'tiny' model loaded successfully")
+    # Always start with the tiny model on CPU for stability
+    asr_model = whisperx.load_model("tiny", "cpu", compute_type="int8")
+    print("WhisperX 'tiny' model loaded on CPU successfully")
 
-    # If tiny worked and we have CUDA, try upgrading to small
+    # If CPU works, try CUDA if available
     if device == "cuda":
         try:
-            asr_model = whisperx.load_model("small", device, compute_type=compute_type)
-            print("WhisperX 'small' model loaded successfully")
-        except Exception as upgrade_error:
-            print(f"Staying with 'tiny' model: {str(upgrade_error)}")
+            print("Trying to load WhisperX on CUDA...")
+            cuda_model = whisperx.load_model("tiny", "cuda", compute_type="float16")
+            # Test the model to ensure it works
+            test_audio = torch.zeros(16000)  # 1 second of silence at 16kHz
+            _ = cuda_model.transcribe(test_audio.numpy(), batch_size=1)
+            # If we get here, CUDA works
+            asr_model = cuda_model
+            print("WhisperX model moved to CUDA successfully")
+
+            # Try to upgrade to small model on CUDA
+            try:
+                small_model = whisperx.load_model("small", "cuda", compute_type="float16")
+                # Test it
+                _ = small_model.transcribe(test_audio.numpy(), batch_size=1)
+                asr_model = small_model
+                print("WhisperX 'small' model loaded on CUDA successfully")
+            except Exception as upgrade_error:
+                print(f"Staying with 'tiny' model on CUDA: {str(upgrade_error)}")
+        except Exception as cuda_error:
+            print(f"CUDA loading failed, staying with CPU model: {str(cuda_error)}")
 except Exception as e:
-    print(f"Error loading models on {device}: {str(e)}")
-    print("Falling back to CPU model")
-    try:
-        # Force CPU as last resort
-        device = "cpu"
-        compute_type = "int8"
-        asr_model = whisperx.load_model("tiny", "cpu", compute_type="int8")
-        print("WhisperX loaded on CPU as last resort")
-    except Exception as cpu_error:
-        print(f"Fatal error - could not load any model: {str(cpu_error)}")
-        raise RuntimeError("No ASR model could be loaded. Please check your CUDA installation.")
+    print(f"Error loading WhisperX model: {str(e)}")
+    # Create a minimal dummy model as last resort
+    class DummyModel:
+        def __init__(self):
+            self.device = "cpu"
+        def transcribe(self, *args, **kwargs):
+            return {"segments": [{"text": "Speech recognition currently unavailable."}]}
+
+    asr_model = DummyModel()
+    print("WARNING: Using dummy transcription model - ASR functionality limited")
 
 # Silence detection parameters
 SILENCE_THRESHOLD = 0.01  # Adjust based on your audio normalization
@@ -262,6 +281,8 @@ def encode_audio_data(audio_tensor: torch.Tensor) -> str:
 
 def transcribe_audio(audio_tensor: torch.Tensor) -> str:
     """Transcribe audio using WhisperX with robust error handling"""
+    global asr_model  # Declare global at the beginning of the function
+
     try:
         # Save the tensor to a temporary file
         temp_path = os.path.join(base_dir, "temp_audio.wav")
@@ -291,7 +312,6 @@ def transcribe_audio(audio_tensor: torch.Tensor) -> str:
 
         # Try to load a CPU model as fallback
         try:
-            global asr_model
             # Move model to CPU and try again
             asr_model = whisperx.load_model("tiny", "cpu", compute_type="int8")
            result = asr_model.transcribe(audio, batch_size=1)
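
Reviewer note, not part of the patch: the nested try/except blocks above implement a probe-and-fallback ladder: load a model, transcribe one second of silence to prove the device actually works, and keep the best candidate that survives. Below is a minimal standalone sketch of that pattern, assuming only the whisperx and numpy imports; load_asr_model is a hypothetical helper name, and its best-first candidate list replaces the patch's CPU-first-then-upgrade ordering while reaching the same end state.

import numpy as np
import whisperx

def load_asr_model():
    """Probe (model, device, compute_type) candidates from best to worst."""
    probe = np.zeros(16000, dtype=np.float32)  # 1 second of silence at 16 kHz
    candidates = [
        ("small", "cuda", "float16"),
        ("tiny", "cuda", "float16"),
        ("tiny", "cpu", "int8"),
    ]
    for name, device, compute_type in candidates:
        try:
            model = whisperx.load_model(name, device, compute_type=compute_type)
            model.transcribe(probe, batch_size=1)  # fail fast on a broken device
            print(f"WhisperX '{name}' loaded on {device}")
            return model
        except Exception as err:
            print(f"'{name}' on {device} failed: {err}")

    # Same last-resort fallback as the patch: a null object instead of a raise
    class DummyModel:
        """Keeps the transcribe() interface so callers never handle None."""
        device = "cpu"

        def transcribe(self, *args, **kwargs):
            return {"segments": [{"text": "Speech recognition currently unavailable."}]}

    print("WARNING: Using dummy transcription model - ASR functionality limited")
    return DummyModel()

asr_model = load_asr_model()

The advantage of the DummyModel fallback over the old raise RuntimeError path is that the server still starts and every call site keeps a uniform transcribe() interface; the trade-off is that a hard failure is now visible only in the logs and in the placeholder transcript.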