Demo Fixes 11

2025-03-30 07:30:23 -04:00
parent 263127ed18
commit 30388d816f
1 changed files with 32 additions and 13 deletions
@@ -24,14 +24,26 @@ app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
 socketio = SocketIO(app, cors_allowed_origins="*")

-# Select the best available device
-if torch.cuda.is_available():
-    device = "cuda"
-    whisper_compute_type = "float16"
-elif torch.backends.mps.is_available():
-    device = "mps"
-    whisper_compute_type = "float32"
-else:
+# Check for CUDA availability and handle potential CUDA/cuDNN issues
+try:
+    cuda_available = torch.cuda.is_available()
+    # Try to initialize CUDA to check if libraries are properly loaded
+    if cuda_available:
+        _ = torch.zeros(1).cuda()
+        device = "cuda"
+        whisper_compute_type = "float16"
+        print("CUDA is available and initialized successfully")
+    elif torch.backends.mps.is_available():
+        device = "mps"
+        whisper_compute_type = "float32"
+        print("MPS is available (Apple Silicon)")
+    else:
+        device = "cpu"
+        whisper_compute_type = "int8"
+        print("Using CPU (CUDA/MPS not available)")
+except Exception as e:
+    print(f"Error initializing CUDA: {e}")
+    print("Falling back to CPU")
    device = "cpu"
    whisper_compute_type = "int8"
    
@@ -51,7 +63,9 @@ def load_models():
        print("Loading Whisper model...")
        # Import here to avoid immediate import errors if package is missing
        from faster_whisper import WhisperModel
-        whisper_model = WhisperModel("base", device=device, compute_type=whisper_compute_type, download_root="./models/whisper")
+        # Force CPU for Whisper if we had CUDA issues
+        whisper_device = device if device != "cpu" else "cpu"
+        whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type, download_root="./models/whisper")
        print("Whisper model loaded successfully")
    except Exception as e:
        print(f"Error loading Whisper model: {e}")
@@ -60,7 +74,9 @@ def load_models():
    # Initialize CSM model for audio generation
    try:
        print("Loading CSM model...")
-        csm_generator = load_csm_1b(device=device)
+        # Force CPU for CSM if we had CUDA issues
+        csm_device = device if device != "cpu" else "cpu"
+        csm_generator = load_csm_1b(device=csm_device)
        print("CSM model loaded successfully")
    except Exception as e:
        print(f"Error loading CSM model: {e}")
@@ -71,11 +87,14 @@ def load_models():
        print("Loading Llama 3.2 model...")
        llm_model_id = "meta-llama/Llama-3.2-1B"  # Choose appropriate size based on resources
        llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id, cache_dir="./models/llama")
+        # Force CPU for LLM if we had CUDA issues
+        llm_device = device if device != "cpu" else "cpu"
        llm_model = AutoModelForCausalLM.from_pretrained(
            llm_model_id,
-            torch_dtype=torch.bfloat16,
-            device_map=device,
-            cache_dir="./models/llama"
+            torch_dtype=torch.bfloat16 if llm_device != "cpu" else torch.float32,
+            device_map=llm_device,
+            cache_dir="./models/llama",
+            low_cpu_mem_usage=True
        )
        print("Llama 3.2 model loaded successfully")
    except Exception as e: