Demo Fixes 11
@@ -24,14 +24,26 @@ app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
 socketio = SocketIO(app, cors_allowed_origins="*")

-# Select the best available device
-if torch.cuda.is_available():
-    device = "cuda"
-    whisper_compute_type = "float16"
-elif torch.backends.mps.is_available():
-    device = "mps"
-    whisper_compute_type = "float32"
-else:
+# Check for CUDA availability and handle potential CUDA/cuDNN issues
+try:
+    cuda_available = torch.cuda.is_available()
+    # Try to initialize CUDA to check if libraries are properly loaded
+    if cuda_available:
+        _ = torch.zeros(1).cuda()
+        device = "cuda"
+        whisper_compute_type = "float16"
+        print("CUDA is available and initialized successfully")
+    elif torch.backends.mps.is_available():
+        device = "mps"
+        whisper_compute_type = "float32"
+        print("MPS is available (Apple Silicon)")
+    else:
+        device = "cpu"
+        whisper_compute_type = "int8"
+        print("Using CPU (CUDA/MPS not available)")
+except Exception as e:
+    print(f"Error initializing CUDA: {e}")
+    print("Falling back to CPU")
     device = "cpu"
     whisper_compute_type = "int8"

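Why the probe: torch.cuda.is_available() only reports that the driver sees a GPU; CUDA and cuDNN initialize lazily, so a broken library install crashes on the first real allocation. Pushing a one-element tensor onto the GPU inside the try block turns that crash into a catchable exception. A minimal standalone sketch of the same check (the pick_device name is ours, not from this commit):

    import torch

    def pick_device():
        """Probe for a working accelerator; fall back to CPU on any failure."""
        try:
            if torch.cuda.is_available():
                _ = torch.zeros(1).cuda()  # lazy CUDA/cuDNN init happens here
                return "cuda", "float16"
            if torch.backends.mps.is_available():
                return "mps", "float32"
        except Exception as e:
            print(f"Accelerator probe failed, falling back to CPU: {e}")
        return "cpu", "int8"

    device, whisper_compute_type = pick_device()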
@@ -51,7 +63,9 @@ def load_models():
         print("Loading Whisper model...")
         # Import here to avoid immediate import errors if package is missing
         from faster_whisper import WhisperModel
-        whisper_model = WhisperModel("base", device=device, compute_type=whisper_compute_type, download_root="./models/whisper")
+        # Force CPU for Whisper if we had CUDA issues
+        whisper_device = device if device != "cpu" else "cpu"
+        whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type, download_root="./models/whisper")
         print("Whisper model loaded successfully")
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
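The new whisper_device variable decouples Whisper's device from the global one. If "force CPU" is meant as an actual retry when GPU loading fails, a hedged sketch of that pattern (load_whisper is our name; this assumes WhisperModel raises when the requested device or compute type is unusable):

    from faster_whisper import WhisperModel

    def load_whisper(device, compute_type):
        """Try the chosen device first; retry on CPU with int8 if it fails."""
        try:
            return WhisperModel("base", device=device, compute_type=compute_type,
                                download_root="./models/whisper")
        except Exception as e:
            print(f"Whisper load failed on {device}: {e}; retrying on CPU")
            return WhisperModel("base", device="cpu", compute_type="int8",
                                download_root="./models/whisper")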
@@ -60,7 +74,9 @@ def load_models():
     # Initialize CSM model for audio generation
     try:
         print("Loading CSM model...")
-        csm_generator = load_csm_1b(device=device)
+        # Force CPU for CSM if we had CUDA issues
+        csm_device = device if device != "cpu" else "cpu"
+        csm_generator = load_csm_1b(device=csm_device)
         print("CSM model loaded successfully")
     except Exception as e:
         print(f"Error loading CSM model: {e}")
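The CSM hunk repeats the same guard, and it appears once more for the LLM below. Since the pattern is identical per model, a single helper could carry the fallback for all three loaders; a sketch under the assumption that retrying on CPU is the intended behavior (load_on_device is our name; load_csm_1b comes from the app's existing imports):

    def load_on_device(loader, device):
        """Call loader(device); on failure, retry once on CPU."""
        try:
            return loader(device)
        except Exception as e:
            print(f"Load failed on {device}: {e}; falling back to CPU")
            return loader("cpu")

    # Usage with the app's existing CSM loader:
    csm_generator = load_on_device(lambda d: load_csm_1b(device=d), device)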
@@ -71,11 +87,14 @@ def load_models():
         print("Loading Llama 3.2 model...")
         llm_model_id = "meta-llama/Llama-3.2-1B" # Choose appropriate size based on resources
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id, cache_dir="./models/llama")
+        # Force CPU for LLM if we had CUDA issues
+        llm_device = device if device != "cpu" else "cpu"
         llm_model = AutoModelForCausalLM.from_pretrained(
             llm_model_id,
-            torch_dtype=torch.bfloat16,
-            device_map=device,
-            cache_dir="./models/llama"
+            torch_dtype=torch.bfloat16 if llm_device != "cpu" else torch.float32,
+            device_map=llm_device,
+            cache_dir="./models/llama",
+            low_cpu_mem_usage=True
         )
         print("Llama 3.2 model loaded successfully")
     except Exception as e:
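The dtype switch is the substantive fix in this hunk: bfloat16 halves weight memory on accelerators, while plain CPU inference is generally safer in float32, and low_cpu_mem_usage=True has transformers materialize checkpoint shards directly instead of first allocating a full set of randomly initialized weights. A sketch consolidating the new loading path (load_llm is our name; passing device_map a plain device string requires the accelerate package):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_llm(model_id, device):
        """Load tokenizer and model with a dtype the target device can run."""
        dtype = torch.bfloat16 if device != "cpu" else torch.float32
        tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="./models/llama")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map=device,       # plain device string; needs accelerate installed
            cache_dir="./models/llama",
            low_cpu_mem_usage=True,  # load shards directly, skip the double allocation
        )
        return tokenizer, model

    llm_tokenizer, llm_model = load_llm("meta-llama/Llama-3.2-1B", device)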