diff --git a/Backend/server.py b/Backend/server.py
index 352f5cd..9e98d60 100644
--- a/Backend/server.py
+++ b/Backend/server.py
@@ -24,14 +24,26 @@ app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
 socketio = SocketIO(app, cors_allowed_origins="*")
 
-# Select the best available device
-if torch.cuda.is_available():
-    device = "cuda"
-    whisper_compute_type = "float16"
-elif torch.backends.mps.is_available():
-    device = "mps"
-    whisper_compute_type = "float32"
-else:
+# Check for CUDA availability and handle potential CUDA/cuDNN issues
+try:
+    cuda_available = torch.cuda.is_available()
+    # Try to initialize CUDA to check if libraries are properly loaded
+    if cuda_available:
+        _ = torch.zeros(1).cuda()
+        device = "cuda"
+        whisper_compute_type = "float16"
+        print("CUDA is available and initialized successfully")
+    elif torch.backends.mps.is_available():
+        device = "mps"
+        whisper_compute_type = "float32"
+        print("MPS is available (Apple Silicon)")
+    else:
+        device = "cpu"
+        whisper_compute_type = "int8"
+        print("Using CPU (CUDA/MPS not available)")
+except Exception as e:
+    print(f"Error initializing CUDA: {e}")
+    print("Falling back to CPU")
     device = "cpu"
     whisper_compute_type = "int8"
 
@@ -51,7 +63,9 @@ def load_models():
         print("Loading Whisper model...")
         # Import here to avoid immediate import errors if package is missing
         from faster_whisper import WhisperModel
-        whisper_model = WhisperModel("base", device=device, compute_type=whisper_compute_type, download_root="./models/whisper")
+        # Force CPU for Whisper if we had CUDA issues
+        whisper_device = device if device != "cpu" else "cpu"
+        whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type, download_root="./models/whisper")
         print("Whisper model loaded successfully")
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
@@ -60,7 +74,9 @@ def load_models():
     # Initialize CSM model for audio generation
     try:
         print("Loading CSM model...")
-        csm_generator = load_csm_1b(device=device)
+        # Force CPU for CSM if we had CUDA issues
+        csm_device = device if device != "cpu" else "cpu"
+        csm_generator = load_csm_1b(device=csm_device)
         print("CSM model loaded successfully")
     except Exception as e:
         print(f"Error loading CSM model: {e}")
@@ -71,11 +87,14 @@ def load_models():
         print("Loading Llama 3.2 model...")
         llm_model_id = "meta-llama/Llama-3.2-1B"  # Choose appropriate size based on resources
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id, cache_dir="./models/llama")
+        # Force CPU for LLM if we had CUDA issues
+        llm_device = device if device != "cpu" else "cpu"
         llm_model = AutoModelForCausalLM.from_pretrained(
             llm_model_id,
-            torch_dtype=torch.bfloat16,
-            device_map=device,
-            cache_dir="./models/llama"
+            torch_dtype=torch.bfloat16 if llm_device != "cpu" else torch.float32,
+            device_map=llm_device,
+            cache_dir="./models/llama",
+            low_cpu_mem_usage=True
         )
         print("Llama 3.2 model loaded successfully")
     except Exception as e:
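
The device-selection change above reduces to one pattern: probe CUDA with a real allocation and fall back to CPU if initialization throws (for example, a broken cuDNN install even though torch.cuda.is_available() returns True). A minimal standalone sketch of that pattern, assuming only torch is installed; the select_device helper name is illustrative and not part of the patch:

import torch

def select_device():
    # Illustrative helper, not from the patch: probe CUDA with a real allocation,
    # since is_available() can be True while the CUDA/cuDNN libraries fail to load.
    try:
        if torch.cuda.is_available():
            _ = torch.zeros(1).cuda()      # forces CUDA initialization; raises if libs are broken
            return "cuda", "float16"
        if torch.backends.mps.is_available():
            return "mps", "float32"        # Apple Silicon
        return "cpu", "int8"
    except Exception as e:
        print(f"Error initializing CUDA: {e}; falling back to CPU")
        return "cpu", "int8"

device, whisper_compute_type = select_device()
print(f"Selected device: {device} (compute type: {whisper_compute_type})")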