diff --git a/Backend/server.py b/Backend/server.py
index 9e98d60..93fac92 100644
--- a/Backend/server.py
+++ b/Backend/server.py
@@ -13,6 +13,11 @@ import requests
 import huggingface_hub
 from generator import load_csm_1b, Segment
 
+# Force CPU mode regardless of what's available
+# This bypasses the CUDA/cuDNN library requirements
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Hide all CUDA devices
+torch.backends.cudnn.enabled = False  # Disable cuDNN
+
 # Configure environment with longer timeouts
 os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes timeout for downloads
 requests.adapters.DEFAULT_TIMEOUT = 60  # Increase default requests timeout
@@ -24,30 +29,10 @@ app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
 socketio = SocketIO(app, cors_allowed_origins="*")
 
-# Check for CUDA availability and handle potential CUDA/cuDNN issues
-try:
-    cuda_available = torch.cuda.is_available()
-    # Try to initialize CUDA to check if libraries are properly loaded
-    if cuda_available:
-        _ = torch.zeros(1).cuda()
-        device = "cuda"
-        whisper_compute_type = "float16"
-        print("CUDA is available and initialized successfully")
-    elif torch.backends.mps.is_available():
-        device = "mps"
-        whisper_compute_type = "float32"
-        print("MPS is available (Apple Silicon)")
-    else:
-        device = "cpu"
-        whisper_compute_type = "int8"
-        print("Using CPU (CUDA/MPS not available)")
-except Exception as e:
-    print(f"Error initializing CUDA: {e}")
-    print("Falling back to CPU")
-    device = "cpu"
-    whisper_compute_type = "int8"
-
-print(f"Using device: {device}")
+# Force CPU regardless of what hardware is available
+device = "cpu"
+whisper_compute_type = "int8"
+print("Forcing CPU mode for all models")
 
 # Initialize models with proper error handling
 whisper_model = None
@@ -60,12 +45,10 @@ def load_models():
 
     # Initialize Faster-Whisper for transcription
     try:
-        print("Loading Whisper model...")
+        print("Loading Whisper model on CPU...")
         # Import here to avoid immediate import errors if package is missing
         from faster_whisper import WhisperModel
-        # Force CPU for Whisper if we had CUDA issues
-        whisper_device = device if device != "cpu" else "cpu"
-        whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type, download_root="./models/whisper")
+        whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8", download_root="./models/whisper")
         print("Whisper model loaded successfully")
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
@@ -73,10 +56,8 @@ def load_models():
 
     # Initialize CSM model for audio generation
     try:
-        print("Loading CSM model...")
-        # Force CPU for CSM if we had CUDA issues
-        csm_device = device if device != "cpu" else "cpu"
-        csm_generator = load_csm_1b(device=csm_device)
+        print("Loading CSM model on CPU...")
+        csm_generator = load_csm_1b(device="cpu")
         print("CSM model loaded successfully")
     except Exception as e:
         print(f"Error loading CSM model: {e}")
@@ -84,15 +65,13 @@ def load_models():
 
     # Initialize Llama 3.2 model for response generation
     try:
-        print("Loading Llama 3.2 model...")
+        print("Loading Llama 3.2 model on CPU...")
         llm_model_id = "meta-llama/Llama-3.2-1B"  # Choose appropriate size based on resources
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id, cache_dir="./models/llama")
-        # Force CPU for LLM if we had CUDA issues
-        llm_device = device if device != "cpu" else "cpu"
         llm_model = AutoModelForCausalLM.from_pretrained(
             llm_model_id,
-            torch_dtype=torch.bfloat16 if llm_device != "cpu" else torch.float32,
-            device_map=llm_device,
+            torch_dtype=torch.float32,  # Use float32 on CPU
+            device_map="cpu",
             cache_dir="./models/llama",
             low_cpu_mem_usage=True
         )
@@ -379,7 +358,7 @@ if __name__ == '__main__':
         os.rename('index.html', 'templates/index.html')
 
     # Load models asynchronously before starting the server
-    print("Starting model loading...")
+    print("Starting CPU-only model loading...")
     # In a production environment, you could load models in a separate thread
     load_models()