Demo Fixes 11
@@ -24,14 +24,26 @@ app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
 socketio = SocketIO(app, cors_allowed_origins="*")

-# Select the best available device
-if torch.cuda.is_available():
-    device = "cuda"
-    whisper_compute_type = "float16"
-elif torch.backends.mps.is_available():
-    device = "mps"
-    whisper_compute_type = "float32"
-else:
+# Check for CUDA availability and handle potential CUDA/cuDNN issues
+try:
+    cuda_available = torch.cuda.is_available()
+    # Try to initialize CUDA to check if libraries are properly loaded
+    if cuda_available:
+        _ = torch.zeros(1).cuda()
+        device = "cuda"
+        whisper_compute_type = "float16"
+        print("CUDA is available and initialized successfully")
+    elif torch.backends.mps.is_available():
+        device = "mps"
+        whisper_compute_type = "float32"
+        print("MPS is available (Apple Silicon)")
+    else:
+        device = "cpu"
+        whisper_compute_type = "int8"
+        print("Using CPU (CUDA/MPS not available)")
+except Exception as e:
+    print(f"Error initializing CUDA: {e}")
+    print("Falling back to CPU")
     device = "cpu"
     whisper_compute_type = "int8"

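Why the probe: torch.cuda.is_available() only reports that the driver sees a GPU; CUDA and cuDNN initialize lazily, so a broken library install crashes on the first real allocation. Pushing a one-element tensor onto the GPU inside the try block turns that crash into a catchable exception. A minimal standalone sketch of the same check (the pick_device name is ours, not from this commit):

    import torch

    def pick_device():
        """Probe for a working accelerator; fall back to CPU on any failure."""
        try:
            if torch.cuda.is_available():
                _ = torch.zeros(1).cuda()  # lazy CUDA/cuDNN init happens here
                return "cuda", "float16"
            if torch.backends.mps.is_available():
                return "mps", "float32"
        except Exception as e:
            print(f"Accelerator probe failed, falling back to CPU: {e}")
        return "cpu", "int8"

    device, whisper_compute_type = pick_device()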
@@ -51,7 +63,9 @@ def load_models():
         print("Loading Whisper model...")
         # Import here to avoid immediate import errors if package is missing
         from faster_whisper import WhisperModel
-        whisper_model = WhisperModel("base", device=device, compute_type=whisper_compute_type, download_root="./models/whisper")
+        # Force CPU for Whisper if we had CUDA issues
+        whisper_device = device if device != "cpu" else "cpu"
+        whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type, download_root="./models/whisper")
         print("Whisper model loaded successfully")
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
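The new whisper_device variable decouples Whisper's device from the global one. If "force CPU" is meant as an actual retry when GPU loading fails, a hedged sketch of that pattern (load_whisper is our name; this assumes WhisperModel raises when the requested device or compute type is unusable):

    from faster_whisper import WhisperModel

    def load_whisper(device, compute_type):
        """Try the chosen device first; retry on CPU with int8 if it fails."""
        try:
            return WhisperModel("base", device=device, compute_type=compute_type,
                                download_root="./models/whisper")
        except Exception as e:
            print(f"Whisper load failed on {device}: {e}; retrying on CPU")
            return WhisperModel("base", device="cpu", compute_type="int8",
                                download_root="./models/whisper")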
@@ -60,7 +74,9 @@ def load_models():
     # Initialize CSM model for audio generation
     try:
         print("Loading CSM model...")
-        csm_generator = load_csm_1b(device=device)
+        # Force CPU for CSM if we had CUDA issues
+        csm_device = device if device != "cpu" else "cpu"
+        csm_generator = load_csm_1b(device=csm_device)
         print("CSM model loaded successfully")
     except Exception as e:
         print(f"Error loading CSM model: {e}")
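The CSM hunk repeats the same guard, and it appears once more for the LLM below. Since the pattern is identical per model, a single helper could carry the fallback for all three loaders; a sketch under the assumption that retrying on CPU is the intended behavior (load_on_device is our name; load_csm_1b comes from the app's existing imports):

    def load_on_device(loader, device):
        """Call loader(device); on failure, retry once on CPU."""
        try:
            return loader(device)
        except Exception as e:
            print(f"Load failed on {device}: {e}; falling back to CPU")
            return loader("cpu")

    # Usage with the app's existing CSM loader:
    csm_generator = load_on_device(lambda d: load_csm_1b(device=d), device)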
@@ -71,11 +87,14 @@ def load_models():
         print("Loading Llama 3.2 model...")
         llm_model_id = "meta-llama/Llama-3.2-1B" # Choose appropriate size based on resources
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id, cache_dir="./models/llama")
+        # Force CPU for LLM if we had CUDA issues
+        llm_device = device if device != "cpu" else "cpu"
         llm_model = AutoModelForCausalLM.from_pretrained(
             llm_model_id,
-            torch_dtype=torch.bfloat16,
-            device_map=device,
-            cache_dir="./models/llama"
+            torch_dtype=torch.bfloat16 if llm_device != "cpu" else torch.float32,
+            device_map=llm_device,
+            cache_dir="./models/llama",
+            low_cpu_mem_usage=True
         )
         print("Llama 3.2 model loaded successfully")
     except Exception as e:
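The dtype switch is the substantive fix in this hunk: bfloat16 halves weight memory on accelerators, while plain CPU inference is generally safer in float32, and low_cpu_mem_usage=True has transformers materialize checkpoint shards directly instead of first allocating a full set of randomly initialized weights. A sketch consolidating the new loading path (load_llm is our name; passing device_map a plain device string requires the accelerate package):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_llm(model_id, device):
        """Load tokenizer and model with a dtype the target device can run."""
        dtype = torch.bfloat16 if device != "cpu" else torch.float32
        tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="./models/llama")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map=device,       # plain device string; needs accelerate installed
            cache_dir="./models/llama",
            low_cpu_mem_usage=True,  # load shards directly, skip the double allocation
        )
        return tokenizer, model

    llm_tokenizer, llm_model = load_llm("meta-llama/Llama-3.2-1B", device)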