diff --git a/Backend/.gitignore b/Backend/.gitignore
deleted file mode 100644
index 4b7fc9d..0000000
--- a/Backend/.gitignore
+++ /dev/null
@@ -1,46 +0,0 @@
-# Python
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# Virtual Environment
-.env
-.venv
-env/
-venv/
-ENV/
-
-# IDE
-.idea/
-.vscode/
-*.swp
-*.swo
-
-# Project specific
-.python-version
-*.wav
-output_*/
-basic_audio.wav
-full_conversation.wav
-context_audio.wav
-
-# Model files
-*.pt
-*.ckpt
\ No newline at end of file
diff --git a/Backend/README.md b/Backend/README.md
deleted file mode 100644
index 44cab4d..0000000
--- a/Backend/README.md
+++ /dev/null
@@ -1,154 +0,0 @@
-# CSM
-
-**2025/03/13** - We are releasing the 1B CSM variant. The checkpoint is [hosted on Hugging Face](https://huggingface.co/sesame/csm_1b).
-
----
-
-CSM (Conversational Speech Model) is a speech generation model from [Sesame](https://www.sesame.com) that generates RVQ audio codes from text and audio inputs. The model architecture employs a [Llama](https://www.llama.com/) backbone and a smaller audio decoder that produces [Mimi](https://huggingface.co/kyutai/mimi) audio codes.
-
-A fine-tuned variant of CSM powers the [interactive voice demo](https://www.sesame.com/voicedemo) shown in our [blog post](https://www.sesame.com/research/crossing_the_uncanny_valley_of_voice).
-
-A hosted [Hugging Face space](https://huggingface.co/spaces/sesame/csm-1b) is also available for testing audio generation.
-
-## Requirements
-
-* A CUDA-compatible GPU
-* The code has been tested on CUDA 12.4 and 12.6, but it may also work on other versions
-* Similarly, Python 3.10 is recommended, but newer versions may be fine
-* For some audio operations, `ffmpeg` may be required
-* Access to the following Hugging Face models:
-  * [Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B)
-  * [CSM-1B](https://huggingface.co/sesame/csm-1b)
-
-### Setup
-
-```bash
-git clone git@github.com:SesameAILabs/csm.git
-cd csm
-python3.10 -m venv .venv
-source .venv/bin/activate
-pip install -r requirements.txt
-
-# Disable lazy compilation in Mimi
-export NO_TORCH_COMPILE=1
-
-# You will need access to CSM-1B and Llama-3.2-1B
-huggingface-cli login
-```
-
-### Windows Setup
-
-The `triton` package cannot be installed on Windows. Instead, use `pip install triton-windows`.
-
-## Quickstart
-
-This script will generate a conversation between two characters, using a prompt for each character.
-
-```bash
-python run_csm.py
-```
-
-## Usage
-
-If you want to write your own applications with CSM, the following examples show basic usage.
-
-#### Generate a sentence
-
-This will use a random speaker identity, as no prompt or context is provided.
-
-```python
-from generator import load_csm_1b
-import torchaudio
-import torch
-
-if torch.backends.mps.is_available():
-    device = "mps"
-elif torch.cuda.is_available():
-    device = "cuda"
-else:
-    device = "cpu"
-
-generator = load_csm_1b(device=device)
-
-audio = generator.generate(
-    text="Hello from Sesame.",
-    speaker=0,
-    context=[],
-    max_audio_length_ms=10_000,
-)
-
-torchaudio.save("audio.wav", audio.unsqueeze(0).cpu(), generator.sample_rate)
-```
-
-#### Generate with context
-
-CSM sounds best when provided with context. You can prompt or provide context to the model using a `Segment` for each speaker's utterance.
-
-NOTE: The following example is instructional and the audio files do not exist. It is intended as an example for using context with CSM.
-
-```python
-from generator import Segment
-
-speakers = [0, 1, 0, 0]
-transcripts = [
-    "Hey how are you doing.",
-    "Pretty good, pretty good.",
-    "I'm great.",
-    "So happy to be speaking to you.",
-]
-audio_paths = [
-    "utterance_0.wav",
-    "utterance_1.wav",
-    "utterance_2.wav",
-    "utterance_3.wav",
-]
-
-def load_audio(audio_path):
-    audio_tensor, sample_rate = torchaudio.load(audio_path)
-    audio_tensor = torchaudio.functional.resample(
-        audio_tensor.squeeze(0), orig_freq=sample_rate, new_freq=generator.sample_rate
-    )
-    return audio_tensor
-
-segments = [
-    Segment(text=transcript, speaker=speaker, audio=load_audio(audio_path))
-    for transcript, speaker, audio_path in zip(transcripts, speakers, audio_paths)
-]
-audio = generator.generate(
-    text="Me too, this is some cool stuff huh?",
-    speaker=1,
-    context=segments,
-    max_audio_length_ms=10_000,
-)
-
-torchaudio.save("audio.wav", audio.unsqueeze(0).cpu(), generator.sample_rate)
-```
-
-## FAQ
-
-**Does this model come with any voices?**
-
-The model open-sourced here is a base generation model. It is capable of producing a variety of voices, but it has not been fine-tuned on any specific voice.
-
-**Can I converse with the model?**
-
-CSM is trained to be an audio generation model and not a general-purpose multimodal LLM. It cannot generate text. We suggest using a separate LLM for text generation.
-
-**Does it support other languages?**
-
-The model has some capacity for non-English languages due to data contamination in the training data, but it likely won't do well.
-
-## Misuse and abuse ⚠️
-
-This project provides a high-quality speech generation model for research and educational purposes. While we encourage responsible and ethical use, we **explicitly prohibit** the following:
-
-- **Impersonation or Fraud**: Do not use this model to generate speech that mimics real individuals without their explicit consent.
-- **Misinformation or Deception**: Do not use this model to create deceptive or misleading content, such as fake news or fraudulent calls.
-- **Illegal or Harmful Activities**: Do not use this model for any illegal, harmful, or malicious purposes.
-
-By using this model, you agree to comply with all applicable laws and ethical guidelines. We are **not responsible** for any misuse, and we strongly condemn unethical applications of this technology.
-
----
-
-## Authors
-Johan Schalkwyk, Ankit Kumar, Dan Lyth, Sefik Emre Eskimez, Zack Hodari, Cinjon Resnick, Ramon Sanabria, Raven Jiang, and the Sesame team.
diff --git a/Backend/app.py b/Backend/app.py
new file mode 100644
index 0000000..091de8e
--- /dev/null
+++ b/Backend/app.py
@@ -0,0 +1,229 @@
+import os
+import io
+import base64
+import time
+import torch
+import torchaudio
+import numpy as np
+from flask import Flask, render_template, request
+from flask_socketio import SocketIO, emit
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import speech_recognition as sr
+from generator import load_csm_1b, Segment
+from collections import deque
+
+app = Flask(__name__)
+app.config['SECRET_KEY'] = 'your-secret-key'
+socketio = SocketIO(app, cors_allowed_origins="*")
+
+# Select the best available device
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"
+else:
+    device = "cpu"
+print(f"Using device: {device}")
+
+# Initialize CSM model for audio generation
+print("Loading CSM model...")
+csm_generator = load_csm_1b(device=device)
+
+# Initialize Llama 3.2 model for response generation
+print("Loading Llama 3.2 model...")
+llm_model_id = "meta-llama/Llama-3.2-1B"  # Choose appropriate size based on resources
+llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
+llm_model = AutoModelForCausalLM.from_pretrained(
+    llm_model_id,
+    torch_dtype=torch.bfloat16,
+    device_map=device
+)
+
+# Initialize speech recognition
+recognizer = sr.Recognizer()
+
+# Store conversation context
+conversation_context = {}  # session_id -> context
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@socketio.on('connect')
+def handle_connect():
+    print(f"Client connected: {request.sid}")
+    conversation_context[request.sid] = {
+        'segments': [],
+        'speakers': [0, 1],  # 0 = user, 1 = bot
+        'audio_buffer': deque(maxlen=10),  # Only the 10 most recent chunks are kept
+        'is_speaking': False,
+        'silence_start': None
+    }
+    emit('ready', {'message': 'Connection established'})
+
+@socketio.on('disconnect')
+def handle_disconnect():
+    print(f"Client disconnected: {request.sid}")
+    if request.sid in conversation_context:
+        del conversation_context[request.sid]
+
+@socketio.on('start_speaking')
+def handle_start_speaking():
+    if request.sid in conversation_context:
+        conversation_context[request.sid]['is_speaking'] = True
+        conversation_context[request.sid]['audio_buffer'].clear()
+        print(f"User {request.sid} started speaking")
+
+@socketio.on('audio_chunk')
+def handle_audio_chunk(data):
+    if request.sid not in conversation_context:
+        return
+
+    context = conversation_context[request.sid]
+
+    # Decode audio data
+    audio_data = base64.b64decode(data['audio'])
+    audio_numpy = np.frombuffer(audio_data, dtype=np.float32)
+    audio_tensor = torch.tensor(audio_numpy)
+
+    # Add to buffer
+    context['audio_buffer'].append(audio_tensor)
+
+    # Check for silence to detect end of speech
+    if context['is_speaking'] and is_silence(audio_tensor):
+        if context['silence_start'] is None:
+            context['silence_start'] = time.time()
+        elif time.time() - context['silence_start'] > 1.0:  # 1 second of silence
+            # Process the complete utterance
+            process_user_utterance(request.sid)
+    else:
+        context['silence_start'] = None
+
+@socketio.on('stop_speaking')
+def handle_stop_speaking():
+    if request.sid in conversation_context:
+        conversation_context[request.sid]['is_speaking'] = False
+        process_user_utterance(request.sid)
+        print(f"User {request.sid} stopped speaking")
+
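+# Note: is_silence() below compares mean absolute amplitude against a fixed
+# threshold. For float32 samples in [-1, 1], the default of 0.02 is roughly
+# -34 dBFS, so it may need tuning for different microphones and environments.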
+def is_silence(audio_tensor, threshold=0.02):
+    """Check if an audio chunk is silence based on amplitude threshold"""
+    return torch.mean(torch.abs(audio_tensor)) < threshold
+
+def process_user_utterance(session_id):
+    """Process completed user utterance, generate response and send audio back"""
+    context = conversation_context[session_id]
+
+    if not context['audio_buffer']:
+        return
+
+    # Combine audio chunks
+    full_audio = torch.cat(list(context['audio_buffer']), dim=0)
+    context['audio_buffer'].clear()
+    context['is_speaking'] = False
+    context['silence_start'] = None
+
+    # Convert audio to 16kHz for speech recognition
+    audio_16k = torchaudio.functional.resample(
+        full_audio,
+        orig_freq=44100,  # Assuming 44.1kHz from client
+        new_freq=16000
+    )
+
+    # Transcribe speech
+    try:
+        # Convert to wav format for speech_recognition
+        audio_data = io.BytesIO()
+        torchaudio.save(audio_data, audio_16k.unsqueeze(0), 16000, format="wav")
+        audio_data.seek(0)
+
+        with sr.AudioFile(audio_data) as source:
+            audio = recognizer.record(source)
+        user_text = recognizer.recognize_google(audio)
+        print(f"Transcribed: {user_text}")
+
+        # Add to conversation segments
+        user_segment = Segment(
+            text=user_text,
+            speaker=0,  # User is speaker 0
+            audio=full_audio
+        )
+        context['segments'].append(user_segment)
+
+        # Generate bot response
+        bot_response = generate_llm_response(user_text, context['segments'])
+        print(f"Bot response: {bot_response}")
+
+        # Convert to audio using CSM
+        bot_audio = generate_audio_response(bot_response, context['segments'])
+
+        # Convert audio to base64 for sending over websocket
+        audio_bytes = io.BytesIO()
+        torchaudio.save(audio_bytes, bot_audio.unsqueeze(0).cpu(), csm_generator.sample_rate, format="wav")
+        audio_bytes.seek(0)
+        audio_b64 = base64.b64encode(audio_bytes.read()).decode('utf-8')
+
+        # Add bot response to conversation history
+        bot_segment = Segment(
+            text=bot_response,
+            speaker=1,  # Bot is speaker 1
+            audio=bot_audio
+        )
+        context['segments'].append(bot_segment)
+
+        # Send transcribed text to client
+        emit('transcription', {'text': user_text}, room=session_id)
+
+        # Send audio response to client
+        emit('audio_response', {
+            'audio': audio_b64,
+            'text': bot_response
+        }, room=session_id)
+
+    except Exception as e:
+        print(f"Error processing speech: {e}")
+        emit('error', {'message': f'Error processing speech: {str(e)}'}, room=session_id)
+
+def generate_llm_response(user_text, conversation_segments):
+    """Generate text response using Llama 3.2"""
+    # Format conversation history for the LLM. The current user utterance has
+    # already been appended to conversation_segments by the caller, so it is
+    # included here and must not be added a second time.
+    conversation_history = ""
+    for segment in conversation_segments[-5:]:  # Use last 5 utterances for context
+        speaker_name = "User" if segment.speaker == 0 else "Assistant"
+        conversation_history += f"{speaker_name}: {segment.text}\n"
+
+    # Cue the model to reply as the assistant
+    conversation_history += "Assistant:"
+
+    # Generate response
+    inputs = llm_tokenizer(conversation_history, return_tensors="pt").to(device)
+    output = llm_model.generate(
+        inputs.input_ids,
+        attention_mask=inputs.attention_mask,
+        max_new_tokens=150,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+
+    response = llm_tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    return response.strip()
+
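+# Illustrative prompt produced by generate_llm_response above (hypothetical
+# turns; the model continues the text after the trailing "Assistant:" cue):
+#
+#   User: Hey how are you doing.
+#   Assistant: Pretty good, pretty good.
+#   User: Tell me a joke.
+#   Assistant:
+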
+def generate_audio_response(text, conversation_segments):
+    """Generate audio response using CSM"""
+    # Use the last few conversation segments as context
+    context_segments = conversation_segments[-4:] if len(conversation_segments) > 4 else conversation_segments
+
+    # Generate audio for bot response
+    audio = csm_generator.generate(
+        text=text,
+        speaker=1,  # Bot is speaker 1
+        context=context_segments,
+        max_audio_length_ms=10000,  # 10 seconds max
+        temperature=0.9,
+        topk=50
+    )
+
+    return audio
+
+if __name__ == '__main__':
+    socketio.run(app, host='0.0.0.0', port=5000, debug=True)
\ No newline at end of file
diff --git a/Backend/index.html b/Backend/index.html
index 5ea925c..359ed41 100644
--- a/Backend/index.html
+++ b/Backend/index.html
@@ -3,490 +3,454 @@
-Speak naturally and have a conversation with AI
-
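
For reference, here is a minimal client sketch for exercising the Socket.IO protocol that `app.py` defines (`start_speaking`, `audio_chunk`, and `stop_speaking` from the client; `ready`, `transcription`, `audio_response`, and `error` from the server). It is not part of the diff: the server URL, the `python-socketio` and `soundfile` dependencies, and the `utterance.wav` input file are assumptions for illustration.

```python
# test_client.py - exercises the Socket.IO events defined in app.py.
import base64
import time

import socketio        # pip install "python-socketio[client]"
import soundfile as sf  # pip install soundfile

sio = socketio.Client()

@sio.on('ready')
def on_ready(data):
    print(f"Server ready: {data['message']}")

@sio.on('transcription')
def on_transcription(data):
    print(f"You said: {data['text']}")

@sio.on('audio_response')
def on_audio_response(data):
    # The server replies with a base64-encoded WAV file of the bot's answer.
    with open('bot_reply.wav', 'wb') as f:
        f.write(base64.b64decode(data['audio']))
    print(f"Bot said: {data['text']} (audio saved to bot_reply.wav)")
    sio.disconnect()

@sio.on('error')
def on_error(data):
    print(f"Server error: {data['message']}")
    sio.disconnect()

sio.connect('http://localhost:5000')

# Load a mono 44.1 kHz recording; app.py assumes this rate when resampling.
audio, rate = sf.read('utterance.wav', dtype='float32')
assert rate == 44100 and audio.ndim == 1, "expected mono 44.1 kHz audio"

# Stream the recording as base64-encoded float32 chunks of 0.5 s each.
sio.emit('start_speaking')
chunk_size = 22050
for start in range(0, len(audio), chunk_size):
    chunk = audio[start:start + chunk_size]
    sio.emit('audio_chunk', {'audio': base64.b64encode(chunk.tobytes()).decode('utf-8')})
    time.sleep(0.05)  # avoid flooding the socket
sio.emit('stop_speaking')

sio.wait()  # block until a handler calls sio.disconnect()
```

Because `app.py` keeps only the ten most recent chunks in its `deque(maxlen=10)` buffer, the sketch streams 0.5-second chunks, which caps a single utterance at about five seconds.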