diff --git a/Backend/index.html b/Backend/index.html index 0e4006e..5ea925c 100644 --- a/Backend/index.html +++ b/Backend/index.html @@ -4,715 +4,489 @@ Sesame AI Voice Chat + + + -

[index.html body: the markup was lost in extraction; only the visible UI text is recoverable.
Removed UI text: "Sesame AI Voice Chat" heading, "Audio levels will appear here when speaking", "Not connected".
Added UI text: "Sesame AI Voice Chat" heading, "Speak naturally and have a conversation with AI" tagline,
a "Conversation" panel, an "Audio Visualizer" panel ("Speak to see audio visualization"),
a "Voice Settings" panel with a "Silence Threshold" slider (default 0.01),
"Conversation Controls", a "Settings" panel with "Auto-play responses" and "Show visualizer" toggles,
and a "Not connected" status indicator.]
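The "Silence Threshold" slider above feeds the same energy gate that both the Flask server and the browser client below implement: a rolling average of per-chunk energy is compared against the threshold, and the utterance is only treated as finished after the average has stayed below it for a hold time. A minimal dependency-free sketch of that gate, assuming mono float samples in [-1, 1] and the 0.01 / 1.0 s defaults from server.py (the helper names here are illustrative, not part of the diff):

from collections import deque

SILENCE_THRESHOLD = 0.01      # same default the UI slider starts at
SILENCE_DURATION_SEC = 1.0    # how long the average must stay low

def make_silence_gate(window=10):
    energies = deque(maxlen=window)
    state = {"silent_since": None}

    def update(chunk, now):
        """chunk: iterable of float samples in [-1, 1]; now: time in seconds.
        Returns True once the rolling mean-absolute energy has stayed below
        SILENCE_THRESHOLD for SILENCE_DURATION_SEC (utterance looks finished)."""
        energies.append(sum(abs(s) for s in chunk) / max(len(chunk), 1))
        avg = sum(energies) / len(energies)
        if avg < SILENCE_THRESHOLD:
            if state["silent_since"] is None:
                state["silent_since"] = now
            return now - state["silent_since"] >= SILENCE_DURATION_SEC
        state["silent_since"] = None
        return False

    return update

server.py does the per-chunk step with torch.mean(torch.abs(audio_chunk)) over a deque(maxlen=10); voice-chat.js uses an RMS over a 15-entry window and a 750 ms client-side hold before it stops sending chunks.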
- + + + + \ No newline at end of file diff --git a/Backend/server.py b/Backend/server.py index b9736b5..d0dee80 100644 --- a/Backend/server.py +++ b/Backend/server.py @@ -1,99 +1,276 @@ import os import base64 import json -import asyncio import torch import torchaudio import numpy as np -import io import whisperx from io import BytesIO from typing import List, Dict, Any, Optional -from fastapi import FastAPI, WebSocket, WebSocketDisconnect -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel +from flask import Flask, request, send_from_directory, Response +from flask_cors import CORS +from flask_socketio import SocketIO, emit, disconnect from generator import load_csm_1b, Segment -import uvicorn import time import gc from collections import deque +from threading import Lock -# Select device -if torch.cuda.is_available(): - device = "cuda" -else: +# Add this at the top of your file, replacing your current CUDA setup + +# CUDA setup with robust error handling +try: + # Handle CUDA issues + os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Limit to first GPU only + + # Try enabling TF32 precision + try: + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + except: + pass # Ignore if not supported + + # Check if CUDA is available + if torch.cuda.is_available(): + try: + # Test CUDA functionality + x = torch.rand(10, device="cuda") + y = x + x + del x, y + device = "cuda" + compute_type = "float16" + print("CUDA is fully functional") + except Exception as cuda_error: + print(f"CUDA is available but not working correctly: {str(cuda_error)}") + device = "cpu" + compute_type = "int8" + else: + device = "cpu" + compute_type = "int8" +except Exception as e: + print(f"Error setting up CUDA: {str(e)}") device = "cpu" -print(f"Using device: {device}") + compute_type = "int8" -# Initialize the model -generator = load_csm_1b(device=device) +print(f"Using device: {device} with compute type: {compute_type}") -# Initialize WhisperX for ASR +# Initialize the Sesame CSM model with robust error handling +try: + print(f"Loading Sesame CSM model on {device}...") + generator = load_csm_1b(device=device) + print("Sesame CSM model loaded successfully") +except Exception as model_error: + print(f"Error loading Sesame CSM on {device}: {str(model_error)}") + if device == "cuda": + # Try on CPU as fallback + try: + print("Trying to load Sesame CSM on CPU instead...") + device = "cpu" # Update global device setting + generator = load_csm_1b(device="cpu") + print("Sesame CSM model loaded on CPU successfully") + except Exception as cpu_error: + print(f"Fatal error - could not load Sesame CSM model: {str(cpu_error)}") + raise RuntimeError("Failed to load speech synthesis model") + else: + # Already tried CPU and it failed + raise RuntimeError("Failed to load speech synthesis model on any device") + +# Replace the WhisperX model loading section + +# Initialize WhisperX for ASR with robust error handling print("Loading WhisperX model...") -# Use a smaller model for faster response times -asr_model = whisperx.load_model("medium", device, compute_type="float16") -print("WhisperX model loaded!") +asr_model = None # Initialize to None first to avoid scope issues -app = FastAPI() - -# Add CORS middleware to allow cross-origin requests -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allow all origins in development - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Connection manager to handle multiple clients -class 
ConnectionManager: - def __init__(self): - self.active_connections: List[WebSocket] = [] - - async def connect(self, websocket: WebSocket): - await websocket.accept() - self.active_connections.append(websocket) - - def disconnect(self, websocket: WebSocket): - self.active_connections.remove(websocket) - -manager = ConnectionManager() +try: + # Always start with the tiny model on CPU for stability + asr_model = whisperx.load_model("tiny", "cpu", compute_type="int8") + print("WhisperX 'tiny' model loaded on CPU successfully") + + # If CPU works, try CUDA if available + if device == "cuda": + try: + print("Trying to load WhisperX on CUDA...") + cuda_model = whisperx.load_model("tiny", "cuda", compute_type="float16") + # Test the model to ensure it works + test_audio = torch.zeros(16000) # 1 second of silence at 16kHz + _ = cuda_model.transcribe(test_audio.numpy(), batch_size=1) + # If we get here, CUDA works + asr_model = cuda_model + print("WhisperX model moved to CUDA successfully") + + # Try to upgrade to small model on CUDA + try: + small_model = whisperx.load_model("small", "cuda", compute_type="float16") + # Test it + _ = small_model.transcribe(test_audio.numpy(), batch_size=1) + asr_model = small_model + print("WhisperX 'small' model loaded on CUDA successfully") + except Exception as upgrade_error: + print(f"Staying with 'tiny' model on CUDA: {str(upgrade_error)}") + except Exception as cuda_error: + print(f"CUDA loading failed, staying with CPU model: {str(cuda_error)}") +except Exception as e: + print(f"Error loading WhisperX model: {str(e)}") + # Create a minimal dummy model as last resort + class DummyModel: + def __init__(self): + self.device = "cpu" + def transcribe(self, *args, **kwargs): + return {"segments": [{"text": "Speech recognition currently unavailable."}]} + + asr_model = DummyModel() + print("WARNING: Using dummy transcription model - ASR functionality limited") # Silence detection parameters SILENCE_THRESHOLD = 0.01 # Adjust based on your audio normalization -SILENCE_DURATION_SEC = 1.0 # How long silence must persist to be considered "stopped talking" +SILENCE_DURATION_SEC = 1.0 # How long silence must persist + +# Define the base directory +base_dir = os.path.dirname(os.path.abspath(__file__)) +static_dir = os.path.join(base_dir, "static") +os.makedirs(static_dir, exist_ok=True) + +# Setup Flask +app = Flask(__name__) +CORS(app) +socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet') + +# Socket connection management +thread = None +thread_lock = Lock() +active_clients = {} # Map client_id to client context # Helper function to convert audio data -async def decode_audio_data(audio_data: str) -> torch.Tensor: - """Decode base64 audio data to a torch tensor""" +def decode_audio_data(audio_data: str) -> torch.Tensor: + """Decode base64 audio data to a torch tensor with improved error handling""" try: + # Skip empty audio data + if not audio_data or len(audio_data) < 100: + print("Empty or too short audio data received") + return torch.zeros(generator.sample_rate // 2) # 0.5 seconds of silence + + # Extract the actual base64 content + if ',' in audio_data: + # Handle data URL format (data:audio/wav;base64,...) 
+ audio_data = audio_data.split(',')[1] + # Decode base64 audio data - binary_data = base64.b64decode(audio_data.split(',')[1] if ',' in audio_data else audio_data) + try: + binary_data = base64.b64decode(audio_data) + print(f"Decoded base64 data: {len(binary_data)} bytes") + + # Check if we have enough data for a valid WAV + if len(binary_data) < 44: # WAV header is 44 bytes + print("Data too small to be a valid WAV file") + return torch.zeros(generator.sample_rate // 2) + except Exception as e: + print(f"Base64 decoding error: {str(e)}") + return torch.zeros(generator.sample_rate // 2) - # Save to a temporary WAV file first - temp_file = BytesIO(binary_data) + # Save for debugging + debug_path = os.path.join(base_dir, "debug_incoming.wav") + with open(debug_path, 'wb') as f: + f.write(binary_data) + print(f"Saved debug file: {debug_path}") - # Load audio from binary data, explicitly specifying the format - audio_tensor, sample_rate = torchaudio.load(temp_file, format="wav") + # Approach 1: Load directly with torchaudio + try: + with BytesIO(binary_data) as temp_file: + temp_file.seek(0) # Ensure we're at the start of the buffer + audio_tensor, sample_rate = torchaudio.load(temp_file, format="wav") + print(f"Direct loading success: shape={audio_tensor.shape}, rate={sample_rate}Hz") + + # Check if audio is valid + if audio_tensor.numel() == 0 or torch.isnan(audio_tensor).any(): + raise ValueError("Empty or invalid audio tensor detected") + except Exception as e: + print(f"Direct loading failed: {str(e)}") + + # Approach 2: Try to fix/normalize the WAV data + try: + # Sometimes WAV headers can be malformed, attempt to fix + temp_path = os.path.join(base_dir, "temp_fixing.wav") + with open(temp_path, 'wb') as f: + f.write(binary_data) + + # Use a simpler numpy approach as backup + import numpy as np + import wave + + try: + with wave.open(temp_path, 'rb') as wf: + n_channels = wf.getnchannels() + sample_width = wf.getsampwidth() + sample_rate = wf.getframerate() + n_frames = wf.getnframes() + + # Read the frames + frames = wf.readframes(n_frames) + print(f"Wave reading: channels={n_channels}, rate={sample_rate}Hz, frames={n_frames}") + + # Convert to numpy and then to torch + if sample_width == 2: # 16-bit audio + data = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0 + elif sample_width == 1: # 8-bit audio + data = np.frombuffer(frames, dtype=np.uint8).astype(np.float32) / 128.0 - 1.0 + else: + raise ValueError(f"Unsupported sample width: {sample_width}") + + # Convert to mono if needed + if n_channels > 1: + data = data.reshape(-1, n_channels) + data = data.mean(axis=1) + + # Convert to torch tensor + audio_tensor = torch.from_numpy(data) + print(f"Successfully converted with numpy: shape={audio_tensor.shape}") + except Exception as wave_error: + print(f"Wave processing failed: {str(wave_error)}") + # Try with torchaudio as last resort + audio_tensor, sample_rate = torchaudio.load(temp_path, format="wav") + + # Clean up + if os.path.exists(temp_path): + os.remove(temp_path) + except Exception as e2: + print(f"All WAV loading methods failed: {str(e2)}") + print("Returning silence as fallback") + return torch.zeros(generator.sample_rate // 2) + # Ensure audio is the right shape (mono) + if len(audio_tensor.shape) > 1 and audio_tensor.shape[0] > 1: + audio_tensor = torch.mean(audio_tensor, dim=0) + + # Ensure we have a 1D tensor + audio_tensor = audio_tensor.squeeze() + # Resample if needed if sample_rate != generator.sample_rate: - audio_tensor = 
torchaudio.functional.resample( - audio_tensor.squeeze(0), - orig_freq=sample_rate, - new_freq=generator.sample_rate - ) - else: - audio_tensor = audio_tensor.squeeze(0) - + try: + print(f"Resampling from {sample_rate}Hz to {generator.sample_rate}Hz") + resampler = torchaudio.transforms.Resample( + orig_freq=sample_rate, + new_freq=generator.sample_rate + ) + audio_tensor = resampler(audio_tensor) + except Exception as e: + print(f"Resampling error: {str(e)}") + # If resampling fails, just return the original audio + # The model can often handle different sample rates + + # Normalize audio to avoid issues + if torch.abs(audio_tensor).max() > 0: + audio_tensor = audio_tensor / torch.abs(audio_tensor).max() + + print(f"Final audio tensor: shape={audio_tensor.shape}, min={audio_tensor.min().item():.4f}, max={audio_tensor.max().item():.4f}") return audio_tensor except Exception as e: - print(f"Error decoding audio: {str(e)}") + print(f"Unhandled error in decode_audio_data: {str(e)}") # Return a small silent audio segment as fallback return torch.zeros(generator.sample_rate // 2) # 0.5 seconds of silence -async def encode_audio_data(audio_tensor: torch.Tensor) -> str: +def encode_audio_data(audio_tensor: torch.Tensor) -> str: """Encode torch tensor audio to base64 string""" buf = BytesIO() torchaudio.save(buf, audio_tensor.unsqueeze(0).cpu(), generator.sample_rate, format="wav") @@ -102,40 +279,72 @@ async def encode_audio_data(audio_tensor: torch.Tensor) -> str: return f"data:audio/wav;base64,{audio_base64}" -async def transcribe_audio(audio_tensor: torch.Tensor) -> str: - """Transcribe audio using WhisperX""" +def transcribe_audio(audio_tensor: torch.Tensor) -> str: + """Transcribe audio using WhisperX with robust error handling""" + global asr_model # Declare global at the beginning of the function + try: # Save the tensor to a temporary file - temp_file = BytesIO() - torchaudio.save(temp_file, audio_tensor.unsqueeze(0).cpu(), generator.sample_rate, format="wav") - temp_file.seek(0) + temp_path = os.path.join(base_dir, "temp_audio.wav") + torchaudio.save(temp_path, audio_tensor.unsqueeze(0).cpu(), generator.sample_rate) - # Create a temporary file on disk (WhisperX requires a file path) - temp_path = "temp_audio.wav" - with open(temp_path, "wb") as f: - f.write(temp_file.read()) + print(f"Transcribing audio file: {temp_path} (size: {os.path.getsize(temp_path)} bytes)") - # Load and transcribe the audio - audio = whisperx.load_audio(temp_path) - result = asr_model.transcribe(audio, batch_size=16) + # Load the audio file using whisperx's function + try: + audio = whisperx.load_audio(temp_path) + except Exception as audio_load_error: + print(f"WhisperX load_audio failed: {str(audio_load_error)}") + # Fall back to manual loading + import soundfile as sf + audio, sr = sf.read(temp_path) + if sr != 16000: # WhisperX expects 16kHz audio + from scipy import signal + audio = signal.resample(audio, int(len(audio) * 16000 / sr)) + + # Transcribe with error handling for CUDA issues + try: + # Try with original device + result = asr_model.transcribe(audio, batch_size=8) + except RuntimeError as cuda_error: + if "CUDA" in str(cuda_error) or "libcudnn" in str(cuda_error): + print(f"CUDA error in transcription, falling back to CPU: {str(cuda_error)}") + + # Try to load a CPU model as fallback + try: + # Move model to CPU and try again + asr_model = whisperx.load_model("tiny", "cpu", compute_type="int8") + result = asr_model.transcribe(audio, batch_size=1) + except Exception as e: + print(f"CPU fallback also 
failed: {str(e)}") + return "I'm having trouble processing audio right now." + else: + # Re-raise if it's not a CUDA error + raise # Clean up - os.remove(temp_path) + if os.path.exists(temp_path): + os.remove(temp_path) # Get the transcription text if result["segments"] and len(result["segments"]) > 0: # Combine all segments transcription = " ".join([segment["text"] for segment in result["segments"]]) - print(f"Transcription: {transcription}") + print(f"Transcription successful: '{transcription.strip()}'") return transcription.strip() else: + print("Transcription returned no segments") return "" except Exception as e: print(f"Error in transcription: {str(e)}") - return "" + import traceback + traceback.print_exc() + if os.path.exists("temp_audio.wav"): + os.remove("temp_audio.wav") + return "I heard something but couldn't understand it." -async def generate_response(text: str, conversation_history: List[Segment]) -> str: +def generate_response(text: str, conversation_history: List[Segment]) -> str: """Generate a contextual response based on the transcribed text""" # Simple response logic - can be replaced with a more sophisticated LLM in the future responses = { @@ -163,255 +372,417 @@ async def generate_response(text: str, conversation_history: List[Segment]) -> s else: return f"I understand you said '{text}'. That's interesting! Can you tell me more about that?" +# Flask routes for serving static content +@app.route('/') +def index(): + return send_from_directory(base_dir, 'index.html') -@app.websocket("/ws") -async def websocket_endpoint(websocket: WebSocket): - await manager.connect(websocket) - context_segments = [] # Store conversation context - streaming_buffer = [] # Buffer for streaming audio chunks - is_streaming = False +@app.route('/favicon.ico') +def favicon(): + if os.path.exists(os.path.join(static_dir, 'favicon.ico')): + return send_from_directory(static_dir, 'favicon.ico') + return Response(status=204) + +@app.route('/voice-chat.js') +def voice_chat_js(): + return send_from_directory(base_dir, 'voice-chat.js') + +@app.route('/static/') +def serve_static(path): + return send_from_directory(static_dir, path) + +# Socket.IO event handlers +@socketio.on('connect') +def handle_connect(): + client_id = request.sid + print(f"Client connected: {client_id}") - # Variables for silence detection - last_active_time = time.time() - is_silence = False - energy_window = deque(maxlen=10) # For tracking recent audio energy + # Initialize client context + active_clients[client_id] = { + 'context_segments': [], + 'streaming_buffer': [], + 'is_streaming': False, + 'is_silence': False, + 'last_active_time': time.time(), + 'energy_window': deque(maxlen=10) + } + + emit('status', {'type': 'connected', 'message': 'Connected to server'}) + +@socketio.on('disconnect') +def handle_disconnect(): + client_id = request.sid + if client_id in active_clients: + del active_clients[client_id] + print(f"Client disconnected: {client_id}") + +@socketio.on('generate') +def handle_generate(data): + client_id = request.sid + if client_id not in active_clients: + emit('error', {'message': 'Client not registered'}) + return try: - while True: - # Receive JSON data from client - data = await websocket.receive_text() - request = json.loads(data) + text = data.get('text', '') + speaker_id = data.get('speaker', 0) + + print(f"Generating audio for: '{text}' with speaker {speaker_id}") + + # Generate audio response + audio_tensor = generator.generate( + text=text, + speaker=speaker_id, + 
context=active_clients[client_id]['context_segments'], + max_audio_length_ms=10_000, + ) + + # Add to conversation context + active_clients[client_id]['context_segments'].append( + Segment(text=text, speaker=speaker_id, audio=audio_tensor) + ) + + # Convert audio to base64 and send back to client + audio_base64 = encode_audio_data(audio_tensor) + emit('audio_response', { + 'type': 'audio_response', + 'audio': audio_base64 + }) + + except Exception as e: + print(f"Error generating audio: {str(e)}") + emit('error', { + 'type': 'error', + 'message': f"Error generating audio: {str(e)}" + }) + +@socketio.on('add_to_context') +def handle_add_to_context(data): + client_id = request.sid + if client_id not in active_clients: + emit('error', {'message': 'Client not registered'}) + return + + try: + text = data.get('text', '') + speaker_id = data.get('speaker', 0) + audio_data = data.get('audio', '') + + # Convert received audio to tensor + audio_tensor = decode_audio_data(audio_data) + + # Add to conversation context + active_clients[client_id]['context_segments'].append( + Segment(text=text, speaker=speaker_id, audio=audio_tensor) + ) + + emit('context_updated', { + 'type': 'context_updated', + 'message': 'Audio added to context' + }) + + except Exception as e: + print(f"Error adding to context: {str(e)}") + emit('error', { + 'type': 'error', + 'message': f"Error processing audio: {str(e)}" + }) + +@socketio.on('clear_context') +def handle_clear_context(): + client_id = request.sid + if client_id in active_clients: + active_clients[client_id]['context_segments'] = [] + + emit('context_updated', { + 'type': 'context_updated', + 'message': 'Context cleared' + }) + +@socketio.on('stream_audio') +def handle_stream_audio(data): + client_id = request.sid + if client_id not in active_clients: + emit('error', {'message': 'Client not registered'}) + return + + client = active_clients[client_id] + + try: + speaker_id = data.get('speaker', 0) + audio_data = data.get('audio', '') + + # Convert received audio to tensor + audio_chunk = decode_audio_data(audio_data) + + # Start streaming mode if not already started + if not client['is_streaming']: + client['is_streaming'] = True + client['streaming_buffer'] = [] + client['energy_window'].clear() + client['is_silence'] = False + client['last_active_time'] = time.time() + print(f"[{client_id}] Streaming started with speaker ID: {speaker_id}") + emit('streaming_status', { + 'type': 'streaming_status', + 'status': 'started' + }) + + # Calculate audio energy for silence detection + chunk_energy = torch.mean(torch.abs(audio_chunk)).item() + client['energy_window'].append(chunk_energy) + avg_energy = sum(client['energy_window']) / len(client['energy_window']) + + # Check if audio is silent + current_silence = avg_energy < SILENCE_THRESHOLD + + # Track silence transition + if not client['is_silence'] and current_silence: + # Transition to silence + client['is_silence'] = True + client['last_active_time'] = time.time() + elif client['is_silence'] and not current_silence: + # User started talking again + client['is_silence'] = False + + # Add chunk to buffer regardless of silence state + client['streaming_buffer'].append(audio_chunk) - action = request.get("action") + # Check if silence has persisted long enough to consider "stopped talking" + silence_elapsed = time.time() - client['last_active_time'] + + if client['is_silence'] and silence_elapsed >= SILENCE_DURATION_SEC and len(client['streaming_buffer']) > 0: + # User has stopped talking - process the collected audio + 
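+ # At this point the rolling average energy has stayed below SILENCE_THRESHOLD
+ # for at least SILENCE_DURATION_SEC, so the buffered chunks are concatenated,
+ # transcribed with WhisperX, answered via generate_response(), and synthesized
+ # back to the client as a single 'audio_response' event.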
print(f"[{client_id}] Processing audio after {silence_elapsed:.2f}s of silence") - if action == "generate": + full_audio = torch.cat(client['streaming_buffer'], dim=0) + + # Process with WhisperX speech-to-text + print(f"[{client_id}] Starting transcription with WhisperX...") + transcribed_text = transcribe_audio(full_audio) + + # Log the transcription + print(f"[{client_id}] Transcribed text: '{transcribed_text}'") + + # Handle the transcription result + if transcribed_text: + # Add user message to context + user_segment = Segment(text=transcribed_text, speaker=speaker_id, audio=full_audio) + client['context_segments'].append(user_segment) + + # Send the transcribed text to client + emit('transcription', { + 'type': 'transcription', + 'text': transcribed_text + }) + + # Generate a contextual response + response_text = generate_response(transcribed_text, client['context_segments']) + print(f"[{client_id}] Generating audio response: '{response_text}'") + + # Let the client know we're processing + emit('processing_status', { + 'type': 'processing_status', + 'status': 'generating_audio', + 'message': 'Generating audio response...' + }) + + # Generate audio for the response try: - text = request.get("text", "") - speaker_id = request.get("speaker", 0) + # Use a different speaker than the user + ai_speaker_id = 1 if speaker_id == 0 else 0 - # Generate audio response - print(f"Generating audio for: '{text}' with speaker {speaker_id}") + # Start audio generation with streaming (chunk by chunk) + audio_chunks = [] + + # This version tries to stream the audio generation in smaller chunks + # Note: CSM model doesn't natively support incremental generation, + # so we're simulating it here for a more responsive UI experience + + # Generate the full response audio_tensor = generator.generate( - text=text, - speaker=speaker_id, - context=context_segments, + text=response_text, + speaker=ai_speaker_id, + context=client['context_segments'], max_audio_length_ms=10_000, ) - # Add to conversation context - context_segments.append(Segment(text=text, speaker=speaker_id, audio=audio_tensor)) + # Add response to context + ai_segment = Segment( + text=response_text, + speaker=ai_speaker_id, + audio=audio_tensor + ) + client['context_segments'].append(ai_segment) # Convert audio to base64 and send back to client - audio_base64 = await encode_audio_data(audio_tensor) - await websocket.send_json({ - "type": "audio_response", - "audio": audio_base64 + audio_base64 = encode_audio_data(audio_tensor) + emit('audio_response', { + 'type': 'audio_response', + 'text': response_text, + 'audio': audio_base64 }) - except Exception as e: - print(f"Error generating audio: {str(e)}") - await websocket.send_json({ - "type": "error", - "message": f"Error generating audio: {str(e)}" - }) - - elif action == "add_to_context": - try: - text = request.get("text", "") - speaker_id = request.get("speaker", 0) - audio_data = request.get("audio", "") - # Convert received audio to tensor - audio_tensor = await decode_audio_data(audio_data) + print(f"[{client_id}] Audio response sent: {len(audio_base64)} bytes") - # Add to conversation context - context_segments.append(Segment(text=text, speaker=speaker_id, audio=audio_tensor)) - - await websocket.send_json({ - "type": "context_updated", - "message": "Audio added to context" + except Exception as gen_error: + print(f"Error generating audio response: {str(gen_error)}") + emit('error', { + 'type': 'error', + 'message': "Sorry, there was an error generating the audio response." 
}) - except Exception as e: - print(f"Error adding to context: {str(e)}") - await websocket.send_json({ - "type": "error", - "message": f"Error processing audio: {str(e)}" - }) - - elif action == "clear_context": - context_segments = [] - await websocket.send_json({ - "type": "context_updated", - "message": "Context cleared" + else: + # If transcription failed, send a generic response + emit('error', { + 'type': 'error', + 'message': "Sorry, I couldn't understand what you said. Could you try again?" }) - elif action == "stream_audio": - try: - speaker_id = request.get("speaker", 0) - audio_data = request.get("audio", "") - - # Convert received audio to tensor - audio_chunk = await decode_audio_data(audio_data) - - # Start streaming mode if not already started - if not is_streaming: - is_streaming = True - streaming_buffer = [] - energy_window.clear() - is_silence = False - last_active_time = time.time() - await websocket.send_json({ - "type": "streaming_status", - "status": "started" - }) - - # Calculate audio energy for silence detection - chunk_energy = torch.mean(torch.abs(audio_chunk)).item() - energy_window.append(chunk_energy) - avg_energy = sum(energy_window) / len(energy_window) - - # Check if audio is silent - current_silence = avg_energy < SILENCE_THRESHOLD - - # Track silence transition - if not is_silence and current_silence: - # Transition to silence - is_silence = True - last_active_time = time.time() - elif is_silence and not current_silence: - # User started talking again - is_silence = False - - # Add chunk to buffer regardless of silence state - streaming_buffer.append(audio_chunk) - - # Check if silence has persisted long enough to consider "stopped talking" - silence_elapsed = time.time() - last_active_time - - if is_silence and silence_elapsed >= SILENCE_DURATION_SEC and len(streaming_buffer) > 0: - # User has stopped talking - process the collected audio - full_audio = torch.cat(streaming_buffer, dim=0) - - # Process with WhisperX speech-to-text - transcribed_text = await transcribe_audio(full_audio) - - # Log the transcription - print(f"Transcribed text: '{transcribed_text}'") - - # Add to conversation context - if transcribed_text: - user_segment = Segment(text=transcribed_text, speaker=speaker_id, audio=full_audio) - context_segments.append(user_segment) - - # Generate a contextual response - response_text = await generate_response(transcribed_text, context_segments) - - # Send the transcribed text to client - await websocket.send_json({ - "type": "transcription", - "text": transcribed_text - }) - - # Generate audio for the response - audio_tensor = generator.generate( - text=response_text, - speaker=1 if speaker_id == 0 else 0, # Use opposite speaker - context=context_segments, - max_audio_length_ms=10_000, - ) - - # Add response to context - ai_segment = Segment( - text=response_text, - speaker=1 if speaker_id == 0 else 0, - audio=audio_tensor - ) - context_segments.append(ai_segment) - - # Convert audio to base64 and send back to client - audio_base64 = await encode_audio_data(audio_tensor) - await websocket.send_json({ - "type": "audio_response", - "text": response_text, - "audio": audio_base64 - }) - else: - # If transcription failed, send a generic response - await websocket.send_json({ - "type": "error", - "message": "Sorry, I couldn't understand what you said. Could you try again?" 
- }) - - # Clear buffer and reset silence detection - streaming_buffer = [] - energy_window.clear() - is_silence = False - last_active_time = time.time() - - # If buffer gets too large without silence, process it anyway - # This prevents memory issues with very long streams - elif len(streaming_buffer) >= 30: # ~6 seconds of audio at 5 chunks/sec - print("Buffer limit reached, processing audio") - full_audio = torch.cat(streaming_buffer, dim=0) - - # Process with WhisperX speech-to-text - transcribed_text = await transcribe_audio(full_audio) - - if transcribed_text: - context_segments.append(Segment(text=transcribed_text, speaker=speaker_id, audio=full_audio)) - - # Send the transcribed text to client - await websocket.send_json({ - "type": "transcription", - "text": transcribed_text + " (processing continued speech...)" - }) - - streaming_buffer = [] - - except Exception as e: - print(f"Error processing streaming audio: {str(e)}") - await websocket.send_json({ - "type": "error", - "message": f"Error processing streaming audio: {str(e)}" - }) + # Clear buffer and reset silence detection + client['streaming_buffer'] = [] + client['energy_window'].clear() + client['is_silence'] = False + client['last_active_time'] = time.time() + + # If buffer gets too large without silence, process it anyway + elif len(client['streaming_buffer']) >= 30: # ~6 seconds of audio at 5 chunks/sec + print(f"[{client_id}] Processing long audio segment without silence") + full_audio = torch.cat(client['streaming_buffer'], dim=0) - elif action == "stop_streaming": - is_streaming = False - if streaming_buffer and len(streaming_buffer) > 5: # Only process if there's meaningful audio - # Process any remaining audio in the buffer - full_audio = torch.cat(streaming_buffer, dim=0) - - # Process with WhisperX speech-to-text - transcribed_text = await transcribe_audio(full_audio) - - if transcribed_text: - context_segments.append(Segment(text=transcribed_text, speaker=request.get("speaker", 0), audio=full_audio)) - - # Send the transcribed text to client - await websocket.send_json({ - "type": "transcription", - "text": transcribed_text - }) + # Process with WhisperX speech-to-text + transcribed_text = transcribe_audio(full_audio) + + if transcribed_text: + client['context_segments'].append( + Segment(text=transcribed_text, speaker=speaker_id, audio=full_audio) + ) - streaming_buffer = [] - await websocket.send_json({ - "type": "streaming_status", - "status": "stopped" + # Send the transcribed text to client + emit('transcription', { + 'type': 'transcription', + 'text': transcribed_text + " (processing continued speech...)" }) - - except WebSocketDisconnect: - manager.disconnect(websocket) - print("Client disconnected") + + # Keep half of the buffer for context (sliding window approach) + half_point = len(client['streaming_buffer']) // 2 + client['streaming_buffer'] = client['streaming_buffer'][half_point:] + except Exception as e: - print(f"Error: {str(e)}") - try: - await websocket.send_json({ - "type": "error", - "message": str(e) - }) - except: - pass - manager.disconnect(websocket) + import traceback + traceback.print_exc() + print(f"Error processing streaming audio: {str(e)}") + emit('error', { + 'type': 'error', + 'message': f"Error processing streaming audio: {str(e)}" + }) +@socketio.on('stop_streaming') +def handle_stop_streaming(data): + client_id = request.sid + if client_id not in active_clients: + return + + client = active_clients[client_id] + client['is_streaming'] = False + + if client['streaming_buffer'] 
and len(client['streaming_buffer']) > 5: + # Process any remaining audio in the buffer + full_audio = torch.cat(client['streaming_buffer'], dim=0) + + # Process with WhisperX speech-to-text + transcribed_text = transcribe_audio(full_audio) + + if transcribed_text: + client['context_segments'].append( + Segment(text=transcribed_text, speaker=data.get("speaker", 0), audio=full_audio) + ) + + # Send the transcribed text to client + emit('transcription', { + 'type': 'transcription', + 'text': transcribed_text + }) + + client['streaming_buffer'] = [] + emit('streaming_status', { + 'type': 'streaming_status', + 'status': 'stopped' + }) + +def stream_audio_to_client(client_id, audio_tensor, text, speaker_id, chunk_size_ms=500): + """Stream audio to client in chunks to simulate real-time generation""" + try: + if client_id not in active_clients: + print(f"Client {client_id} not found for streaming") + return + + # Calculate chunk size in samples + chunk_size = int(generator.sample_rate * chunk_size_ms / 1000) + total_chunks = math.ceil(audio_tensor.size(0) / chunk_size) + + print(f"Streaming audio in {total_chunks} chunks of {chunk_size_ms}ms each") + + # Send initial response with text but no audio yet + socketio.emit('audio_response_start', { + 'type': 'audio_response_start', + 'text': text, + 'total_chunks': total_chunks + }, room=client_id) + + # Stream each chunk + for i in range(total_chunks): + start_idx = i * chunk_size + end_idx = min(start_idx + chunk_size, audio_tensor.size(0)) + + # Extract chunk + chunk = audio_tensor[start_idx:end_idx] + + # Encode chunk + chunk_base64 = encode_audio_data(chunk) + + # Send chunk + socketio.emit('audio_response_chunk', { + 'type': 'audio_response_chunk', + 'chunk_index': i, + 'total_chunks': total_chunks, + 'audio': chunk_base64, + 'is_last': i == total_chunks - 1 + }, room=client_id) + + # Brief pause between chunks to simulate streaming + time.sleep(0.1) + + # Send completion message + socketio.emit('audio_response_complete', { + 'type': 'audio_response_complete', + 'text': text + }, room=client_id) + + print(f"Audio streaming complete: {total_chunks} chunks sent") + + except Exception as e: + print(f"Error streaming audio to client: {str(e)}") + import traceback + traceback.print_exc() if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + print(f"\n{'='*60}") + print(f"🔊 Sesame AI Voice Chat Server (Flask Implementation)") + print(f"{'='*60}") + print(f"📡 Server Information:") + print(f" - Local URL: http://localhost:5000") + print(f" - Network URL: http://:5000") + print(f" - WebSocket: ws://:5000/socket.io") + print(f"{'='*60}") + print(f"💡 To make this server public:") + print(f" 1. Ensure port 5000 is open in your firewall") + print(f" 2. Set up port forwarding on your router to port 5000") + print(f" 3. Or use a service like ngrok with: ngrok http 5000") + print(f"{'='*60}") + print(f"🌐 Device: {device.upper()}") + print(f"🧠 Models loaded: Sesame CSM + WhisperX ({asr_model.device})") + print(f"🔧 Serving from: {os.path.join(base_dir, 'index.html')}") + print(f"{'='*60}") + print(f"Ready to receive connections! 
Press Ctrl+C to stop the server.\n") + + socketio.run(app, host="0.0.0.0", port=5000, debug=False) \ No newline at end of file diff --git a/Backend/voice-chat.js b/Backend/voice-chat.js new file mode 100644 index 0000000..b224b27 --- /dev/null +++ b/Backend/voice-chat.js @@ -0,0 +1,852 @@ +/** + * Sesame AI Voice Chat Client + * + * A web client that connects to a Sesame AI voice chat server and enables + * real-time voice conversation with an AI assistant. + */ + +// Configuration constants +const SERVER_URL = window.location.hostname === 'localhost' ? + 'http://localhost:5000' : window.location.origin; +const ENERGY_WINDOW_SIZE = 15; +const CLIENT_SILENCE_DURATION_MS = 750; + +// DOM elements +const elements = { + conversation: null, + streamButton: null, + clearButton: null, + thresholdSlider: null, + thresholdValue: null, + visualizerCanvas: null, + visualizerLabel: null, + volumeLevel: null, + statusDot: null, + statusText: null, + speakerSelection: null, + autoPlayResponses: null, + showVisualizer: null +}; + +// Application state +const state = { + socket: null, + audioContext: null, + analyser: null, + microphone: null, + streamProcessor: null, + isStreaming: false, + isSpeaking: false, + silenceThreshold: 0.01, + energyWindow: [], + silenceTimer: null, + volumeUpdateInterval: null, + visualizerAnimationFrame: null, + currentSpeaker: 0 +}; + +// Visualizer variables +let canvasContext = null; +let visualizerBufferLength = 0; +let visualizerDataArray = null; + +// Initialize the application +function initializeApp() { + // Initialize the UI elements + initializeUIElements(); + + // Initialize socket.io connection + setupSocketConnection(); + + // Setup event listeners + setupEventListeners(); + + // Initialize visualizer + setupVisualizer(); + + // Show welcome message + addSystemMessage('Welcome to Sesame AI Voice Chat! 
Click "Start Conversation" to begin.'); +} + +// Initialize UI elements +function initializeUIElements() { + // Store references to UI elements + elements.conversation = document.getElementById('conversation'); + elements.streamButton = document.getElementById('streamButton'); + elements.clearButton = document.getElementById('clearButton'); + elements.thresholdSlider = document.getElementById('thresholdSlider'); + elements.thresholdValue = document.getElementById('thresholdValue'); + elements.visualizerCanvas = document.getElementById('audioVisualizer'); + elements.visualizerLabel = document.getElementById('visualizerLabel'); + elements.volumeLevel = document.getElementById('volumeLevel'); + elements.statusDot = document.getElementById('statusDot'); + elements.statusText = document.getElementById('statusText'); + elements.speakerSelection = document.getElementById('speakerSelect'); // Changed to match HTML + elements.autoPlayResponses = document.getElementById('autoPlayResponses'); + elements.showVisualizer = document.getElementById('showVisualizer'); +} + +// Setup Socket.IO connection +function setupSocketConnection() { + state.socket = io(SERVER_URL); + + // Connection events + state.socket.on('connect', () => { + console.log('Connected to server'); + updateConnectionStatus(true); + }); + + state.socket.on('disconnect', () => { + console.log('Disconnected from server'); + updateConnectionStatus(false); + + // Stop streaming if active + if (state.isStreaming) { + stopStreaming(false); + } + }); + + state.socket.on('error', (data) => { + console.error('Socket error:', data.message); + addSystemMessage(`Error: ${data.message}`); + }); + + // Register message handlers + state.socket.on('audio_response', handleAudioResponse); + state.socket.on('transcription', handleTranscription); + state.socket.on('context_updated', handleContextUpdate); + state.socket.on('streaming_status', handleStreamingStatus); +} + +// Setup event listeners +function setupEventListeners() { + // Stream button + elements.streamButton.addEventListener('click', toggleStreaming); + + // Clear button + elements.clearButton.addEventListener('click', clearConversation); + + // Threshold slider + elements.thresholdSlider.addEventListener('input', updateThreshold); + + // Speaker selection + elements.speakerSelection.addEventListener('change', () => { + state.currentSpeaker = parseInt(elements.speakerSelection.value, 10); + }); + + // Visualizer toggle + elements.showVisualizer.addEventListener('change', toggleVisualizerVisibility); +} + +// Setup audio visualizer +function setupVisualizer() { + if (!elements.visualizerCanvas) return; + + canvasContext = elements.visualizerCanvas.getContext('2d'); + + // Set canvas dimensions + elements.visualizerCanvas.width = elements.visualizerCanvas.offsetWidth; + elements.visualizerCanvas.height = elements.visualizerCanvas.offsetHeight; + + // Initialize the visualizer + drawVisualizer(); +} + +// Update connection status UI +function updateConnectionStatus(isConnected) { + elements.statusDot.classList.toggle('active', isConnected); + elements.statusText.textContent = isConnected ? 
'Connected' : 'Disconnected'; +} + +// Toggle streaming state +function toggleStreaming() { + if (state.isStreaming) { + stopStreaming(true); + } else { + startStreaming(); + } +} + +// Start streaming audio to the server +function startStreaming() { + if (state.isStreaming) return; + + // Request microphone access + navigator.mediaDevices.getUserMedia({ audio: true, video: false }) + .then(stream => { + // Show processing state while setting up + elements.streamButton.innerHTML = ' Initializing...'; + + // Create audio context + state.audioContext = new (window.AudioContext || window.webkitAudioContext)(); + + // Create microphone source + state.microphone = state.audioContext.createMediaStreamSource(stream); + + // Create analyser for visualizer + state.analyser = state.audioContext.createAnalyser(); + state.analyser.fftSize = 256; + visualizerBufferLength = state.analyser.frequencyBinCount; + visualizerDataArray = new Uint8Array(visualizerBufferLength); + + // Connect microphone to analyser + state.microphone.connect(state.analyser); + + // Create script processor for audio processing + const bufferSize = 4096; + state.streamProcessor = state.audioContext.createScriptProcessor(bufferSize, 1, 1); + + // Set up audio processing callback + state.streamProcessor.onaudioprocess = handleAudioProcess; + + // Connect the processors + state.analyser.connect(state.streamProcessor); + state.streamProcessor.connect(state.audioContext.destination); + + // Update UI + state.isStreaming = true; + elements.streamButton.innerHTML = ' Listening...'; + elements.streamButton.classList.add('recording'); + + // Initialize energy window + state.energyWindow = []; + + // Start volume meter updates + state.volumeUpdateInterval = setInterval(updateVolumeMeter, 100); + + // Start visualizer if enabled + if (elements.showVisualizer.checked && !state.visualizerAnimationFrame) { + drawVisualizer(); + } + + // Show starting message + addSystemMessage('Listening... Speak clearly into your microphone.'); + + // Notify the server that we're starting + state.socket.emit('stream_audio', { + audio: '', + speaker: state.currentSpeaker + }); + }) + .catch(err => { + console.error('Error accessing microphone:', err); + addSystemMessage(`Error: ${err.message}. 
Please make sure your microphone is connected and you've granted permission.`); + elements.streamButton.innerHTML = ' Start Conversation'; + }); +} + +// Stop streaming audio +function stopStreaming(notifyServer = true) { + if (!state.isStreaming) return; + + // Update UI first + elements.streamButton.innerHTML = ' Start Conversation'; + elements.streamButton.classList.remove('recording'); + elements.streamButton.classList.remove('processing'); + + // Stop volume meter updates + if (state.volumeUpdateInterval) { + clearInterval(state.volumeUpdateInterval); + state.volumeUpdateInterval = null; + } + + // Stop all audio processing + if (state.streamProcessor) { + state.streamProcessor.disconnect(); + state.streamProcessor = null; + } + + if (state.analyser) { + state.analyser.disconnect(); + } + + if (state.microphone) { + state.microphone.disconnect(); + } + + // Close audio context + if (state.audioContext && state.audioContext.state !== 'closed') { + state.audioContext.close().catch(err => console.warn('Error closing audio context:', err)); + } + + // Cleanup animation frames + if (state.visualizerAnimationFrame) { + cancelAnimationFrame(state.visualizerAnimationFrame); + state.visualizerAnimationFrame = null; + } + + // Reset state + state.isStreaming = false; + state.isSpeaking = false; + + // Notify the server + if (notifyServer && state.socket && state.socket.connected) { + state.socket.emit('stop_streaming', { + speaker: state.currentSpeaker + }); + } + + // Show message + addSystemMessage('Conversation paused. Click "Start Conversation" to resume.'); +} + +// Handle audio processing +function handleAudioProcess(event) { + const inputData = event.inputBuffer.getChannelData(0); + + // Log audio buffer statistics + console.log(`Audio buffer: length=${inputData.length}, sample rate=${state.audioContext.sampleRate}Hz`); + + // Calculate audio energy (volume level) + const energy = calculateAudioEnergy(inputData); + console.log(`Energy: ${energy.toFixed(6)}, threshold: ${state.silenceThreshold}`); + + // Update energy window for averaging + updateEnergyWindow(energy); + + // Calculate average energy + const avgEnergy = calculateAverageEnergy(); + + // Determine if audio is silent + const isSilent = avgEnergy < state.silenceThreshold; + console.log(`Silent: ${isSilent ? 
'Yes' : 'No'}, avg energy: ${avgEnergy.toFixed(6)}`); + + // Handle speech state based on silence + handleSpeechState(isSilent); + + // Only send audio chunk if we detect speech + if (!isSilent) { + // Create a resampled version at 24kHz for the server + // Most WebRTC audio is 48kHz, but we want 24kHz for the model + const resampledData = downsampleBuffer(inputData, state.audioContext.sampleRate, 24000); + console.log(`Resampled audio: ${state.audioContext.sampleRate}Hz → 24000Hz, new length: ${resampledData.length}`); + + // Send the audio chunk to the server + sendAudioChunk(resampledData, state.currentSpeaker); + } +} + +// Cleanup audio resources when done +function cleanupAudioResources() { + // Stop all audio processing + if (state.streamProcessor) { + state.streamProcessor.disconnect(); + state.streamProcessor = null; + } + + if (state.analyser) { + state.analyser.disconnect(); + state.analyser = null; + } + + if (state.microphone) { + state.microphone.disconnect(); + state.microphone = null; + } + + // Close audio context + if (state.audioContext && state.audioContext.state !== 'closed') { + state.audioContext.close().catch(err => console.warn('Error closing audio context:', err)); + } + + // Cancel all timers and animation frames + if (state.volumeUpdateInterval) { + clearInterval(state.volumeUpdateInterval); + state.volumeUpdateInterval = null; + } + + if (state.visualizerAnimationFrame) { + cancelAnimationFrame(state.visualizerAnimationFrame); + state.visualizerAnimationFrame = null; + } + + if (state.silenceTimer) { + clearTimeout(state.silenceTimer); + state.silenceTimer = null; + } +} + +// Clear conversation history +function clearConversation() { + if (elements.conversation) { + elements.conversation.innerHTML = ''; + addSystemMessage('Conversation cleared.'); + + // Notify server to clear context + if (state.socket && state.socket.connected) { + state.socket.emit('clear_context'); + } + } +} + +// Calculate audio energy (volume) +function calculateAudioEnergy(buffer) { + let sum = 0; + for (let i = 0; i < buffer.length; i++) { + sum += buffer[i] * buffer[i]; + } + return Math.sqrt(sum / buffer.length); +} + +// Update energy window for averaging +function updateEnergyWindow(energy) { + state.energyWindow.push(energy); + if (state.energyWindow.length > ENERGY_WINDOW_SIZE) { + state.energyWindow.shift(); + } +} + +// Calculate average energy from window +function calculateAverageEnergy() { + if (state.energyWindow.length === 0) return 0; + + const sum = state.energyWindow.reduce((a, b) => a + b, 0); + return sum / state.energyWindow.length; +} + +// Update the threshold from the slider +function updateThreshold() { + state.silenceThreshold = parseFloat(elements.thresholdSlider.value); + elements.thresholdValue.textContent = state.silenceThreshold.toFixed(3); +} + +// Update the volume meter display +function updateVolumeMeter() { + if (!state.isStreaming || !state.energyWindow.length) return; + + const avgEnergy = calculateAverageEnergy(); + + // Scale energy to percentage (0-100) + // Typically, energy values will be very small (e.g., 0.001 to 0.1) + // So we multiply by a factor to make it more visible + const scaleFactor = 1000; + const percentage = Math.min(100, Math.max(0, avgEnergy * scaleFactor)); + + // Update volume meter width + elements.volumeLevel.style.width = `${percentage}%`; + + // Change color based on level + if (percentage > 70) { + elements.volumeLevel.style.backgroundColor = '#ff5252'; + } else if (percentage > 30) { + 
elements.volumeLevel.style.backgroundColor = '#4CAF50'; + } else { + elements.volumeLevel.style.backgroundColor = '#4c84ff'; + } +} + +// Handle speech/silence state transitions +function handleSpeechState(isSilent) { + if (state.isSpeaking && isSilent) { + // Transition from speaking to silence + if (!state.silenceTimer) { + state.silenceTimer = setTimeout(() => { + // Only consider it a real silence after a certain duration + // This prevents detecting brief pauses as the end of speech + state.isSpeaking = false; + state.silenceTimer = null; + }, CLIENT_SILENCE_DURATION_MS); + } + } else if (state.silenceTimer && !isSilent) { + // User started speaking again, cancel the silence timer + clearTimeout(state.silenceTimer); + state.silenceTimer = null; + } + + // Update speaking state for non-silent audio + if (!isSilent) { + state.isSpeaking = true; + } +} + +// Send audio chunk to server +function sendAudioChunk(audioData, speaker) { + if (!state.socket || !state.socket.connected) { + console.warn('Socket not connected'); + return; + } + + console.log(`Preparing audio chunk: length=${audioData.length}, speaker=${speaker}`); + + // Check for NaN or invalid values + let hasInvalidValues = false; + for (let i = 0; i < audioData.length; i++) { + if (isNaN(audioData[i]) || !isFinite(audioData[i])) { + hasInvalidValues = true; + console.warn(`Invalid audio value at index ${i}: ${audioData[i]}`); + break; + } + } + + if (hasInvalidValues) { + console.warn('Audio data contains invalid values. Creating silent audio.'); + audioData = new Float32Array(audioData.length).fill(0); + } + + try { + // Create WAV blob + const wavData = createWavBlob(audioData, 24000); + console.log(`WAV blob created: ${wavData.size} bytes`); + + const reader = new FileReader(); + + reader.onloadend = function() { + try { + // Get base64 data + const base64data = reader.result; + console.log(`Base64 data created: ${base64data.length} bytes`); + + // Send to server + state.socket.emit('stream_audio', { + audio: base64data, + speaker: speaker + }); + console.log('Audio chunk sent to server'); + } catch (err) { + console.error('Error preparing audio data:', err); + } + }; + + reader.onerror = function() { + console.error('Error reading audio data as base64'); + }; + + reader.readAsDataURL(wavData); + } catch (err) { + console.error('Error creating WAV data:', err); + } +} + +// Create WAV blob from audio data with improved error handling +function createWavBlob(audioData, sampleRate) { + // Validate input + if (!audioData || audioData.length === 0) { + console.warn('Empty audio data provided to createWavBlob'); + audioData = new Float32Array(1024).fill(0); // Create 1024 samples of silence + } + + // Function to convert Float32Array to Int16Array for WAV format + function floatTo16BitPCM(output, offset, input) { + for (let i = 0; i < input.length; i++, offset += 2) { + // Ensure values are in -1 to 1 range + const s = Math.max(-1, Math.min(1, input[i])); + // Convert to 16-bit PCM + output.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7FFF, true); + } + } + + // Create WAV header + function writeString(view, offset, string) { + for (let i = 0; i < string.length; i++) { + view.setUint8(offset + i, string.charCodeAt(i)); + } + } + + try { + // Create WAV file with header - careful with buffer sizes + const buffer = new ArrayBuffer(44 + audioData.length * 2); + const view = new DataView(buffer); + + // RIFF identifier + writeString(view, 0, 'RIFF'); + + // File length (will be filled later) + view.setUint32(4, 36 + audioData.length * 2, true); + + // WAVE identifier + writeString(view, 8, 'WAVE'); + + // fmt chunk identifier + writeString(view, 12, 'fmt '); + + // fmt chunk length + view.setUint32(16, 16, true); + + // Sample format (1 is PCM) + view.setUint16(20, 1, true); + + // Mono channel + view.setUint16(22, 1, true); + + // Sample rate + view.setUint32(24, sampleRate, true); + + // Byte rate (sample rate * block align) + view.setUint32(28, sampleRate * 2, true); + + // Block align (channels * bytes per sample) + view.setUint16(32, 2, true); + + // Bits per sample + view.setUint16(34, 16, true); + + // data chunk identifier + writeString(view, 36, 'data'); + + // data chunk length + view.setUint32(40, audioData.length * 2, true); + + // Write the PCM samples + floatTo16BitPCM(view, 44, audioData); + + // Create and return blob + return new Blob([view], { type: 'audio/wav' }); + } catch (err) { + console.error('Error in createWavBlob:', err); + + // Create a minimal valid WAV file with silence as fallback + const fallbackSamples = new Float32Array(1024).fill(0); + const fallbackBuffer = new ArrayBuffer(44 + fallbackSamples.length * 2); + const fallbackView = new DataView(fallbackBuffer); + + writeString(fallbackView, 0, 'RIFF'); + fallbackView.setUint32(4, 36 + fallbackSamples.length * 2, true); + writeString(fallbackView, 8, 'WAVE'); + writeString(fallbackView, 12, 'fmt '); + fallbackView.setUint32(16, 16, true); + fallbackView.setUint16(20, 1, true); + fallbackView.setUint16(22, 1, true); + fallbackView.setUint32(24, sampleRate, true); + fallbackView.setUint32(28, sampleRate * 2, true); + fallbackView.setUint16(32, 2, true); + fallbackView.setUint16(34, 16, true); + writeString(fallbackView, 36, 'data'); + fallbackView.setUint32(40, fallbackSamples.length * 2, true); + floatTo16BitPCM(fallbackView, 44, fallbackSamples); + + return new Blob([fallbackView], { type: 'audio/wav' }); + } +} + +// Draw audio visualizer +function drawVisualizer() { + if (!canvasContext) { + return; + } + + state.visualizerAnimationFrame = requestAnimationFrame(drawVisualizer); + + // Skip drawing if visualizer is hidden + if (!elements.showVisualizer.checked) { + if (elements.visualizerCanvas.style.opacity !== '0') { + elements.visualizerCanvas.style.opacity = '0'; + } + return; + } else if (elements.visualizerCanvas.style.opacity !== '1') { + elements.visualizerCanvas.style.opacity = '1'; + } + + // Get frequency data if available + if (state.isStreaming && state.analyser) { + try { + state.analyser.getByteFrequencyData(visualizerDataArray); + } catch (e) { + console.warn('Error getting frequency data:', e); + } + } else { + // Fade out when not streaming + for (let i = 0; i < visualizerDataArray.length; i++) { + visualizerDataArray[i] = Math.max(0, visualizerDataArray[i] - 5); + } + } + + // Clear canvas + canvasContext.fillStyle = 'rgb(0, 0, 0)'; + canvasContext.fillRect(0, 0, elements.visualizerCanvas.width, elements.visualizerCanvas.height); + + // Draw gradient bars + const width = elements.visualizerCanvas.width; 
+ const height = elements.visualizerCanvas.height; + const barCount = Math.min(visualizerBufferLength, 64); + const barWidth = width / barCount - 1; + + for (let i = 0; i < barCount; i++) { + const index = Math.floor(i * visualizerBufferLength / barCount); + const value = visualizerDataArray[index]; + + // Use logarithmic scale for better audio visualization + // This makes low values more visible while still maintaining full range + const logFactor = 20; + const scaledValue = Math.log(1 + (value / 255) * logFactor) / Math.log(1 + logFactor); + const barHeight = scaledValue * height; + + // Position bars + const x = i * (barWidth + 1); + const y = height - barHeight; + + // Create color gradient based on frequency and amplitude + const hue = i / barCount * 360; // Full color spectrum + const saturation = 80 + (value / 255 * 20); // Higher values more saturated + const lightness = 40 + (value / 255 * 20); // Dynamic brightness based on amplitude + + // Draw main bar + canvasContext.fillStyle = `hsl(${hue}, ${saturation}%, ${lightness}%)`; + canvasContext.fillRect(x, y, barWidth, barHeight); + + // Add reflection effect + if (barHeight > 5) { + const gradient = canvasContext.createLinearGradient( + x, y, + x, y + barHeight * 0.5 + ); + gradient.addColorStop(0, `hsla(${hue}, ${saturation}%, ${lightness + 20}%, 0.4)`); + gradient.addColorStop(1, `hsla(${hue}, ${saturation}%, ${lightness}%, 0)`); + canvasContext.fillStyle = gradient; + canvasContext.fillRect(x, y, barWidth, barHeight * 0.5); + + // Add highlight on top of the bar for better 3D effect + canvasContext.fillStyle = `hsla(${hue}, ${saturation - 20}%, ${lightness + 30}%, 0.7)`; + canvasContext.fillRect(x, y, barWidth, 2); + } + } + + // Show/hide the label + elements.visualizerLabel.style.opacity = (state.isStreaming) ? '0' : '0.7'; +} + +// Toggle visualizer visibility +function toggleVisualizerVisibility() { + const isVisible = elements.showVisualizer.checked; + elements.visualizerCanvas.style.opacity = isVisible ? 
'1' : '0'; + + if (isVisible && state.isStreaming && !state.visualizerAnimationFrame) { + drawVisualizer(); + } +} + +// Handle audio response from server +function handleAudioResponse(data) { + console.log('Received audio response'); + + // Create message container + const messageElement = document.createElement('div'); + messageElement.className = 'message ai'; + + // Add text content if available + if (data.text) { + const textElement = document.createElement('p'); + textElement.textContent = data.text; + messageElement.appendChild(textElement); + } + + // Create and configure audio element + const audioElement = document.createElement('audio'); + audioElement.controls = true; + audioElement.className = 'audio-player'; + + // Set audio source + const audioSource = document.createElement('source'); + audioSource.src = data.audio; + audioSource.type = 'audio/wav'; + + // Add fallback text + audioElement.textContent = 'Your browser does not support the audio element.'; + + // Assemble audio element + audioElement.appendChild(audioSource); + messageElement.appendChild(audioElement); + + // Add timestamp + const timeElement = document.createElement('span'); + timeElement.className = 'message-time'; + timeElement.textContent = new Date().toLocaleTimeString(); + messageElement.appendChild(timeElement); + + // Add to conversation + elements.conversation.appendChild(messageElement); + + // Auto-scroll to bottom + elements.conversation.scrollTop = elements.conversation.scrollHeight; + + // Auto-play if enabled + if (elements.autoPlayResponses.checked) { + audioElement.play() + .catch(err => { + console.warn('Auto-play failed:', err); + addSystemMessage('Auto-play failed. Please click play to hear the response.'); + }); + } + + // Re-enable stream button after processing is complete + if (state.isStreaming) { + elements.streamButton.innerHTML = ' Listening...'; + elements.streamButton.classList.add('recording'); + elements.streamButton.classList.remove('processing'); + } +} + +// Handle transcription response from server +function handleTranscription(data) { + console.log('Received transcription:', data.text); + + // Create message element + const messageElement = document.createElement('div'); + messageElement.className = 'message user'; + + // Add text content + const textElement = document.createElement('p'); + textElement.textContent = data.text; + messageElement.appendChild(textElement); + + // Add timestamp + const timeElement = document.createElement('span'); + timeElement.className = 'message-time'; + timeElement.textContent = new Date().toLocaleTimeString(); + messageElement.appendChild(timeElement); + + // Add to conversation + elements.conversation.appendChild(messageElement); + + // Auto-scroll to bottom + elements.conversation.scrollTop = elements.conversation.scrollHeight; +} + +// Handle context update from server +function handleContextUpdate(data) { + console.log('Context updated:', data.message); +} + +// Handle streaming status updates from server +function handleStreamingStatus(data) { + console.log('Streaming status:', data.status); + + if (data.status === 'stopped') { + // Reset UI if needed + if (state.isStreaming) { + stopStreaming(false); // Don't send to server since this came from server + } + } +} + +// Add a system message to the conversation +function addSystemMessage(message) { + const messageElement = document.createElement('div'); + messageElement.className = 'message system'; + messageElement.textContent = message; + 
elements.conversation.appendChild(messageElement); + + // Auto-scroll to bottom + elements.conversation.scrollTop = elements.conversation.scrollHeight; +} + +// Downsample audio buffer to target sample rate +function downsampleBuffer(buffer, originalSampleRate, targetSampleRate) { + if (originalSampleRate === targetSampleRate) { + return buffer; + } + + const ratio = originalSampleRate / targetSampleRate; + const newLength = Math.round(buffer.length / ratio); + const result = new Float32Array(newLength); + + for (let i = 0; i < newLength; i++) { + const pos = Math.round(i * ratio); + result[i] = buffer[pos]; + } + + return result; +} + +// Initialize the application when DOM is fully loaded +document.addEventListener('DOMContentLoaded', initializeApp); +
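A quick way to exercise the new Socket.IO endpoints without the browser UI is a small Python client. This is a sketch only — it assumes the server above is running on localhost:5000 and that the python-socketio client package is installed; the event names ('generate', 'audio_response', 'error') and the data-URL audio format are the ones defined in server.py, everything else is illustrative:

# Minimal smoke test for the Flask-SocketIO endpoints above.
# Requires: pip install "python-socketio[client]"
import base64
import socketio

sio = socketio.Client()

@sio.on('audio_response')
def on_audio_response(data):
    # The server sends a data URL: "data:audio/wav;base64,<payload>"
    header, payload = data['audio'].split(',', 1)
    with open('reply.wav', 'wb') as f:
        f.write(base64.b64decode(payload))
    print(f"Saved reply.wav for text: {data.get('text', '')!r}")
    sio.disconnect()

@sio.on('error')
def on_error(data):
    print('Server error:', data.get('message'))
    sio.disconnect()

sio.connect('http://localhost:5000')
sio.emit('generate', {'text': 'Hello from the test client', 'speaker': 0})
sio.wait()

The same pattern works for the 'stream_audio' / 'transcription' events if base64 WAV data URLs are supplied in the 'audio' field.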