From 8695dd0297431530eedab166c92585fd3a1791fa Mon Sep 17 00:00:00 2001
From: GamerBoss101
Date: Sun, 30 Mar 2025 08:59:26 -0400
Subject: [PATCH] Demo Fixes 19

---
 Backend/index.html | 1070 ++++++++++++++++++++++----------
 Backend/server.py  |  304 +++++++------
 2 files changed, 694 insertions(+), 680 deletions(-)

diff --git a/Backend/index.html b/Backend/index.html
index 5113b93..59b4903 100644
--- a/Backend/index.html
+++ b/Backend/index.html
@@ -3,7 +3,7 @@
-    <title>Voice Assistant - CSM & Whisper</title>
+    <title>Live Voice Assistant with CSM</title>

-        Voice Assistant with CSM & Whisper
+        Live Voice Assistant with CSM
@@ -317,53 +342,43 @@
         Listening...
 [hunk body garbled in extraction: the markup added and removed here did not
  survive; the only other recoverable text is the initial status message
  "Connecting to server..."]
\ No newline at end of file
diff --git a/Backend/server.py b/Backend/server.py
index 2b8e126..05abd99 100644
--- a/Backend/server.py
+++ b/Backend/server.py
@@ -14,8 +14,9 @@ import huggingface_hub
 from generator import load_csm_1b, Segment
 import threading
 import queue
-from flask import stream_with_context, Response
-import time
+import asyncio
+import json
+import time  # still used below for time.time() and time.sleep()
 
 # Configure environment with longer timeouts
 os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10 minutes timeout for downloads
@@ -26,7 +26,7 @@ os.makedirs("models", exist_ok=True)
 
 app = Flask(__name__)
 app.config['SECRET_KEY'] = 'your-secret-key'
-socketio = SocketIO(app, cors_allowed_origins="*")
+socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
 
 # Explicitly check for CUDA and print more detailed info
 print("\n=== CUDA Information ===")
@@ -128,8 +128,7 @@ def load_models():
 
 # Store conversation context
 conversation_context = {}  # session_id -> context
-CHUNK_SIZE = 24000  # Number of audio samples per chunk (1 second at 24kHz)
-audio_stream_queues = {}  # session_id -> queue for audio chunks
+active_audio_streams = {}  # session_id -> stream status
 
 @app.route('/')
 def index():
@@ -143,9 +142,14 @@ def handle_connect():
         'speakers': [0, 1],  # 0 = user, 1 = bot
         'audio_buffer': deque(maxlen=10),  # Store recent audio chunks
         'is_speaking': False,
-        'silence_start': None
+        'last_activity': time.time(),
+        'active_session': True,
+        'transcription_buffer': []  # For real-time transcription
     }
-    emit('ready', {'message': 'Connection established'})
+    emit('ready', {
+        'message': 'Connection established',
+        'sample_rate': getattr(csm_generator, 'sample_rate', 24000)  # default also covers csm_generator is None
+    })
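A note on the async_mode switch above: eventlet only cooperates with blocking calls if the standard library is monkey-patched before anything else is imported, and Flask-SocketIO provides portable wrappers for the raw threading.Thread and time.sleep calls this patch still uses later. A minimal sketch of those eventlet-friendly variants, assuming the eventlet package is installed (none of this is part of the patch):

    import eventlet
    eventlet.monkey_patch()  # must run before the other imports in server.py

    from flask import Flask
    from flask_socketio import SocketIO

    app = Flask(__name__)
    socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')

    def stream_worker(session_id):
        # socketio.sleep() yields to the event loop under eventlet,
        # unlike a bare time.sleep() in an unpatched process
        socketio.sleep(0.15)

    # Portable replacement for threading.Thread(target=..., daemon=True).start():
    socketio.start_background_task(stream_worker, 'some-session-id')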
 
 @socketio.on('disconnect')
 def handle_disconnect():
@@ -154,56 +158,130 @@ def handle_disconnect():
     # Clean up resources
     if session_id in conversation_context:
+        conversation_context[session_id]['active_session'] = False
         del conversation_context[session_id]
-    if session_id in audio_stream_queues:
-        del audio_stream_queues[session_id]
+    if session_id in active_audio_streams:
+        active_audio_streams[session_id]['active'] = False
+        del active_audio_streams[session_id]
 
-@socketio.on('start_speaking')
-def handle_start_speaking():
-    if request.sid in conversation_context:
-        conversation_context[request.sid]['is_speaking'] = True
-        conversation_context[request.sid]['audio_buffer'].clear()
-        print(f"User {request.sid} started speaking")
-
-@socketio.on('audio_chunk')
-def handle_audio_chunk(data):
-    if request.sid not in conversation_context:
+@socketio.on('audio_stream')
+def handle_audio_stream(data):
+    """Handle incoming audio stream from client"""
+    session_id = request.sid
+
+    if session_id not in conversation_context:
         return
 
-    context = conversation_context[request.sid]
+    context = conversation_context[session_id]
+    context['last_activity'] = time.time()
 
-    # Decode audio data
-    audio_data = base64.b64decode(data['audio'])
-    audio_numpy = np.frombuffer(audio_data, dtype=np.float32)
-    audio_tensor = torch.tensor(audio_numpy)
+    # Process different stream events
+    if data.get('event') == 'start':
+        # Client is starting to send audio
+        context['is_speaking'] = True
+        context['audio_buffer'].clear()
+        context['transcription_buffer'] = []
+        print(f"User {session_id} started streaming audio")
+
+        # If the AI was speaking, interrupt it
+        if session_id in active_audio_streams and active_audio_streams[session_id]['active']:
+            active_audio_streams[session_id]['active'] = False
+            emit('ai_stream_interrupt', {}, room=session_id)
 
-    # Add to buffer
-    context['audio_buffer'].append(audio_tensor)
+    elif data.get('event') == 'data':
+        # Audio data received
+        if not context['is_speaking']:
+            return
+
+        # Decode audio chunk
+        try:
+            audio_data = base64.b64decode(data.get('audio', ''))
+            if not audio_data:
+                return
+
+            audio_numpy = np.frombuffer(audio_data, dtype=np.float32)
+
+            # Apply a simple noise gate: drop chunks that are nearly silent
+            if np.mean(np.abs(audio_numpy)) < 0.01:
+                return
+
+            audio_tensor = torch.tensor(audio_numpy)
+
+            # Add to audio buffer
+            context['audio_buffer'].append(audio_tensor)
+
+            # Real-time transcription: process every 3rd chunk
+            if len(context['audio_buffer']) % 3 == 0:
+                threading.Thread(
+                    target=process_realtime_transcription,
+                    args=(session_id,),
+                    daemon=True
+                ).start()
+        except Exception as e:
+            print(f"Error processing audio chunk: {e}")
 
-    # Check for silence to detect end of speech
-    if context['is_speaking'] and is_silence(audio_tensor):
-        if context['silence_start'] is None:
-            context['silence_start'] = time.time()
-        elif time.time() - context['silence_start'] > 1.0:  # 1 second of silence
-            # Process the complete utterance
-            process_user_utterance(request.sid)
-    else:
-        context['silence_start'] = None
+    elif data.get('event') == 'end':
+        # Client has finished sending audio
+        context['is_speaking'] = False
+
+        if len(context['audio_buffer']) > 0:
+            # Process the complete utterance
+            threading.Thread(
+                target=process_complete_utterance,
+                args=(session_id,),
+                daemon=True
+            ).start()
+
+        print(f"User {session_id} stopped streaming audio")
 
-@socketio.on('stop_speaking')
-def handle_stop_speaking():
-    if request.sid in conversation_context:
-        conversation_context[request.sid]['is_speaking'] = False
-        process_user_utterance(request.sid)
-        print(f"User {request.sid} stopped speaking")
+def process_realtime_transcription(session_id):
+    """Process incoming audio for real-time transcription"""
+    if session_id not in conversation_context or not conversation_context[session_id]['active_session']:
+        return
+
+    context = conversation_context[session_id]
+
+    if not context['audio_buffer'] or not context['is_speaking']:
+        return
+
+    # Assign the temp path before the try block so the finally clause
+    # cannot hit a NameError if torch.cat fails
+    temp_audio_path = f"temp_rt_{session_id}.wav"
+    try:
+        # Combine current buffer for transcription
+        buffer_copy = list(context['audio_buffer'])
+        if not buffer_copy:
+            return
+
+        full_audio = torch.cat(buffer_copy, dim=0)
+
+        # Save audio to a temporary WAV file for transcription
+        # (one file per session; overlapping transcription threads share it)
+        torchaudio.save(
+            temp_audio_path,
+            full_audio.unsqueeze(0),
+            44100  # Assuming 44.1kHz capture on the client
+        )
+
+        # Transcribe with Whisper if available
+        if whisper_model is not None:
+            segments, _ = whisper_model.transcribe(temp_audio_path, beam_size=5)
+            text = " ".join([segment.text for segment in segments])
+
+            if text.strip():
+                context['transcription_buffer'].append(text)
+                # Send partial transcription to client; socketio.emit is required
+                # because this runs in a background thread with no request context
+                socketio.emit('partial_transcription', {'text': text}, room=session_id)
+    except Exception as e:
+        print(f"Error in realtime transcription: {e}")
+    finally:
+        # Clean up
+        if os.path.exists(temp_audio_path):
+            os.remove(temp_audio_path)
 
-def is_silence(audio_tensor, threshold=0.02):
-    """Check if an audio chunk is silence based on amplitude threshold"""
-    return torch.mean(torch.abs(audio_tensor)) < threshold
-
-def process_user_utterance(session_id):
+def process_complete_utterance(session_id):
     """Process completed user utterance, generate response and stream audio back"""
+    if session_id not in conversation_context or not conversation_context[session_id]['active_session']:
+        return
+
     context = conversation_context[session_id]
 
     if not context['audio_buffer']:
         return
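process_realtime_transcription above writes the buffered audio at a hard-coded 44100 Hz, which is only an assumption about the client's capture rate; if the browser records at another rate the WAV header will be wrong and transcription quality will drop. A minimal sketch of normalizing to Whisper's 16 kHz input rate with torchaudio before saving, where CLIENT_SAMPLE_RATE is a placeholder that would need to be negotiated with the client (faster-whisper resamples on load anyway, so the main benefit is a correct header and a smaller temp file):

    import torch
    import torchaudio

    CLIENT_SAMPLE_RATE = 44100   # assumed capture rate; negotiate with the client
    WHISPER_SAMPLE_RATE = 16000  # Whisper models operate on 16 kHz input

    def prepare_for_whisper(full_audio: torch.Tensor, path: str) -> None:
        # Resample before writing so the file's header matches its contents
        resampled = torchaudio.functional.resample(
            full_audio, orig_freq=CLIENT_SAMPLE_RATE, new_freq=WHISPER_SAMPLE_RATE
        )
        torchaudio.save(path, resampled.unsqueeze(0), WHISPER_SAMPLE_RATE)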
@@ -212,8 +290,6 @@ def process_user_utterance(session_id):
 
     # Combine audio chunks
     full_audio = torch.cat(list(context['audio_buffer']), dim=0)
     context['audio_buffer'].clear()
-    context['is_speaking'] = False
-    context['silence_start'] = None
 
     # Save audio to temporary WAV file for transcription
     temp_audio_path = f"temp_audio_{session_id}.wav"
@@ -255,23 +331,23 @@
 
     # Generate and stream audio response if CSM is available
     if csm_generator is not None:
-        # Set up streaming queue for this session
-        if session_id not in audio_stream_queues:
-            audio_stream_queues[session_id] = queue.Queue()
-        else:
-            # Clear any existing items in the queue
-            while not audio_stream_queues[session_id].empty():
-                audio_stream_queues[session_id].get()
+        # Create stream state object
+        active_audio_streams[session_id] = {
+            'active': True,
+            'text': bot_response
+        }
 
-        # Start audio generation in a separate thread to not block the server
+        # Send initial response to prepare the client; socketio.emit is used
+        # because this now runs in a background thread with no request context
+        socketio.emit('ai_stream_start', {
+            'text': bot_response
+        }, room=session_id)
+
+        # Start audio generation in a separate thread
         threading.Thread(
-            target=generate_and_stream_audio,
+            target=generate_and_stream_audio_realtime,
             args=(bot_response, context['segments'], session_id),
             daemon=True
         ).start()
-
-        # Initial response with text
-        emit('start_streaming_response', {'text': bot_response}, room=session_id)
     else:
         # Send text-only response if audio generation isn't available
-        emit('text_response', {'text': bot_response}, room=session_id)
+        socketio.emit('text_response', {'text': bot_response}, room=session_id)
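The handlers above rely on an implicit wire format for the client-to-server leg: raw little-endian float32 PCM, base64-encoded into the Socket.IO payload (the server-to-client leg appears to send WAV-encoded chunks instead, via the BytesIO buffer in the hunks below). A round-trip sketch of that client-side contract; the helper names are illustrative, not part of the patch:

    import base64

    import numpy as np
    import torch

    def encode_pcm(audio: torch.Tensor) -> str:
        # float32 samples -> base64 string for the 'audio_stream' data event
        return base64.b64encode(audio.numpy().astype(np.float32).tobytes()).decode('utf-8')

    def decode_pcm(payload: str) -> torch.Tensor:
        # base64 payload -> float32 tensor (mirrors handle_audio_stream)
        samples = np.frombuffer(base64.b64decode(payload), dtype=np.float32)
        return torch.from_numpy(samples.copy())

    assert decode_pcm(encode_pcm(torch.zeros(2400))).shape == (2400,)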
@@ -378,8 +454,11 @@ def fallback_response(user_text):
     else:
         return "I understand you said something about that. Unfortunately, I'm running in fallback mode with limited capabilities. Please try again later when the main model is available."
 
-def generate_audio_response(text, conversation_segments):
-    """Generate audio response using CSM"""
+def generate_and_stream_audio_realtime(text, conversation_segments, session_id):
+    """Generate audio response using CSM and stream it in real time to the client"""
+    if session_id not in active_audio_streams or not active_audio_streams[session_id]['active']:
+        return
+
     try:
         # Use the last few conversation segments as context
         context_segments = conversation_segments[-4:] if len(conversation_segments) > 4 else conversation_segments
@@ -394,40 +473,23 @@
             topk=50
         )
 
-        return audio
-    except Exception as e:
-        print(f"Error generating audio: {e}")
-        # Return silence as fallback
-        return torch.zeros(csm_generator.sample_rate * 3)  # 3 seconds of silence
-
-def generate_and_stream_audio(text, conversation_segments, session_id):
-    """Generate audio response using CSM and stream it in chunks"""
-    try:
-        # Use the last few conversation segments as context
-        context_segments = conversation_segments[-4:] if len(conversation_segments) > 4 else conversation_segments
-
-        # Generate full audio for bot response
-        audio = csm_generator.generate(
-            text=text,
-            speaker=1,  # Bot is speaker 1
-            context=context_segments,
-            max_audio_length_ms=10000,  # 10 seconds max
-            temperature=0.9,
-            topk=50
-        )
-
         # Store the full audio for conversation history
         bot_segment = Segment(
             text=text,
             speaker=1,  # Bot is speaker 1
             audio=audio
         )
-        if session_id in conversation_context:
+        if session_id in conversation_context and conversation_context[session_id]['active_session']:
             conversation_context[session_id]['segments'].append(bot_segment)
 
-        # Split audio into chunks for streaming
-        chunk_size = CHUNK_SIZE
+        # Stream audio in small chunks for more responsive playback
+        chunk_size = 4800  # 200ms at 24kHz
+        for i in range(0, len(audio), chunk_size):
+            if session_id not in active_audio_streams or not active_audio_streams[session_id]['active']:
+                print("Audio streaming interrupted or session ended")
+                break
+
             chunk = audio[i:i+chunk_size]
 
             # Convert audio chunk to base64 for streaming
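The loop above slices the response into 4800-sample chunks, which is 200 ms at CSM's 24 kHz output rate, and the next hunk sleeps 0.15 s after emitting each chunk, so generation stays roughly 50 ms per chunk ahead of playback. A sketch of deriving that delay from the chunk itself rather than hard-coding it, where PACING_RATIO is an assumption rather than part of the patch:

    import time

    SAMPLE_RATE = 24000   # CSM output rate advertised in the 'ready' event
    PACING_RATIO = 0.75   # sleep for 75% of each chunk's real duration

    def pace_chunk(num_samples: int) -> None:
        # 4800 samples -> 0.2s of audio -> sleeps 0.15s, matching the patch
        time.sleep((num_samples / SAMPLE_RATE) * PACING_RATIO)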
@@ -436,61 +498,33 @@
             audio_bytes.seek(0)
             audio_b64 = base64.b64encode(audio_bytes.read()).decode('utf-8')
 
-            # Send the chunk to the client
-            if session_id in audio_stream_queues:
-                audio_stream_queues[session_id].put({
-                    'audio': audio_b64,
-                    'is_last': i + chunk_size >= len(audio)
-                })
-            else:
-                # Session was disconnected before we finished generating
-                break
-
-        # Signal the end of streaming if queue still exists
-        if session_id in audio_stream_queues:
-            # Add an empty chunk as a sentinel to signal end of streaming
-            audio_stream_queues[session_id].put(None)
+            # Send chunk to client
+            socketio.emit('ai_stream_data', {
+                'audio': audio_b64,
+                'is_last': i + chunk_size >= len(audio)
+            }, room=session_id)
+
+            # Pace the stream: each 200ms chunk is sent every ~150ms so the
+            # client's playback buffer stays slightly ahead without flooding
+            time.sleep(0.15)
+
+        # Signal end of stream
+        if session_id in active_audio_streams and active_audio_streams[session_id]['active']:
+            socketio.emit('ai_stream_end', {}, room=session_id)
+            active_audio_streams[session_id]['active'] = False
 
     except Exception as e:
         print(f"Error generating or streaming audio: {e}")
         # Send error message to client
         if session_id in conversation_context and conversation_context[session_id]['active_session']:
             socketio.emit('error', {
                 'message': f'Error generating audio: {str(e)}'
             }, room=session_id)
 
-        # Send a final message to unblock the client
-        if session_id in audio_stream_queues:
-            audio_stream_queues[session_id].put(None)
-
-@socketio.on('request_audio_chunk')
-def handle_request_audio_chunk():
-    """Send the next audio chunk in the queue to the client"""
-    session_id = request.sid
-
-    if session_id not in audio_stream_queues:
-        emit('error', {'message': 'No audio stream available'})
-        return
-
-    # Get the next chunk or wait for it to be available
-    try:
-        if not audio_stream_queues[session_id].empty():
-            chunk = audio_stream_queues[session_id].get(block=False)
-
-            # If chunk is None, we're done streaming
-            if chunk is None:
-                emit('end_streaming')
-                # Clean up the queue
-                if session_id in audio_stream_queues:
-                    del audio_stream_queues[session_id]
-            else:
-                emit('audio_chunk', chunk)
-        else:
-            # If the queue is empty but we're still generating, tell client to wait
-            emit('wait_for_chunk')
-    except Exception as e:
-        print(f"Error sending audio chunk: {e}")
-        emit('error', {'message': f'Error streaming audio: {str(e)}'})
+        # Signal stream end to unblock the client
+        socketio.emit('ai_stream_end', {}, room=session_id)
+        if session_id in active_audio_streams:
+            active_audio_streams[session_id]['active'] = False
 
 if __name__ == '__main__':
     # Ensure the existing index.html file is in the correct location
@@ -500,10 +534,10 @@ if __name__ == '__main__':
     if os.path.exists('index.html') and not os.path.exists('templates/index.html'):
         os.rename('index.html', 'templates/index.html')
 
-    # Load models asynchronously before starting the server
+    # Load models before starting the server
     print("Starting model loading...")
     load_models()
 
-    # Start the server
+    # Start the server with eventlet for better WebSocket performance
    print("Starting Flask SocketIO server...")
     socketio.run(app, host='0.0.0.0', port=5000, debug=False)
\ No newline at end of file
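For reference, a minimal Python client that exercises the new event protocol end to end, assuming the server above is reachable at http://localhost:5000 and the python-socketio package is installed; the random chunk stands in for real microphone capture and nothing here is part of the patch:

    import base64

    import numpy as np
    import socketio

    sio = socketio.Client()

    @sio.on('ready')
    def on_ready(data):
        print(f"Connected; server audio sample rate: {data.get('sample_rate')}")

    @sio.on('partial_transcription')
    def on_partial(data):
        print(f"Partial transcript: {data['text']}")

    @sio.on('ai_stream_start')
    def on_stream_start(data):
        print(f"AI response: {data['text']}")

    @sio.on('ai_stream_data')
    def on_stream_data(data):
        # Each payload is a base64-encoded audio chunk; feed it to a player
        print(f"Got chunk, last={data['is_last']}")

    sio.connect('http://localhost:5000')

    # One utterance: start, a single ~100ms float32 chunk, end
    sio.emit('audio_stream', {'event': 'start'})
    chunk = (0.1 * np.random.randn(4410)).astype(np.float32)
    sio.emit('audio_stream', {
        'event': 'data',
        'audio': base64.b64encode(chunk.tobytes()).decode('utf-8'),
    })
    sio.emit('audio_stream', {'event': 'end'})
    sio.wait()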