<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sesame AI Voice Chat</title>
    <style>
        body {
            font-family: 'Arial', sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f9f9f9;
        }
        .conversation {
            border: 1px solid #ddd;
            border-radius: 12px;
            padding: 20px;
            height: 400px;
            overflow-y: auto;
            margin-bottom: 20px;
            background-color: white;
            box-shadow: 0 2px 10px rgba(0,0,0,0.05);
        }
        .message {
            margin-bottom: 15px;
            padding: 12px;
            border-radius: 12px;
            max-width: 80%;
            line-height: 1.4;
        }
        .user {
            background-color: #e3f2fd;
            text-align: right;
            margin-left: auto;
            border-bottom-right-radius: 4px;
        }
        .ai {
            background-color: #f1f1f1;
            margin-right: auto;
            border-bottom-left-radius: 4px;
        }
        .system {
            background-color: #f8f9fa;
            font-style: italic;
            text-align: center;
            font-size: 0.9em;
            color: #666;
            padding: 8px;
            margin: 10px auto;
            max-width: 90%;
        }
        .controls {
            display: flex;
            gap: 15px;
            justify-content: center;
            align-items: center;
        }
        button {
            padding: 12px 24px;
            border-radius: 24px;
            border: none;
            background-color: #4CAF50;
            color: white;
            cursor: pointer;
            font-weight: bold;
            transition: all 0.2s ease;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }
        button:hover {
            background-color: #45a049;
            box-shadow: 0 4px 8px rgba(0,0,0,0.15);
        }
        .recording {
            background-color: #f44336;
            animation: pulse 1.5s infinite;
        }
        .processing {
            background-color: #FFA500;
        }
        select {
            padding: 10px;
            border-radius: 24px;
            border: 1px solid #ddd;
            background-color: white;
        }
        .transcript {
            font-style: italic;
            color: #666;
            margin-top: 5px;
        }
        @keyframes pulse {
            0% { opacity: 1; }
            50% { opacity: 0.7; }
            100% { opacity: 1; }
        }
        .status-indicator {
            display: flex;
            align-items: center;
            justify-content: center;
            margin-top: 10px;
            gap: 5px;
        }
        .status-dot {
            width: 10px;
            height: 10px;
            border-radius: 50%;
            background-color: #ccc;
        }
        .status-dot.active {
            background-color: #4CAF50;
        }
        .status-text {
            font-size: 0.9em;
            color: #666;
        }
        audio {
            width: 100%;
            margin-top: 5px;
        }
    </style>
</head>
<body>
    <h1>Sesame AI Voice Chat</h1>
    <div class="conversation" id="conversation"></div>

    <div class="controls">
        <select id="speakerSelect">
            <option value="0">Speaker 0</option>
            <option value="1">Speaker 1</option>
        </select>
        <button id="streamButton">Start Conversation</button>
        <button id="clearButton">Clear Chat</button>
    </div>

    <div class="status-indicator">
        <div class="status-dot" id="statusDot"></div>
        <div class="status-text" id="statusText">Not connected</div>
    </div>

    <script>
        // Variables
        let ws;
        let audioContext;
        let streamProcessor;
        let isStreaming = false;
        let isSpeaking = false;
        let silenceTimer = null;
        let energyWindow = [];
        const ENERGY_WINDOW_SIZE = 10;
        const CLIENT_SILENCE_THRESHOLD = 0.01;
        const CLIENT_SILENCE_DURATION_MS = 1000; // 1 second

        // DOM elements
        const conversationEl = document.getElementById('conversation');
        const speakerSelectEl = document.getElementById('speakerSelect');
        const streamButton = document.getElementById('streamButton');
        const clearButton = document.getElementById('clearButton');
        const statusDot = document.getElementById('statusDot');
        const statusText = document.getElementById('statusText');

        // Initialize on page load
        window.addEventListener('load', () => {
            connectWebSocket();
            setupAudioContext();

            // Event listeners
            streamButton.addEventListener('click', toggleStreaming);
            clearButton.addEventListener('click', clearConversation);
        });

        // Setup audio context for streaming
        function setupAudioContext() {
            try {
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                console.log('Audio context setup completed');
            } catch (err) {
                console.error('Error setting up audio context:', err);
                addSystemMessage(`Audio context error: ${err.message}`);
            }
        }

        // Connect to WebSocket server
        function connectWebSocket() {
            const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsUrl = `${wsProtocol}//${window.location.hostname}:8000/ws`;

            ws = new WebSocket(wsUrl);

            ws.onopen = () => {
                console.log('WebSocket connected');
                statusDot.classList.add('active');
                statusText.textContent = 'Connected';
                addSystemMessage('Connected to server');
            };

            ws.onmessage = (event) => {
                const response = JSON.parse(event.data);
                console.log('Received:', response);

                if (response.type === 'audio_response') {
                    // Play audio response
                    const audio = new Audio(response.audio);
                    audio.play();

                    // Add message to conversation
                    addAIMessage(response.text || 'AI response', response.audio);

                    // Reset to speaking state after AI response
                    if (isStreaming) {
                        streamButton.textContent = 'Listening...';
                        streamButton.style.backgroundColor = '#f44336'; // Back to red
                        streamButton.classList.add('recording');
                        isSpeaking = false; // Reset speaking state
                    }
                } else if (response.type === 'error') {
                    addSystemMessage(`Error: ${response.message}`);
                } else if (response.type === 'context_updated') {
                    addSystemMessage(response.message);
                } else if (response.type === 'streaming_status') {
                    addSystemMessage(`Streaming ${response.status}`);
                } else if (response.type === 'transcription') {
                    addUserTranscription(response.text);
                }
            };

            ws.onclose = () => {
                console.log('WebSocket disconnected');
                statusDot.classList.remove('active');
                statusText.textContent = 'Disconnected';
                addSystemMessage('Disconnected from server. Reconnecting...');
                setTimeout(connectWebSocket, 3000);
            };

            ws.onerror = (error) => {
                console.error('WebSocket error:', error);
                statusDot.classList.remove('active');
                statusText.textContent = 'Error';
                addSystemMessage('Connection error');
            };
        }
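
        // Message protocol summary (as implemented in this client, not a server spec):
        // the client sends JSON with an `action` of 'stream_audio', 'stop_streaming',
        // or 'clear_context', and expects JSON replies whose `type` is one of
        // 'audio_response' (with `audio` as a playable URL/data URL and optional `text`),
        // 'transcription', 'context_updated', 'streaming_status', or 'error'.
        // The backend is assumed to listen on port 8000 at the /ws path.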

        // Toggle streaming
        function toggleStreaming() {
            if (isStreaming) {
                stopStreaming();
            } else {
                startStreaming();
            }
        }

        // Start streaming
        async function startStreaming() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                const speaker = parseInt(speakerSelectEl.value);

                // Browsers may create the AudioContext in a suspended state until a user
                // gesture; this runs from a click handler, so resume it here if needed.
                if (audioContext.state === 'suspended') {
                    await audioContext.resume();
                }

                isStreaming = true;
                isSpeaking = false;
                energyWindow = [];

                streamButton.textContent = 'Listening...';
                streamButton.classList.add('recording');

                // Create audio processor node
                const source = audioContext.createMediaStreamSource(stream);
                streamProcessor = audioContext.createScriptProcessor(4096, 1, 1);

                // Process and send audio data
                streamProcessor.onaudioprocess = function(e) {
                    const audioData = e.inputBuffer.getChannelData(0);

                    // Calculate energy (volume) for silence detection
                    const energy = calculateAudioEnergy(audioData);
                    updateEnergyWindow(energy);

                    // Check if currently silent
                    const avgEnergy = calculateAverageEnergy();
                    const isSilent = avgEnergy < CLIENT_SILENCE_THRESHOLD;

                    // Handle silence/speech transitions for visual feedback
                    handleSpeechState(isSilent);

                    // Continue processing audio regardless of silence state
                    const downsampled = downsampleBuffer(audioData, audioContext.sampleRate, 24000);
                    sendAudioChunk(downsampled, speaker);
                };

                // Connect the nodes
                source.connect(streamProcessor);
                streamProcessor.connect(audioContext.destination);

                addSystemMessage('Listening - speak naturally and pause when finished');

            } catch (err) {
                console.error('Error starting audio stream:', err);
                addSystemMessage(`Microphone error: ${err.message}`);
                isStreaming = false;
                streamButton.textContent = 'Start Conversation';
                streamButton.classList.remove('recording');
            }
        }
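
        // Client-side silence detection: each chunk's mean absolute amplitude is averaged
        // over a sliding window of ENERGY_WINDOW_SIZE chunks and compared against
        // CLIENT_SILENCE_THRESHOLD; roughly CLIENT_SILENCE_DURATION_MS of sustained
        // silence switches the UI into the "Processing" state (see handleSpeechState).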

        // Calculate audio energy (volume)
        function calculateAudioEnergy(buffer) {
            let sum = 0;
            for (let i = 0; i < buffer.length; i++) {
                sum += Math.abs(buffer[i]);
            }
            return sum / buffer.length;
        }

        // Update the sliding energy window
        function updateEnergyWindow(energy) {
            energyWindow.push(energy);
            if (energyWindow.length > ENERGY_WINDOW_SIZE) {
                energyWindow.shift();
            }
        }

        // Calculate average energy from the window
        function calculateAverageEnergy() {
            if (energyWindow.length === 0) return 0;
            return energyWindow.reduce((sum, val) => sum + val, 0) / energyWindow.length;
        }

        // Handle speech state changes and visual feedback
        function handleSpeechState(isSilent) {
            if (isSpeaking && isSilent) {
                // Transition from speaking to silence
                if (!silenceTimer) {
                    silenceTimer = setTimeout(() => {
                        // Silence persisted long enough
                        streamButton.textContent = 'Processing...';
                        streamButton.classList.remove('recording');
                        streamButton.classList.add('processing');
                        addSystemMessage('Detected pause in speech, processing response...');
                    }, CLIENT_SILENCE_DURATION_MS);
                }
            } else if (!isSpeaking && !isSilent) {
                // Transition from silence to speaking
                isSpeaking = true;
                streamButton.textContent = 'Listening...';
                streamButton.classList.add('recording');
                streamButton.classList.remove('processing');

                // Clear any pending silence timer
                if (silenceTimer) {
                    clearTimeout(silenceTimer);
                    silenceTimer = null;
                }
            } else if (isSpeaking && !isSilent) {
                // Still speaking, reset any silence timer
                if (silenceTimer) {
                    clearTimeout(silenceTimer);
                    silenceTimer = null;
                }
            }

            // Update speaking state
            if (!isSilent) {
                isSpeaking = true;
            }
        }

        // Send audio chunk to server
        function sendAudioChunk(audioData, speaker) {
            const wavData = createWavBlob(audioData, 24000);
            const reader = new FileReader();

            reader.onloadend = function() {
                const base64data = reader.result;

                // Send to server (skipped if the socket is closed or still reconnecting)
                if (ws && ws.readyState === WebSocket.OPEN) {
                    ws.send(JSON.stringify({
                        action: 'stream_audio',
                        speaker: speaker,
                        audio: base64data
                    }));
                }
            };

            reader.readAsDataURL(wavData);
        }

        // Stop streaming
        function stopStreaming() {
            if (streamProcessor) {
                streamProcessor.disconnect();
                streamProcessor = null;
            }

            // Clear any pending silence timer
            if (silenceTimer) {
                clearTimeout(silenceTimer);
                silenceTimer = null;
            }

            isStreaming = false;
            isSpeaking = false;
            energyWindow = [];

            streamButton.textContent = 'Start Conversation';
            streamButton.classList.remove('recording', 'processing');
            streamButton.style.backgroundColor = ''; // Reset to default

            addSystemMessage('Conversation paused');

            // Send stop streaming signal to server
            ws.send(JSON.stringify({
                action: 'stop_streaming',
                speaker: parseInt(speakerSelectEl.value)
            }));
        }

        // Clear conversation
        function clearConversation() {
            // Clear conversation history
            ws.send(JSON.stringify({
                action: 'clear_context'
            }));

            // Clear the UI
            conversationEl.innerHTML = '';
            addSystemMessage('Conversation cleared');
        }
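
        // Note: downsampleBuffer below averages the raw input samples that map to each
        // output frame (a simple box average rather than a proper low-pass filter), and
        // assumes the capture rate is at or above the 24 kHz target used by sendAudioChunk.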

        // Downsample audio buffer to target sample rate
        function downsampleBuffer(buffer, sampleRate, targetSampleRate) {
            if (targetSampleRate === sampleRate) {
                return buffer;
            }

            const sampleRateRatio = sampleRate / targetSampleRate;
            const newLength = Math.round(buffer.length / sampleRateRatio);
            const result = new Float32Array(newLength);

            let offsetResult = 0;
            let offsetBuffer = 0;

            while (offsetResult < result.length) {
                const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
                let accum = 0, count = 0;

                for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
                    accum += buffer[i];
                    count++;
                }

                result[offsetResult] = accum / count;
                offsetResult++;
                offsetBuffer = nextOffsetBuffer;
            }

            return result;
        }

        // Create WAV blob from Float32Array
        function createWavBlob(samples, sampleRate) {
            const buffer = new ArrayBuffer(44 + samples.length * 2);
            const view = new DataView(buffer);

            // RIFF chunk descriptor
            writeString(view, 0, 'RIFF');
            view.setUint32(4, 36 + samples.length * 2, true);
            writeString(view, 8, 'WAVE');

            // fmt sub-chunk
            writeString(view, 12, 'fmt ');
            view.setUint32(16, 16, true);
            view.setUint16(20, 1, true); // PCM format
            view.setUint16(22, 1, true); // Mono channel
            view.setUint32(24, sampleRate, true);
            view.setUint32(28, sampleRate * 2, true);
            view.setUint16(32, 2, true);
            view.setUint16(34, 16, true);

            // data sub-chunk
            writeString(view, 36, 'data');
            view.setUint32(40, samples.length * 2, true);

            // Write the PCM samples as 16-bit signed integers
            for (let i = 0; i < samples.length; i++) {
                const sample = Math.max(-1, Math.min(1, samples[i]));
                view.setInt16(44 + i * 2, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
            }

            return new Blob([buffer], { type: 'audio/wav' });
        }

        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }

        // Message display functions
        function addUserTranscription(text) {
            // Find if there's already a pending user message
            let pendingMessage = document.querySelector('.message.user.pending');

            if (!pendingMessage) {
                // Create a new message
                pendingMessage = document.createElement('div');
                pendingMessage.classList.add('message', 'user', 'pending');
                conversationEl.appendChild(pendingMessage);
            }

            pendingMessage.textContent = text;
            pendingMessage.classList.remove('pending');
            conversationEl.scrollTop = conversationEl.scrollHeight;
        }

        function addAIMessage(text, audioSrc) {
            const messageEl = document.createElement('div');
            messageEl.classList.add('message', 'ai');

            if (text) {
                const textDiv = document.createElement('div');
                textDiv.textContent = text;
                messageEl.appendChild(textDiv);
            }

            const audioEl = document.createElement('audio');
            audioEl.controls = true;
            audioEl.src = audioSrc;
            messageEl.appendChild(audioEl);

            conversationEl.appendChild(messageEl);
            conversationEl.scrollTop = conversationEl.scrollHeight;
        }

        function addSystemMessage(text) {
            const messageEl = document.createElement('div');
            messageEl.classList.add('message', 'system');
            messageEl.textContent = text;
            conversationEl.appendChild(messageEl);
            conversationEl.scrollTop = conversationEl.scrollHeight;
        }
    </script>
</body>
</html>