/**
* Sesame AI Voice Chat Application
*
* This script handles the audio streaming, visualization,
* and Socket.IO communication for the voice chat application.
*/
// Application state
const state = {
socket: null,
audioContext: null,
streamProcessor: null,
analyser: null,
microphone: null,
isStreaming: false,
isSpeaking: false,
silenceTimer: null,
energyWindow: [],
currentSpeaker: 0,
silenceThreshold: 0.01,
visualizerAnimationFrame: null,
volumeUpdateInterval: null,
connectionAttempts: 0
};
// Constants
const ENERGY_WINDOW_SIZE = 10;
const CLIENT_SILENCE_DURATION_MS = 1000; // 1 second of silence before processing
const MAX_CONNECTION_ATTEMPTS = 5;
const RECONNECTION_DELAY_MS = 2000;
// DOM elements
const elements = {
conversation: document.getElementById('conversation'),
speakerSelect: document.getElementById('speakerSelect'),
streamButton: document.getElementById('streamButton'),
clearButton: document.getElementById('clearButton'),
statusDot: document.getElementById('statusDot'),
statusText: document.getElementById('statusText'),
visualizerCanvas: document.getElementById('audioVisualizer'),
visualizerLabel: document.getElementById('visualizerLabel'),
thresholdSlider: document.getElementById('thresholdSlider'),
thresholdValue: document.getElementById('thresholdValue'),
volumeLevel: document.getElementById('volumeLevel'),
autoPlayResponses: document.getElementById('autoPlayResponses'),
showVisualizer: document.getElementById('showVisualizer')
};
// Visualization variables
let canvasContext;
let visualizerBufferLength;
let visualizerDataArray;
// Initialize the application
function initializeApp() {
// Set up event listeners
elements.streamButton.addEventListener('click', toggleStreaming);
elements.clearButton.addEventListener('click', clearConversation);
elements.thresholdSlider.addEventListener('input', updateThreshold);
elements.speakerSelect.addEventListener('change', () => {
state.currentSpeaker = parseInt(elements.speakerSelect.value);
});
elements.showVisualizer.addEventListener('change', toggleVisualizerVisibility);
// Initialize audio context
setupAudioContext();
// Set up visualization
setupVisualizer();
// Connect to Socket.IO server
connectToServer();
// Add welcome message
addSystemMessage('Welcome to Sesame AI Voice Chat! Click "Start Conversation" to begin speaking.');
}
// Connect to Socket.IO server
function connectToServer() {
try {
// Use the server URL with or without a specific port
const serverUrl = window.location.origin;
updateStatus('Connecting...', 'connecting');
console.log(`Connecting to Socket.IO server at ${serverUrl}`);
state.socket = io(serverUrl, {
reconnectionDelay: RECONNECTION_DELAY_MS,
reconnectionDelayMax: 5000,
reconnectionAttempts: MAX_CONNECTION_ATTEMPTS
});
setupSocketListeners();
} catch (error) {
console.error('Error connecting to server:', error);
updateStatus('Connection failed. Retrying...', 'error');
// Try to reconnect
if (state.connectionAttempts < MAX_CONNECTION_ATTEMPTS) {
state.connectionAttempts++;
setTimeout(connectToServer, RECONNECTION_DELAY_MS);
} else {
updateStatus('Could not connect to server', 'error');
addSystemMessage('Failed to connect to the server. Please check your connection and refresh the page.');
}
}
}
// Set up Socket.IO event listeners
function setupSocketListeners() {
if (!state.socket) return;
state.socket.on('connect', () => {
console.log('Connected to Socket.IO server');
updateStatus('Connected', 'connected');
state.connectionAttempts = 0;
elements.streamButton.disabled = false;
addSystemMessage('Connected to server');
});
state.socket.on('disconnect', () => {
console.log('Disconnected from Socket.IO server');
updateStatus('Disconnected', 'disconnected');
// Stop streaming if active
if (state.isStreaming) {
stopStreaming(false); // false = don't send to server
}
elements.streamButton.disabled = true;
addSystemMessage('Disconnected from server. Trying to reconnect...');
});
state.socket.on('status', (data) => {
console.log('Status:', data);
addSystemMessage(data.message);
});
state.socket.on('error', (data) => {
console.error('Server error:', data);
addSystemMessage(`Error: ${data.message}`);
});
state.socket.on('audio_response', handleAudioResponse);
state.socket.on('transcription', handleTranscription);
state.socket.on('context_updated', handleContextUpdate);
state.socket.on('streaming_status', handleStreamingStatus);
state.socket.on('connect_error', (error) => {
console.error('Connection error:', error);
updateStatus('Connection Error', 'error');
});
}
// Update the connection status in the UI
function updateStatus(message, status) {
elements.statusText.textContent = message;
elements.statusDot.className = 'status-dot';
if (status === 'connected') {
elements.statusDot.classList.add('active');
} else if (status === 'connecting') {
elements.statusDot.style.backgroundColor = '#FFA500';
} else if (status === 'error') {
elements.statusDot.style.backgroundColor = '#F44336';
}
}
// Set up audio context
function setupAudioContext() {
try {
state.audioContext = new (window.AudioContext || window.webkitAudioContext)();
console.log('Audio context initialized');
} catch (err) {
console.error('Error setting up audio context:', err);
addSystemMessage(`Audio context error: ${err.message}`);
elements.streamButton.disabled = true;
}
}
// Set up audio visualizer
function setupVisualizer() {
canvasContext = elements.visualizerCanvas.getContext('2d');
// Set canvas size to match container
function resizeCanvas() {
const container = elements.visualizerCanvas.parentElement;
elements.visualizerCanvas.width = container.clientWidth;
elements.visualizerCanvas.height = container.clientHeight;
}
// Call initially and on window resize
resizeCanvas();
window.addEventListener('resize', resizeCanvas);
// Create placeholder data array
visualizerBufferLength = 128;
visualizerDataArray = new Uint8Array(visualizerBufferLength);
}
// Toggle stream on/off
function toggleStreaming() {
if (state.isStreaming) {
stopStreaming(true); // true = send to server
} else {
startStreaming();
}
}
// Start streaming audio to the server
async function startStreaming() {
if (!state.socket || !state.socket.connected) {
addSystemMessage('Cannot start conversation: Not connected to server');
return;
}
try {
// Request microphone access
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Update state
state.isStreaming = true;
state.isSpeaking = false;
state.energyWindow = [];
state.currentSpeaker = parseInt(elements.speakerSelect.value);
// Update UI
elements.streamButton.innerHTML = ' Listening...';
elements.streamButton.classList.add('recording');
elements.visualizerLabel.style.opacity = '0';
// Set up audio processing
setupAudioProcessing(stream);
// Start volume meter updates
state.volumeUpdateInterval = setInterval(updateVolumeMeter, 100);
addSystemMessage('Listening - speak naturally and pause when finished');
} catch (err) {
console.error('Error starting audio stream:', err);
addSystemMessage(`Microphone error: ${err.message}`);
cleanupAudioResources();
}
}
// Set up audio processing pipeline
function setupAudioProcessing(stream) {
// Store microphone stream for later cleanup
state.microphone = stream;
// Create source from microphone
const source = state.audioContext.createMediaStreamSource(stream);
// Setup analyzer for visualization
state.analyser = state.audioContext.createAnalyser();
state.analyser.fftSize = 256;
state.analyser.smoothingTimeConstant = 0.8;
state.analyser.minDecibels = -90;
state.analyser.maxDecibels = -10;
visualizerBufferLength = state.analyser.frequencyBinCount;
visualizerDataArray = new Uint8Array(visualizerBufferLength);
// Connect source to analyzer
source.connect(state.analyser);
// Start visualization
if (state.visualizerAnimationFrame) {
cancelAnimationFrame(state.visualizerAnimationFrame);
}
drawVisualizer();
// Setup audio processor
state.streamProcessor = state.audioContext.createScriptProcessor(4096, 1, 1);
// Connect audio nodes
source.connect(state.streamProcessor);
state.streamProcessor.connect(state.audioContext.destination);
// Process audio
state.streamProcessor.onaudioprocess = handleAudioProcess;
}
// Handle each frame of audio data
function handleAudioProcess(e) {
const audioData = e.inputBuffer.getChannelData(0);
// Calculate energy (volume) for silence detection
const energy = calculateAudioEnergy(audioData);
updateEnergyWindow(energy);
// Check if currently silent
const avgEnergy = calculateAverageEnergy();
const isSilent = avgEnergy < state.silenceThreshold;
// Handle silence/speech transitions
handleSpeechState(isSilent);
// Process and send audio
const downsampled = downsampleBuffer(audioData, state.audioContext.sampleRate, 24000);
sendAudioChunk(downsampled, state.currentSpeaker);
}
// Stop streaming audio
function stopStreaming(sendToServer = true) {
// Cleanup audio resources
cleanupAudioResources();
// Reset state
state.isStreaming = false;
state.isSpeaking = false;
state.energyWindow = [];
// Update UI
elements.streamButton.innerHTML = ' Start Conversation';
elements.streamButton.classList.remove('recording', 'processing');
elements.streamButton.style.backgroundColor = '';
elements.volumeLevel.style.width = '100%';
// Clear volume meter updates
if (state.volumeUpdateInterval) {
clearInterval(state.volumeUpdateInterval);
state.volumeUpdateInterval = null;
}
addSystemMessage('Conversation paused');
// Notify server
if (sendToServer && state.socket && state.socket.connected) {
state.socket.emit('stop_streaming', {
speaker: state.currentSpeaker
});
}
}
// Clean up audio processing resources
function cleanupAudioResources() {
// Stop microphone stream
if (state.microphone) {
state.microphone.getTracks().forEach(track => track.stop());
state.microphone = null;
}
// Disconnect audio processor
if (state.streamProcessor) {
state.streamProcessor.disconnect();
state.streamProcessor.onaudioprocess = null;
state.streamProcessor = null;
}
// Disconnect analyzer
if (state.analyser) {
state.analyser.disconnect();
state.analyser = null;
}
// Cancel visualizer animation
if (state.visualizerAnimationFrame) {
cancelAnimationFrame(state.visualizerAnimationFrame);
state.visualizerAnimationFrame = null;
}
// Cancel silence timer
if (state.silenceTimer) {
clearTimeout(state.silenceTimer);
state.silenceTimer = null;
}
// Reset visualizer display
if (canvasContext) {
canvasContext.clearRect(0, 0, elements.visualizerCanvas.width, elements.visualizerCanvas.height);
elements.visualizerLabel.style.opacity = '0.7';
}
}
// Clear conversation history
function clearConversation() {
// Clear UI
elements.conversation.innerHTML = '';
addSystemMessage('Conversation cleared');
// Notify server
if (state.socket && state.socket.connected) {
state.socket.emit('clear_context');
}
}
// Calculate audio energy (volume)
function calculateAudioEnergy(buffer) {
let sum = 0;
for (let i = 0; i < buffer.length; i++) {
sum += Math.abs(buffer[i]);
}
return sum / buffer.length;
}
// Update energy window for averaging
function updateEnergyWindow(energy) {
state.energyWindow.push(energy);
if (state.energyWindow.length > ENERGY_WINDOW_SIZE) {
state.energyWindow.shift();
}
}
// Calculate average energy from window
function calculateAverageEnergy() {
if (state.energyWindow.length === 0) return 0;
return state.energyWindow.reduce((sum, val) => sum + val, 0) / state.energyWindow.length;
}
// Update the threshold from the slider
function updateThreshold() {
state.silenceThreshold = parseFloat(elements.thresholdSlider.value);
elements.thresholdValue.textContent = state.silenceThreshold.toFixed(3);
}
// Update the volume meter display
function updateVolumeMeter() {
if (!state.isStreaming || !state.analyser) return;
// Get current volume level
const dataArray = new Uint8Array(state.analyser.frequencyBinCount);
state.analyser.getByteFrequencyData(dataArray);
// Calculate average volume
let sum = 0;
for (let i = 0; i < dataArray.length; i++) {
sum += dataArray[i];
}
const average = sum / dataArray.length;
// Normalize to 0-100%
const percentage = Math.min(100, Math.max(0, average / 128 * 100));
// Invert because we're showing the "empty" portion
elements.volumeLevel.style.width = (100 - percentage) + '%';
// Change color based on level
if (percentage > 70) {
elements.volumeLevel.style.backgroundColor = 'rgba(244, 67, 54, 0.5)'; // Red
} else if (percentage > 30) {
elements.volumeLevel.style.backgroundColor = 'rgba(255, 235, 59, 0.5)'; // Yellow
} else {
elements.volumeLevel.style.backgroundColor = 'rgba(0, 0, 0, 0.5)'; // Dark
}
}
// Handle speech/silence state transitions
function handleSpeechState(isSilent) {
if (state.isSpeaking && isSilent) {
// Transition from speaking to silence
if (!state.silenceTimer) {
state.silenceTimer = setTimeout(() => {
// Silence persisted long enough - process the audio
elements.streamButton.innerHTML = ' Processing...';
elements.streamButton.classList.remove('recording');
elements.streamButton.classList.add('processing');
addSystemMessage('Detected pause in speech, processing response...');
}, CLIENT_SILENCE_DURATION_MS);
}
} else if (!state.isSpeaking && !isSilent) {
// Transition from silence to speaking
state.isSpeaking = true;
elements.streamButton.innerHTML = ' Listening...';
elements.streamButton.classList.add('recording');
elements.streamButton.classList.remove('processing');
// Clear silence timer
if (state.silenceTimer) {
clearTimeout(state.silenceTimer);
state.silenceTimer = null;
}
} else if (state.isSpeaking && !isSilent) {
// Still speaking, reset silence timer
if (state.silenceTimer) {
clearTimeout(state.silenceTimer);
state.silenceTimer = null;
}
}
// Update speaking state for non-silent audio
if (!isSilent) {
state.isSpeaking = true;
}
}
// Send audio chunk to server
function sendAudioChunk(audioData, speaker) {
if (!state.socket || !state.socket.connected) {
console.warn('Cannot send audio: socket not connected');
return;
}
const wavData = createWavBlob(audioData, 24000);
const reader = new FileReader();
reader.onloadend = function() {
const base64data = reader.result;
// Send to server using Socket.IO
state.socket.emit('stream_audio', {
speaker: speaker,
audio: base64data
});
};
reader.readAsDataURL(wavData);
}
// Draw audio visualizer
function drawVisualizer() {
if (!canvasContext) {
return;
}
state.visualizerAnimationFrame = requestAnimationFrame(drawVisualizer);
// Skip drawing if visualizer is hidden
if (!elements.showVisualizer.checked) {
if (elements.visualizerCanvas.style.opacity !== '0') {
elements.visualizerCanvas.style.opacity = '0';
}
return;
} else if (elements.visualizerCanvas.style.opacity !== '1') {
elements.visualizerCanvas.style.opacity = '1';
}
// Get frequency data if available
if (state.isStreaming && state.analyser) {
try {
state.analyser.getByteFrequencyData(visualizerDataArray);
} catch (e) {
console.error("Error getting frequency data:", e);
}
} else {
// Fade out when not streaming
for (let i = 0; i < visualizerDataArray.length; i++) {
visualizerDataArray[i] = Math.max(0, visualizerDataArray[i] - 5);
}
}
// Clear canvas
canvasContext.fillStyle = 'rgb(0, 0, 0)';
canvasContext.fillRect(0, 0, elements.visualizerCanvas.width, elements.visualizerCanvas.height);
// Draw gradient bars
const width = elements.visualizerCanvas.width;
const height = elements.visualizerCanvas.height;
const barCount = Math.min(visualizerBufferLength, 64);
const barWidth = width / barCount - 1;
for (let i = 0; i < barCount; i++) {
const index = Math.floor(i * visualizerBufferLength / barCount);
const value = visualizerDataArray[index];
// Use logarithmic scale for better audio visualization
// This makes low values more visible while still maintaining full range
const logFactor = 20;
const scaledValue = Math.log(1 + (value / 255) * logFactor) / Math.log(1 + logFactor);
const barHeight = scaledValue * height;
// Position bars
const x = i * (barWidth + 1);
const y = height - barHeight;
// Create color gradient based on frequency and amplitude
const hue = i / barCount * 360; // Full color spectrum
const saturation = 80 + (value / 255 * 20); // Higher values more saturated
const lightness = 40 + (value / 255 * 20); // Dynamic brightness based on amplitude
// Draw main bar
canvasContext.fillStyle = `hsl(${hue}, ${saturation}%, ${lightness}%)`;
canvasContext.fillRect(x, y, barWidth, barHeight);
// Add reflection effect
if (barHeight > 5) {
const gradient = canvasContext.createLinearGradient(
x, y,
x, y + barHeight * 0.5
);
gradient.addColorStop(0, `hsla(${hue}, ${saturation}%, ${lightness + 20}%, 0.4)`);
gradient.addColorStop(1, `hsla(${hue}, ${saturation}%, ${lightness}%, 0)`);
canvasContext.fillStyle = gradient;
canvasContext.fillRect(x, y, barWidth, barHeight * 0.5);
// Add highlight on top of the bar for better 3D effect
canvasContext.fillStyle = `hsla(${hue}, ${saturation - 20}%, ${lightness + 30}%, 0.7)`;
canvasContext.fillRect(x, y, barWidth, 2);
}
}
// Show/hide the label
elements.visualizerLabel.style.opacity = (state.isStreaming) ? '0' : '0.7';
}
// Toggle visualizer visibility
function toggleVisualizerVisibility() {
const isVisible = elements.showVisualizer.checked;
elements.visualizerCanvas.style.opacity = isVisible ? '1' : '0';
if (isVisible && state.isStreaming && !state.visualizerAnimationFrame) {
drawVisualizer();
}
}
// Handle audio response from server
function handleAudioResponse(data) {
console.log('Received audio response');
// Create message container
const messageElement = document.createElement('div');
messageElement.className = 'message ai';
// Add text content if available
if (data.text) {
const textElement = document.createElement('p');
textElement.textContent = data.text;
messageElement.appendChild(textElement);
}
// Create and configure audio element
const audioElement = document.createElement('audio');
audioElement.controls = true;
audioElement.className = 'audio-player';
// Set audio source
const audioSource = document.createElement('source');
audioSource.src = data.audio;
audioSource.type = 'audio/wav';
// Add fallback text
audioElement.textContent = 'Your browser does not support the audio element.';
// Assemble audio element
audioElement.appendChild(audioSource);
messageElement.appendChild(audioElement);
// Add timestamp
const timeElement = document.createElement('span');
timeElement.className = 'message-time';
timeElement.textContent = new Date().toLocaleTimeString();
messageElement.appendChild(timeElement);
// Add to conversation
elements.conversation.appendChild(messageElement);
// Auto-scroll to bottom
elements.conversation.scrollTop = elements.conversation.scrollHeight;
// Auto-play if enabled
if (elements.autoPlayResponses.checked) {
audioElement.play()
.catch(err => {
console.warn('Auto-play failed:', err);
addSystemMessage('Auto-play failed. Please click play to hear the response.');
});
}
// Re-enable stream button after processing is complete
if (state.isStreaming) {
elements.streamButton.innerHTML = ' Listening...';
elements.streamButton.classList.add('recording');
elements.streamButton.classList.remove('processing');
}
}
// Handle transcription response from server
function handleTranscription(data) {
console.log('Received transcription:', data.text);
// Create message element
const messageElement = document.createElement('div');
messageElement.className = 'message user';
// Add text content
const textElement = document.createElement('p');
textElement.textContent = data.text;
messageElement.appendChild(textElement);
// Add timestamp
const timeElement = document.createElement('span');
timeElement.className = 'message-time';
timeElement.textContent = new Date().toLocaleTimeString();
messageElement.appendChild(timeElement);
// Add to conversation
elements.conversation.appendChild(messageElement);
// Auto-scroll to bottom
elements.conversation.scrollTop = elements.conversation.scrollHeight;
}
// Handle context update from server
function handleContextUpdate(data) {
console.log('Context updated:', data.message);
}
// Handle streaming status updates from server
function handleStreamingStatus(data) {
console.log('Streaming status:', data.status);
if (data.status === 'stopped') {
// Reset UI if needed
if (state.isStreaming) {
stopStreaming(false); // Don't send to server since this came from server
}
}
}
// Add a system message to the conversation
function addSystemMessage(message) {
const messageElement = document.createElement('div');
messageElement.className = 'message system';
messageElement.textContent = message;
elements.conversation.appendChild(messageElement);
// Auto-scroll to bottom
elements.conversation.scrollTop = elements.conversation.scrollHeight;
}
// Create WAV blob from audio data
function createWavBlob(audioData, sampleRate) {
// Function to convert Float32Array to Int16Array for WAV format
function floatTo16BitPCM(output, offset, input) {
for (let i = 0; i < input.length; i++, offset += 2) {
const s = Math.max(-1, Math.min(1, input[i]));
output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
}
// Create WAV header
function writeString(view, offset, string) {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
}
// Create WAV file with header
function encodeWAV(samples) {
const buffer = new ArrayBuffer(44 + samples.length * 2);
const view = new DataView(buffer);
// RIFF chunk descriptor
writeString(view, 0, 'RIFF');
view.setUint32(4, 36 + samples.length * 2, true);
writeString(view, 8, 'WAVE');
// fmt sub-chunk
writeString(view, 12, 'fmt ');
view.setUint32(16, 16, true);
view.setUint16(20, 1, true); // PCM format
view.setUint16(22, 1, true); // Mono channel
view.setUint32(24, sampleRate, true);
view.setUint32(28, sampleRate * 2, true); // Byte rate
view.setUint16(32, 2, true); // Block align
view.setUint16(34, 16, true); // Bits per sample
// data sub-chunk
writeString(view, 36, 'data');
view.setUint32(40, samples.length * 2, true);
floatTo16BitPCM(view, 44, samples);
return buffer;
}
// Convert audio data to TypedArray if it's a regular Array
const samples = Array.isArray(audioData) ? new Float32Array(audioData) : audioData;
// Create WAV blob
const wavBuffer = encodeWAV(samples);
return new Blob([wavBuffer], { type: 'audio/wav' });
}
// Downsample audio buffer to target sample rate
function downsampleBuffer(buffer, originalSampleRate, targetSampleRate) {
if (originalSampleRate === targetSampleRate) {
return buffer;
}
const ratio = originalSampleRate / targetSampleRate;
const newLength = Math.round(buffer.length / ratio);
const result = new Float32Array(newLength);
for (let i = 0; i < newLength; i++) {
const pos = Math.round(i * ratio);
result[i] = buffer[pos];
}
return result;
}
// Initialize the application when DOM is fully loaded
document.addEventListener('DOMContentLoaded', initializeApp);