Demo Update 2
@@ -487,6 +487,6 @@
 </footer>
 
 <!-- Load our JavaScript file -->
-<script src="voice-chat.js"></script>
+<script src="./voice-chat.js"></script>
 </body>
 </html>
@@ -1,388 +1,445 @@
 /**
- * Sesame AI Voice Chat Application
+ * Sesame AI Voice Chat Client
  *
- * This script handles the audio streaming, visualization,
- * and Socket.IO communication for the voice chat application.
+ * A web client that connects to a Sesame AI voice chat server and enables
+ * real-time voice conversation with an AI assistant.
  */
 
+// Configuration constants
+const SERVER_URL = window.location.hostname === 'localhost' ?
+    'http://localhost:5000' : window.location.origin;
+const ENERGY_WINDOW_SIZE = 15;
+const CLIENT_SILENCE_DURATION_MS = 750;
+
+// DOM elements
+const elements = {
+    conversation: null,
+    streamButton: null,
+    clearButton: null,
+    thresholdSlider: null,
+    thresholdValue: null,
+    visualizerCanvas: null,
+    visualizerLabel: null,
+    volumeLevel: null,
+    statusDot: null,
+    statusText: null,
+    speakerSelection: null,
+    autoPlayResponses: null,
+    showVisualizer: null
+};
+
 // Application state
 const state = {
     socket: null,
     audioContext: null,
-    streamProcessor: null,
     analyser: null,
     microphone: null,
+    streamProcessor: null,
     isStreaming: false,
     isSpeaking: false,
-    silenceTimer: null,
-    energyWindow: [],
-    currentSpeaker: 0,
     silenceThreshold: 0.01,
-    visualizerAnimationFrame: null,
+    energyWindow: [],
+    silenceTimer: null,
     volumeUpdateInterval: null,
-    connectionAttempts: 0
+    visualizerAnimationFrame: null,
+    currentSpeaker: 0
 };
 
-// Constants
-const ENERGY_WINDOW_SIZE = 10;
-const CLIENT_SILENCE_DURATION_MS = 1000; // 1 second of silence before processing
-const MAX_CONNECTION_ATTEMPTS = 5;
-const RECONNECTION_DELAY_MS = 2000;
+// Visualizer variables
+let canvasContext = null;
+let visualizerBufferLength = 0;
+let visualizerDataArray = null;
 
-// DOM elements
-const elements = {
-    conversation: document.getElementById('conversation'),
-    speakerSelect: document.getElementById('speakerSelect'),
-    streamButton: document.getElementById('streamButton'),
-    clearButton: document.getElementById('clearButton'),
-    statusDot: document.getElementById('statusDot'),
-    statusText: document.getElementById('statusText'),
-    visualizerCanvas: document.getElementById('audioVisualizer'),
-    visualizerLabel: document.getElementById('visualizerLabel'),
-    thresholdSlider: document.getElementById('thresholdSlider'),
-    thresholdValue: document.getElementById('thresholdValue'),
-    volumeLevel: document.getElementById('volumeLevel'),
-    autoPlayResponses: document.getElementById('autoPlayResponses'),
-    showVisualizer: document.getElementById('showVisualizer')
-};
-
-// Visualization variables
-let canvasContext;
-let visualizerBufferLength;
-let visualizerDataArray;
-
 // Initialize the application
 function initializeApp() {
-    // Set up event listeners
-    elements.streamButton.addEventListener('click', toggleStreaming);
-    elements.clearButton.addEventListener('click', clearConversation);
-    elements.thresholdSlider.addEventListener('input', updateThreshold);
-    elements.speakerSelect.addEventListener('change', () => {
-        state.currentSpeaker = parseInt(elements.speakerSelect.value);
-    });
-    elements.showVisualizer.addEventListener('change', toggleVisualizerVisibility);
+    // Initialize the UI elements
+    initializeUIElements();
 
-    // Initialize audio context
-    setupAudioContext();
+    // Initialize socket.io connection
+    setupSocketConnection();
 
-    // Set up visualization
+    // Setup event listeners
+    setupEventListeners();
+
+    // Initialize visualizer
     setupVisualizer();
 
-    // Connect to Socket.IO server
-    connectToServer();
-
-    // Add welcome message
-    addSystemMessage('Welcome to Sesame AI Voice Chat! Click "Start Conversation" to begin speaking.');
+    // Show welcome message
+    addSystemMessage('Welcome to Sesame AI Voice Chat! Click "Start Conversation" to begin.');
 }
 
-// Connect to Socket.IO server
-function connectToServer() {
-    try {
-        // Use the server URL with or without a specific port
-        const serverUrl = window.location.origin;
+// Initialize UI elements
+function initializeUIElements() {
+    // Main UI containers
+    const chatContainer = document.querySelector('.chat-container');
+    const controlPanel = document.querySelector('.control-panel');
 
-        updateStatus('Connecting...', 'connecting');
-        console.log(`Connecting to Socket.IO server at ${serverUrl}`);
+    // Create conversation section
+    chatContainer.innerHTML = `
+        <div class="chat-header">
+            <h2>Conversation</h2>
+            <div class="status-indicator">
+                <div class="status-dot"></div>
+                <span class="status-text">Disconnected</span>
+            </div>
+        </div>
+        <div class="conversation"></div>
+    `;
 
-        state.socket = io(serverUrl, {
-            reconnectionDelay: RECONNECTION_DELAY_MS,
-            reconnectionDelayMax: 5000,
-            reconnectionAttempts: MAX_CONNECTION_ATTEMPTS
-        });
+    // Create control panel
+    controlPanel.innerHTML = `
+        <div class="visualizer-section">
+            <div class="visualizer-container">
+                <canvas id="audioVisualizer"></canvas>
+                <div class="visualizer-label">Speak to see audio visualization</div>
+            </div>
+        </div>
 
-        setupSocketListeners();
-    } catch (error) {
-        console.error('Error connecting to server:', error);
-        updateStatus('Connection failed. Retrying...', 'error');
+        <div class="controls">
+            <div class="control-group">
+                <div class="control-label">Voice Controls</div>
 
-        // Try to reconnect
-        if (state.connectionAttempts < MAX_CONNECTION_ATTEMPTS) {
-            state.connectionAttempts++;
-            setTimeout(connectToServer, RECONNECTION_DELAY_MS);
-        } else {
-            updateStatus('Could not connect to server', 'error');
-            addSystemMessage('Failed to connect to the server. Please check your connection and refresh the page.');
-        }
-    }
+                <div class="volume-indicator">
+                    <div class="volume-level" style="width:0%"></div>
+                </div>
+
+                <div class="slider-container">
+                    <div class="slider-label">
+                        <span>Silence Threshold</span>
+                        <span id="thresholdValue">0.01</span>
+                    </div>
+                    <input type="range" id="thresholdSlider" min="0.001" max="0.05" step="0.001" value="0.01">
+                </div>
+
+                <select id="speakerSelection">
+                    <option value="0">Speaker 1 (You)</option>
+                    <option value="1">Speaker 2 (Alternative)</option>
+                </select>
+
+                <div class="button-row">
+                    <button id="streamButton"><i class="fas fa-microphone"></i> Start Conversation</button>
+                    <button id="clearButton"><i class="fas fa-trash"></i> Clear</button>
+                </div>
+            </div>
+
+            <div class="control-group settings-panel">
+                <div class="control-label">Settings</div>
+
+                <div class="settings-toggles">
+                    <div class="toggle-switch">
+                        <input type="checkbox" id="autoPlayResponses" checked>
+                        <label for="autoPlayResponses">Auto-play AI responses</label>
+                    </div>
+
+                    <div class="toggle-switch">
+                        <input type="checkbox" id="showVisualizer" checked>
+                        <label for="showVisualizer">Show audio visualizer</label>
+                    </div>
+                </div>
+            </div>
+        </div>
+    `;
+
+    // Store references to UI elements
+    elements.conversation = document.querySelector('.conversation');
+    elements.streamButton = document.getElementById('streamButton');
+    elements.clearButton = document.getElementById('clearButton');
+    elements.thresholdSlider = document.getElementById('thresholdSlider');
+    elements.thresholdValue = document.getElementById('thresholdValue');
+    elements.visualizerCanvas = document.getElementById('audioVisualizer');
+    elements.visualizerLabel = document.querySelector('.visualizer-label');
+    elements.volumeLevel = document.querySelector('.volume-level');
+    elements.statusDot = document.querySelector('.status-dot');
+    elements.statusText = document.querySelector('.status-text');
+    elements.speakerSelection = document.getElementById('speakerSelection');
+    elements.autoPlayResponses = document.getElementById('autoPlayResponses');
+    elements.showVisualizer = document.getElementById('showVisualizer');
 }
 
-// Set up Socket.IO event listeners
-function setupSocketListeners() {
-    if (!state.socket) return;
+// Setup Socket.IO connection
+function setupSocketConnection() {
+    state.socket = io(SERVER_URL);
 
+    // Connection events
     state.socket.on('connect', () => {
-        console.log('Connected to Socket.IO server');
-        updateStatus('Connected', 'connected');
-        state.connectionAttempts = 0;
-        elements.streamButton.disabled = false;
-        addSystemMessage('Connected to server');
+        console.log('Connected to server');
+        updateConnectionStatus(true);
     });
 
     state.socket.on('disconnect', () => {
-        console.log('Disconnected from Socket.IO server');
-        updateStatus('Disconnected', 'disconnected');
+        console.log('Disconnected from server');
+        updateConnectionStatus(false);
 
         // Stop streaming if active
        if (state.isStreaming) {
-            stopStreaming(false); // false = don't send to server
+            stopStreaming(false);
        }
-
-        elements.streamButton.disabled = true;
-        addSystemMessage('Disconnected from server. Trying to reconnect...');
-    });
-
-    state.socket.on('status', (data) => {
-        console.log('Status:', data);
-        addSystemMessage(data.message);
     });
 
     state.socket.on('error', (data) => {
-        console.error('Server error:', data);
+        console.error('Socket error:', data.message);
         addSystemMessage(`Error: ${data.message}`);
     });
 
+    // Register message handlers
     state.socket.on('audio_response', handleAudioResponse);
     state.socket.on('transcription', handleTranscription);
     state.socket.on('context_updated', handleContextUpdate);
     state.socket.on('streaming_status', handleStreamingStatus);
+}
 
-    state.socket.on('connect_error', (error) => {
-        console.error('Connection error:', error);
-        updateStatus('Connection Error', 'error');
+// Setup event listeners
+function setupEventListeners() {
+    // Stream button
+    elements.streamButton.addEventListener('click', toggleStreaming);
+
+    // Clear button
+    elements.clearButton.addEventListener('click', clearConversation);
+
+    // Threshold slider
+    elements.thresholdSlider.addEventListener('input', updateThreshold);
+
+    // Speaker selection
+    elements.speakerSelection.addEventListener('change', () => {
+        state.currentSpeaker = parseInt(elements.speakerSelection.value, 10);
     });
+
+    // Visualizer toggle
+    elements.showVisualizer.addEventListener('change', toggleVisualizerVisibility);
 }
 
-// Update the connection status in the UI
-function updateStatus(message, status) {
-    elements.statusText.textContent = message;
-    elements.statusDot.className = 'status-dot';
-
-    if (status === 'connected') {
-        elements.statusDot.classList.add('active');
-    } else if (status === 'connecting') {
-        elements.statusDot.style.backgroundColor = '#FFA500';
-    } else if (status === 'error') {
-        elements.statusDot.style.backgroundColor = '#F44336';
-    }
-}
-
-// Set up audio context
-function setupAudioContext() {
-    try {
-        state.audioContext = new (window.AudioContext || window.webkitAudioContext)();
-        console.log('Audio context initialized');
-    } catch (err) {
-        console.error('Error setting up audio context:', err);
-        addSystemMessage(`Audio context error: ${err.message}`);
-        elements.streamButton.disabled = true;
-    }
-}
-
-// Set up audio visualizer
+// Setup audio visualizer
 function setupVisualizer() {
+    if (!elements.visualizerCanvas) return;
+
     canvasContext = elements.visualizerCanvas.getContext('2d');
 
-    // Set canvas size to match container
-    function resizeCanvas() {
-        const container = elements.visualizerCanvas.parentElement;
-        elements.visualizerCanvas.width = container.clientWidth;
-        elements.visualizerCanvas.height = container.clientHeight;
-    }
+    // Set canvas dimensions
+    elements.visualizerCanvas.width = elements.visualizerCanvas.offsetWidth;
+    elements.visualizerCanvas.height = elements.visualizerCanvas.offsetHeight;
 
-    // Call initially and on window resize
-    resizeCanvas();
-    window.addEventListener('resize', resizeCanvas);
-
-    // Create placeholder data array
-    visualizerBufferLength = 128;
-    visualizerDataArray = new Uint8Array(visualizerBufferLength);
+    // Initialize the visualizer
+    drawVisualizer();
 }
 
-// Toggle stream on/off
+// Update connection status UI
+function updateConnectionStatus(isConnected) {
+    elements.statusDot.classList.toggle('active', isConnected);
+    elements.statusText.textContent = isConnected ? 'Connected' : 'Disconnected';
+}
+
+// Toggle streaming state
 function toggleStreaming() {
     if (state.isStreaming) {
-        stopStreaming(true); // true = send to server
+        stopStreaming(true);
     } else {
         startStreaming();
     }
 }
 
 // Start streaming audio to the server
-async function startStreaming() {
-    if (!state.socket || !state.socket.connected) {
-        addSystemMessage('Cannot start conversation: Not connected to server');
-        return;
-    }
+function startStreaming() {
+    if (state.isStreaming) return;
 
-    try {
-        // Request microphone access
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    // Request microphone access
+    navigator.mediaDevices.getUserMedia({ audio: true, video: false })
+        .then(stream => {
+            // Show processing state while setting up
+            elements.streamButton.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Initializing...';
 
-        // Update state
-        state.isStreaming = true;
-        state.isSpeaking = false;
-        state.energyWindow = [];
-        state.currentSpeaker = parseInt(elements.speakerSelect.value);
+            // Create audio context
+            state.audioContext = new (window.AudioContext || window.webkitAudioContext)();
 
-        // Update UI
-        elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Listening...';
-        elements.streamButton.classList.add('recording');
-        elements.visualizerLabel.style.opacity = '0';
+            // Create microphone source
+            state.microphone = state.audioContext.createMediaStreamSource(stream);
 
-        // Set up audio processing
-        setupAudioProcessing(stream);
+            // Create analyser for visualizer
+            state.analyser = state.audioContext.createAnalyser();
+            state.analyser.fftSize = 256;
+            visualizerBufferLength = state.analyser.frequencyBinCount;
+            visualizerDataArray = new Uint8Array(visualizerBufferLength);
 
-        // Start volume meter updates
-        state.volumeUpdateInterval = setInterval(updateVolumeMeter, 100);
+            // Connect microphone to analyser
+            state.microphone.connect(state.analyser);
 
-        addSystemMessage('Listening - speak naturally and pause when finished');
+            // Create script processor for audio processing
+            const bufferSize = 4096;
+            state.streamProcessor = state.audioContext.createScriptProcessor(bufferSize, 1, 1);
 
-    } catch (err) {
-        console.error('Error starting audio stream:', err);
-        addSystemMessage(`Microphone error: ${err.message}`);
-        cleanupAudioResources();
-    }
-}
+            // Set up audio processing callback
+            state.streamProcessor.onaudioprocess = handleAudioProcess;
 
-// Set up audio processing pipeline
-function setupAudioProcessing(stream) {
-    // Store microphone stream for later cleanup
-    state.microphone = stream;
+            // Connect the processors
+            state.analyser.connect(state.streamProcessor);
+            state.streamProcessor.connect(state.audioContext.destination);
 
-    // Create source from microphone
-    const source = state.audioContext.createMediaStreamSource(stream);
+            // Update UI
+            state.isStreaming = true;
+            elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Listening...';
+            elements.streamButton.classList.add('recording');
 
-    // Setup analyzer for visualization
-    state.analyser = state.audioContext.createAnalyser();
-    state.analyser.fftSize = 256;
-    state.analyser.smoothingTimeConstant = 0.8;
-    state.analyser.minDecibels = -90;
-    state.analyser.maxDecibels = -10;
+            // Initialize energy window
+            state.energyWindow = [];
 
-    visualizerBufferLength = state.analyser.frequencyBinCount;
-    visualizerDataArray = new Uint8Array(visualizerBufferLength);
+            // Start volume meter updates
+            state.volumeUpdateInterval = setInterval(updateVolumeMeter, 100);
 
-    // Connect source to analyzer
-    source.connect(state.analyser);
+            // Start visualizer if enabled
+            if (elements.showVisualizer.checked && !state.visualizerAnimationFrame) {
+                drawVisualizer();
+            }
 
-    // Start visualization
-    if (state.visualizerAnimationFrame) {
-        cancelAnimationFrame(state.visualizerAnimationFrame);
-    }
-    drawVisualizer();
+            // Show starting message
+            addSystemMessage('Listening... Speak clearly into your microphone.');
 
-    // Setup audio processor
-    state.streamProcessor = state.audioContext.createScriptProcessor(4096, 1, 1);
-
-    // Connect audio nodes
-    source.connect(state.streamProcessor);
-    state.streamProcessor.connect(state.audioContext.destination);
-
-    // Process audio
-    state.streamProcessor.onaudioprocess = handleAudioProcess;
-}
-
-// Handle each frame of audio data
-function handleAudioProcess(e) {
-    const audioData = e.inputBuffer.getChannelData(0);
-
-    // Calculate energy (volume) for silence detection
-    const energy = calculateAudioEnergy(audioData);
-    updateEnergyWindow(energy);
-
-    // Check if currently silent
-    const avgEnergy = calculateAverageEnergy();
-    const isSilent = avgEnergy < state.silenceThreshold;
-
-    // Handle silence/speech transitions
-    handleSpeechState(isSilent);
-
-    // Process and send audio
-    const downsampled = downsampleBuffer(audioData, state.audioContext.sampleRate, 24000);
-    sendAudioChunk(downsampled, state.currentSpeaker);
+            // Notify the server that we're starting
+            state.socket.emit('stream_audio', {
+                audio: '',
+                speaker: state.currentSpeaker
+            });
+        })
+        .catch(err => {
+            console.error('Error accessing microphone:', err);
+            addSystemMessage(`Error: ${err.message}. Please make sure your microphone is connected and you've granted permission.`);
+            elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Start Conversation';
+        });
 }
 
 // Stop streaming audio
-function stopStreaming(sendToServer = true) {
-    // Cleanup audio resources
-    cleanupAudioResources();
+function stopStreaming(notifyServer = true) {
+    if (!state.isStreaming) return;
 
-    // Reset state
-    state.isStreaming = false;
-    state.isSpeaking = false;
-    state.energyWindow = [];
-
-    // Update UI
+    // Update UI first
     elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Start Conversation';
-    elements.streamButton.classList.remove('recording', 'processing');
-    elements.streamButton.style.backgroundColor = '';
-    elements.volumeLevel.style.width = '100%';
+    elements.streamButton.classList.remove('recording');
+    elements.streamButton.classList.remove('processing');
 
-    // Clear volume meter updates
+    // Stop volume meter updates
     if (state.volumeUpdateInterval) {
         clearInterval(state.volumeUpdateInterval);
         state.volumeUpdateInterval = null;
     }
 
-    addSystemMessage('Conversation paused');
-
-    // Notify server
-    if (sendToServer && state.socket && state.socket.connected) {
-        state.socket.emit('stop_streaming', {
-            speaker: state.currentSpeaker
-        });
-    }
-}
-
-// Clean up audio processing resources
-function cleanupAudioResources() {
-    // Stop microphone stream
-    if (state.microphone) {
-        state.microphone.getTracks().forEach(track => track.stop());
-        state.microphone = null;
-    }
-
-    // Disconnect audio processor
+    // Stop all audio processing
     if (state.streamProcessor) {
         state.streamProcessor.disconnect();
-        state.streamProcessor.onaudioprocess = null;
         state.streamProcessor = null;
     }
 
-    // Disconnect analyzer
     if (state.analyser) {
         state.analyser.disconnect();
-        state.analyser = null;
     }
 
-    // Cancel visualizer animation
+    if (state.microphone) {
+        state.microphone.disconnect();
+    }
+
+    // Close audio context
+    if (state.audioContext && state.audioContext.state !== 'closed') {
+        state.audioContext.close().catch(err => console.warn('Error closing audio context:', err));
+    }
+
+    // Cleanup animation frames
+    if (state.visualizerAnimationFrame) {
+        cancelAnimationFrame(state.visualizerAnimationFrame);
+        state.visualizerAnimationFrame = null;
+    }
+
+    // Reset state
+    state.isStreaming = false;
+    state.isSpeaking = false;
+
+    // Notify the server
+    if (notifyServer && state.socket && state.socket.connected) {
+        state.socket.emit('stop_streaming', {
+            speaker: state.currentSpeaker
+        });
+    }
+
+    // Show message
+    addSystemMessage('Conversation paused. Click "Start Conversation" to resume.');
+}
+
+// Handle audio processing
+function handleAudioProcess(event) {
+    const inputData = event.inputBuffer.getChannelData(0);
+
+    // Calculate audio energy (volume level)
+    const energy = calculateAudioEnergy(inputData);
+
+    // Update energy window for averaging
+    updateEnergyWindow(energy);
+
+    // Calculate average energy
+    const avgEnergy = calculateAverageEnergy();
+
+    // Determine if audio is silent
+    const isSilent = avgEnergy < state.silenceThreshold;
+
+    // Handle speech state based on silence
+    handleSpeechState(isSilent);
+
+    // Only send audio chunk if we detect speech
+    if (!isSilent) {
+        // Create a resampled version at 24kHz for the server
+        // Most WebRTC audio is 48kHz, but we want 24kHz for the model
+        const resampledData = downsampleBuffer(inputData, state.audioContext.sampleRate, 24000);
+
+        // Send the audio chunk to the server
+        sendAudioChunk(resampledData, state.currentSpeaker);
+    }
+}
+
+// Cleanup audio resources when done
+function cleanupAudioResources() {
+    // Stop all audio processing
+    if (state.streamProcessor) {
+        state.streamProcessor.disconnect();
+        state.streamProcessor = null;
+    }
+
+    if (state.analyser) {
+        state.analyser.disconnect();
+        state.analyser = null;
+    }
+
+    if (state.microphone) {
+        state.microphone.disconnect();
+        state.microphone = null;
+    }
+
+    // Close audio context
+    if (state.audioContext && state.audioContext.state !== 'closed') {
+        state.audioContext.close().catch(err => console.warn('Error closing audio context:', err));
+    }
+
+    // Cancel all timers and animation frames
+    if (state.volumeUpdateInterval) {
+        clearInterval(state.volumeUpdateInterval);
+        state.volumeUpdateInterval = null;
+    }
+
     if (state.visualizerAnimationFrame) {
         cancelAnimationFrame(state.visualizerAnimationFrame);
         state.visualizerAnimationFrame = null;
     }
 
-    // Cancel silence timer
     if (state.silenceTimer) {
         clearTimeout(state.silenceTimer);
         state.silenceTimer = null;
     }
-
-    // Reset visualizer display
-    if (canvasContext) {
-        canvasContext.clearRect(0, 0, elements.visualizerCanvas.width, elements.visualizerCanvas.height);
-        elements.visualizerLabel.style.opacity = '0.7';
-    }
 }
 
 // Clear conversation history
 function clearConversation() {
-    // Clear UI
-    elements.conversation.innerHTML = '';
-    addSystemMessage('Conversation cleared');
+    if (elements.conversation) {
+        elements.conversation.innerHTML = '';
+        addSystemMessage('Conversation cleared.');
 
-    // Notify server
-    if (state.socket && state.socket.connected) {
-        state.socket.emit('clear_context');
+        // Notify server to clear context
+        if (state.socket && state.socket.connected) {
+            state.socket.emit('clear_context');
+        }
     }
 }
 
@@ -390,9 +447,9 @@ function clearConversation() {
 function calculateAudioEnergy(buffer) {
     let sum = 0;
     for (let i = 0; i < buffer.length; i++) {
-        sum += Math.abs(buffer[i]);
+        sum += buffer[i] * buffer[i];
     }
-    return sum / buffer.length;
+    return Math.sqrt(sum / buffer.length);
 }
 
 // Update energy window for averaging
@@ -406,7 +463,9 @@ function updateEnergyWindow(energy) {
 // Calculate average energy from window
 function calculateAverageEnergy() {
     if (state.energyWindow.length === 0) return 0;
-    return state.energyWindow.reduce((sum, val) => sum + val, 0) / state.energyWindow.length;
+
+    const sum = state.energyWindow.reduce((a, b) => a + b, 0);
+    return sum / state.energyWindow.length;
 }
 
 // Update the threshold from the slider
@@ -417,32 +476,26 @@ function updateThreshold() {
 
 // Update the volume meter display
 function updateVolumeMeter() {
-    if (!state.isStreaming || !state.analyser) return;
+    if (!state.isStreaming || !state.energyWindow.length) return;
 
-    // Get current volume level
-    const dataArray = new Uint8Array(state.analyser.frequencyBinCount);
-    state.analyser.getByteFrequencyData(dataArray);
+    const avgEnergy = calculateAverageEnergy();
 
-    // Calculate average volume
-    let sum = 0;
-    for (let i = 0; i < dataArray.length; i++) {
-        sum += dataArray[i];
-    }
-    const average = sum / dataArray.length;
+    // Scale energy to percentage (0-100)
+    // Typically, energy values will be very small (e.g., 0.001 to 0.1)
+    // So we multiply by a factor to make it more visible
+    const scaleFactor = 1000;
+    const percentage = Math.min(100, Math.max(0, avgEnergy * scaleFactor));
 
-    // Normalize to 0-100%
-    const percentage = Math.min(100, Math.max(0, average / 128 * 100));
+    // Update volume meter width
+    elements.volumeLevel.style.width = `${percentage}%`;
 
-    // Invert because we're showing the "empty" portion
-    elements.volumeLevel.style.width = (100 - percentage) + '%';
-
     // Change color based on level
     if (percentage > 70) {
-        elements.volumeLevel.style.backgroundColor = 'rgba(244, 67, 54, 0.5)'; // Red
+        elements.volumeLevel.style.backgroundColor = '#ff5252';
     } else if (percentage > 30) {
-        elements.volumeLevel.style.backgroundColor = 'rgba(255, 235, 59, 0.5)'; // Yellow
+        elements.volumeLevel.style.backgroundColor = '#4CAF50';
     } else {
-        elements.volumeLevel.style.backgroundColor = 'rgba(0, 0, 0, 0.5)'; // Dark
+        elements.volumeLevel.style.backgroundColor = '#4c84ff';
     }
 }
 
@@ -452,31 +505,16 @@ function handleSpeechState(isSilent) {
         // Transition from speaking to silence
         if (!state.silenceTimer) {
             state.silenceTimer = setTimeout(() => {
-                // Silence persisted long enough - process the audio
-                elements.streamButton.innerHTML = '<i class="fas fa-cog fa-spin"></i> Processing...';
-                elements.streamButton.classList.remove('recording');
-                elements.streamButton.classList.add('processing');
-                addSystemMessage('Detected pause in speech, processing response...');
+                // Only consider it a real silence after a certain duration
+                // This prevents detecting brief pauses as the end of speech
+                state.isSpeaking = false;
+                state.silenceTimer = null;
             }, CLIENT_SILENCE_DURATION_MS);
         }
-    } else if (!state.isSpeaking && !isSilent) {
-        // Transition from silence to speaking
-        state.isSpeaking = true;
-        elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Listening...';
-        elements.streamButton.classList.add('recording');
-        elements.streamButton.classList.remove('processing');
-
-        // Clear silence timer
-        if (state.silenceTimer) {
-            clearTimeout(state.silenceTimer);
-            state.silenceTimer = null;
-        }
-    } else if (state.isSpeaking && !isSilent) {
-        // Still speaking, reset silence timer
-        if (state.silenceTimer) {
-            clearTimeout(state.silenceTimer);
-            state.silenceTimer = null;
-        }
+    } else if (state.silenceTimer && !isSilent) {
+        // User started speaking again, cancel the silence timer
+        clearTimeout(state.silenceTimer);
+        state.silenceTimer = null;
     }
 
     // Update speaking state for non-silent audio
@@ -488,7 +526,7 @@ function handleSpeechState(isSilent) {
 // Send audio chunk to server
 function sendAudioChunk(audioData, speaker) {
     if (!state.socket || !state.socket.connected) {
-        console.warn('Cannot send audio: socket not connected');
+        console.warn('Socket not connected');
         return;
     }
 
@@ -498,10 +536,10 @@ function sendAudioChunk(audioData, speaker) {
     reader.onloadend = function() {
         const base64data = reader.result;
 
-        // Send to server using Socket.IO
+        // Send the audio chunk to the server
         state.socket.emit('stream_audio', {
-            speaker: speaker,
-            audio: base64data
+            audio: base64data,
+            speaker: speaker
         });
     };
 
@@ -531,7 +569,7 @@ function drawVisualizer() {
         try {
             state.analyser.getByteFrequencyData(visualizerDataArray);
         } catch (e) {
-            console.error("Error getting frequency data:", e);
+            console.warn('Error getting frequency data:', e);
         }
     } else {
         // Fade out when not streaming