Demo Update 13
@@ -168,6 +168,10 @@
    animation: pulse 1.5s infinite;
}

button.processing {
    background-color: #ffa000;
}

@keyframes pulse {
    0% { opacity: 1; }
    50% { opacity: 0.7; }
@@ -193,6 +197,63 @@
    background-color: var(--success-color);
}

/* Audio visualizer styles */
.visualizer-container {
    margin-top: 15px;
    position: relative;
    width: 100%;
    height: 100px;
    background-color: #000;
    border-radius: 8px;
    overflow: hidden;
}

#audioVisualizer {
    width: 100%;
    height: 100%;
    transition: opacity 0.3s;
}

#visualizerLabel {
    position: absolute;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    color: rgba(255, 255, 255, 0.7);
    font-size: 0.9em;
    pointer-events: none;
    transition: opacity 0.3s;
}

.volume-meter {
    height: 8px;
    width: 100%;
    background-color: #eee;
    border-radius: 4px;
    margin-top: 8px;
    overflow: hidden;
}

#volumeLevel {
    height: 100%;
    width: 0%;
    background-color: var(--primary-color);
    border-radius: 4px;
    transition: width 0.1s ease, background-color 0.2s;
}

.settings-toggles {
    display: flex;
    flex-direction: column;
    gap: 12px;
}

.toggle-switch {
    display: flex;
    align-items: center;
    gap: 8px;
}

footer {
    margin-top: 30px;
    text-align: center;
@@ -233,6 +294,15 @@
                    Clear
                </button>
            </div>

            <!-- Audio visualizer and volume meter -->
            <div class="visualizer-container">
                <canvas id="audioVisualizer"></canvas>
                <div id="visualizerLabel">Start speaking to see audio visualization</div>
            </div>
            <div class="volume-meter">
                <div id="volumeLevel"></div>
            </div>
        </div>

        <div class="settings-panel">
@@ -242,6 +312,10 @@
                <input type="checkbox" id="autoPlayResponses" checked>
                <label for="autoPlayResponses">Autoplay Responses</label>
            </div>
            <div class="toggle-switch">
                <input type="checkbox" id="showVisualizer" checked>
                <label for="showVisualizer">Show Audio Visualizer</label>
            </div>
            <div>
                <label for="speakerSelect">Speaker Voice:</label>
                <select id="speakerSelect">
@@ -249,6 +323,10 @@
                    <option value="1">Speaker 1 (AI)</option>
                </select>
            </div>
            <div>
                <label for="thresholdSlider">Silence Threshold: <span id="thresholdValue">0.010</span></label>
                <input type="range" id="thresholdSlider" min="0.001" max="0.05" step="0.001" value="0.01">
            </div>
        </div>
    </div>
</div>
@@ -258,454 +336,7 @@
    <p>Powered by CSM 1B & Llama 3.2 | Whisper for speech recognition</p>
</footer>

<script>
    // Configuration constants
    const SERVER_URL = window.location.hostname === 'localhost' ?
        'http://localhost:5000' : window.location.origin;
    const ENERGY_WINDOW_SIZE = 15;
    const CLIENT_SILENCE_DURATION_MS = 750;
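    // Silence detection, as implemented below: RMS energy is averaged over the
    // last ENERGY_WINDOW_SIZE processor buffers; once the average stays below
    // state.silenceThreshold for CLIENT_SILENCE_DURATION_MS, the utterance is
    // considered finished and is sent to the server.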

    // DOM elements
    const elements = {
        conversation: document.getElementById('conversation'),
        streamButton: document.getElementById('streamButton'),
        clearButton: document.getElementById('clearButton'),
        statusDot: document.getElementById('statusDot'),
        statusText: document.getElementById('statusText'),
        speakerSelection: document.getElementById('speakerSelect'),
        autoPlayResponses: document.getElementById('autoPlayResponses')
    };

    // Application state
    const state = {
        socket: null,
        audioContext: null,
        analyser: null,
        microphone: null,
        streamProcessor: null,
        isStreaming: false,
        isSpeaking: false,
        silenceThreshold: 0.01,
        energyWindow: [],
        silenceTimer: null,
        currentSpeaker: 0
    };

    // Initialize the application
    function initializeApp() {
        // Initialize socket.io connection
        setupSocketConnection();

        // Setup event listeners
        setupEventListeners();

        // Show welcome message
        addSystemMessage('Welcome to CSM Voice Chat! Click "Start Conversation" to begin.');
    }

    // Setup Socket.IO connection
    function setupSocketConnection() {
        state.socket = io(SERVER_URL);
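        // `io` is the global from the Socket.IO client library, which the page
        // is assumed to load separately.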

        // Connection events
        state.socket.on('connect', () => {
            updateConnectionStatus(true);
            addSystemMessage('Connected to server.');
        });

        state.socket.on('disconnect', () => {
            updateConnectionStatus(false);
            addSystemMessage('Disconnected from server.');
            stopStreaming(false);
        });

        state.socket.on('error', (data) => {
            addSystemMessage(`Error: ${data.message}`);
            console.error('Server error:', data.message);
        });

        // Register message handlers
        state.socket.on('transcription', handleTranscription);
        state.socket.on('context_updated', handleContextUpdate);
        state.socket.on('streaming_status', handleStreamingStatus);
        state.socket.on('processing_status', handleProcessingStatus);

        // Handlers for incremental audio streaming
        state.socket.on('audio_response_start', handleAudioResponseStart);
        state.socket.on('audio_response_chunk', handleAudioResponseChunk);
        state.socket.on('audio_response_complete', handleAudioResponseComplete);
    }

    // Setup event listeners
    function setupEventListeners() {
        // Stream button
        elements.streamButton.addEventListener('click', toggleStreaming);

        // Clear button
        elements.clearButton.addEventListener('click', clearConversation);

        // Speaker selection
        elements.speakerSelection.addEventListener('change', () => {
            state.currentSpeaker = parseInt(elements.speakerSelection.value);
        });
    }

    // Update connection status UI
    function updateConnectionStatus(isConnected) {
        if (isConnected) {
            elements.statusDot.classList.add('active');
            elements.statusText.textContent = 'Connected';
        } else {
            elements.statusDot.classList.remove('active');
            elements.statusText.textContent = 'Disconnected';
        }
    }

    // Toggle streaming state
    function toggleStreaming() {
        if (state.isStreaming) {
            stopStreaming();
        } else {
            startStreaming();
        }
    }

    // Start streaming audio to the server
    function startStreaming() {
        if (!state.socket || !state.socket.connected) {
            addSystemMessage('Not connected to server. Please refresh the page.');
            return;
        }

        // Request microphone access
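        // (getUserMedia is only available in secure contexts: HTTPS or localhost.)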
        navigator.mediaDevices.getUserMedia({ audio: true, video: false })
            .then(stream => {
                state.isStreaming = true;
                elements.streamButton.classList.add('recording');
                elements.streamButton.innerHTML = '<i class="fas fa-stop"></i> Stop Recording';

                // Initialize Web Audio API
                state.audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
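                // 16 kHz matches the sample rate Whisper expects for transcription.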
                state.microphone = state.audioContext.createMediaStreamSource(stream);
                state.analyser = state.audioContext.createAnalyser();
                state.analyser.fftSize = 1024;
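                // fftSize also fixes how many time-domain samples
                // getFloatTimeDomainData returns: 1024 (~64 ms at 16 kHz).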

                state.microphone.connect(state.analyser);

                // Create processor node for audio data
                const processorNode = state.audioContext.createScriptProcessor(4096, 1, 1);
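                // (ScriptProcessorNode is deprecated in the Web Audio API;
                // AudioWorkletNode is the modern replacement.)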
                processorNode.onaudioprocess = handleAudioProcess;
                state.analyser.connect(processorNode);
                processorNode.connect(state.audioContext.destination);
                state.streamProcessor = processorNode;

                state.silenceTimer = null;
                state.energyWindow = [];
                state.isSpeaking = false;

                // Notify server
                state.socket.emit('start_stream');
                addSystemMessage('Recording started. Speak now...');
            })
            .catch(error => {
                console.error('Error accessing microphone:', error);
                addSystemMessage('Could not access microphone. Please check permissions.');
            });
    }

    // Stop streaming audio
    function stopStreaming(notifyServer = true) {
        if (state.isStreaming) {
            state.isStreaming = false;
            elements.streamButton.classList.remove('recording');
            elements.streamButton.innerHTML = '<i class="fas fa-microphone"></i> Start Conversation';

            // Clean up audio resources
            if (state.streamProcessor) {
                state.streamProcessor.disconnect();
                state.streamProcessor = null;
            }

            if (state.analyser) {
                state.analyser.disconnect();
                state.analyser = null;
            }

            if (state.microphone) {
                state.microphone.disconnect();
                state.microphone = null;
            }

            if (state.audioContext) {
                state.audioContext.close();
                state.audioContext = null;
            }

            // Clear any pending silence timer
            if (state.silenceTimer) {
                clearTimeout(state.silenceTimer);
                state.silenceTimer = null;
            }

            // Notify server if needed
            if (notifyServer && state.socket && state.socket.connected) {
                state.socket.emit('stop_stream');
            }

            addSystemMessage('Recording stopped.');
        }
    }

    // Handle audio processing
    function handleAudioProcess(event) {
        if (!state.isStreaming) return;

        const inputData = event.inputBuffer.getChannelData(0);
        const energy = calculateAudioEnergy(inputData);
        updateEnergyWindow(energy);

        const averageEnergy = calculateAverageEnergy();
        const isSilent = averageEnergy < state.silenceThreshold;

        handleSpeechState(isSilent);
    }

    // Calculate audio energy (volume)
    function calculateAudioEnergy(buffer) {
        let sum = 0;
        for (let i = 0; i < buffer.length; i++) {
            sum += buffer[i] * buffer[i];
        }
        return Math.sqrt(sum / buffer.length);
    }

    // Update energy window for averaging
    function updateEnergyWindow(energy) {
        state.energyWindow.push(energy);
        if (state.energyWindow.length > ENERGY_WINDOW_SIZE) {
            state.energyWindow.shift();
        }
    }

    // Calculate average energy from window
    function calculateAverageEnergy() {
        if (state.energyWindow.length === 0) return 0;

        const sum = state.energyWindow.reduce((acc, val) => acc + val, 0);
        return sum / state.energyWindow.length;
    }

    // Handle speech/silence state transitions
    function handleSpeechState(isSilent) {
        if (state.isSpeaking) {
            if (isSilent) {
                // User was speaking but now is silent
                if (!state.silenceTimer) {
                    state.silenceTimer = setTimeout(() => {
                        // Silence lasted long enough, consider speech done
                        if (state.isSpeaking) {
                            state.isSpeaking = false;

                            // Get the current audio data and send it
                            const audioBuffer = new Float32Array(state.audioContext.sampleRate * 5); // 5 seconds max
                            state.analyser.getFloatTimeDomainData(audioBuffer);
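                            // Caveat: getFloatTimeDomainData fills only the first
                            // analyser.fftSize (1024) samples of this 5-second buffer;
                            // the rest stays at zero. Capturing the full utterance would
                            // require accumulating buffers in handleAudioProcess instead.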

                            // Create WAV blob
                            const wavBlob = createWavBlob(audioBuffer, state.audioContext.sampleRate);

                            // Convert to base64
                            const reader = new FileReader();
                            reader.onloadend = function() {
                                sendAudioChunk(reader.result, state.currentSpeaker);
                            };
                            reader.readAsDataURL(wavBlob);

                            addSystemMessage('Processing your message...');
                        }
                    }, CLIENT_SILENCE_DURATION_MS);
                }
            } else {
                // User is still speaking, reset silence timer
                if (state.silenceTimer) {
                    clearTimeout(state.silenceTimer);
                    state.silenceTimer = null;
                }
            }
        } else {
            if (!isSilent) {
                // User started speaking
                state.isSpeaking = true;
                if (state.silenceTimer) {
                    clearTimeout(state.silenceTimer);
                    state.silenceTimer = null;
                }
            }
        }
    }

    // Send audio chunk to server
    function sendAudioChunk(audioData, speaker) {
        if (state.socket && state.socket.connected) {
            state.socket.emit('audio_chunk', {
                audio: audioData,
                speaker: speaker
            });
        }
    }

    // Create WAV blob from audio data
    function createWavBlob(audioData, sampleRate) {
        const numChannels = 1;
        const bitsPerSample = 16;
        const bytesPerSample = bitsPerSample / 8;

        // Create buffer for WAV file
        const buffer = new ArrayBuffer(44 + audioData.length * bytesPerSample);
        const view = new DataView(buffer);
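        // Layout: the canonical 44-byte PCM WAV header, then the samples as
        // 16-bit little-endian integers.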

        // Write WAV header
        // "RIFF" chunk descriptor
        writeString(view, 0, 'RIFF');
        view.setUint32(4, 36 + audioData.length * bytesPerSample, true);
        writeString(view, 8, 'WAVE');

        // "fmt " sub-chunk
        writeString(view, 12, 'fmt ');
        view.setUint32(16, 16, true); // subchunk1size
        view.setUint16(20, 1, true); // audio format (PCM)
        view.setUint16(22, numChannels, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * numChannels * bytesPerSample, true); // byte rate
        view.setUint16(32, numChannels * bytesPerSample, true); // block align
        view.setUint16(34, bitsPerSample, true);

        // "data" sub-chunk
        writeString(view, 36, 'data');
        view.setUint32(40, audioData.length * bytesPerSample, true);

        // Write audio data
        const audioDataStart = 44;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.max(-1, Math.min(1, audioData[i]));
            const value = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
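            // Scales the clamped float onto signed 16-bit PCM: negatives by
            // 0x8000 (down to -32768), positives by 0x7FFF (up to 32767).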
            view.setInt16(audioDataStart + i * bytesPerSample, value, true);
        }

        return new Blob([buffer], { type: 'audio/wav' });
    }

    // Helper function to write strings to DataView
    function writeString(view, offset, string) {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    }

    // Clear conversation history
    function clearConversation() {
        elements.conversation.innerHTML = '';
        if (state.socket && state.socket.connected) {
            state.socket.emit('clear_context');
        }
        addSystemMessage('Conversation cleared.');
    }

    // Handle transcription response from server
    function handleTranscription(data) {
        const speaker = data.speaker === 0 ? 'user' : 'ai';
        addMessage(data.text, speaker);
    }

    // Handle context update from server
    function handleContextUpdate(data) {
        if (data.status === 'cleared') {
            elements.conversation.innerHTML = '';
            addSystemMessage('Conversation context cleared.');
        }
    }

    // Handle streaming status updates from server
    function handleStreamingStatus(data) {
        if (data.status === 'active') {
            console.log('Server acknowledged streaming is active');
        } else if (data.status === 'inactive') {
            console.log('Server acknowledged streaming is inactive');
        }
    }

    // Handle processing status updates
    function handleProcessingStatus(data) {
        switch (data.status) {
            case 'transcribing':
                addSystemMessage('Transcribing your message...');
                break;
            case 'generating':
                addSystemMessage('Generating response...');
                break;
            case 'synthesizing':
                addSystemMessage('Synthesizing voice...');
                break;
        }
    }

    // Handle the start of an audio streaming response
    function handleAudioResponseStart(data) {
        // Prepare for receiving chunked audio
        console.log(`Expecting ${data.total_chunks} audio chunks`);
    }

    // Handle an incoming audio chunk
    function handleAudioResponseChunk(data) {
        // Create audio element for the response
        const audioElement = document.createElement('audio');
        if (elements.autoPlayResponses.checked) {
            audioElement.autoplay = true;
        }
        audioElement.controls = true;
        audioElement.className = 'audio-player';
        audioElement.src = data.chunk;
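        // data.chunk is assumed to be a self-contained playable source
        // (e.g. a base64 data: URL), so each chunk gets its own element.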

        // Add to the most recent AI message if it exists
        const messages = elements.conversation.querySelectorAll('.message.ai');
        if (messages.length > 0) {
            const lastAiMessage = messages[messages.length - 1];
            lastAiMessage.appendChild(audioElement);
        }
    }

    // Handle completion of audio streaming
    function handleAudioResponseComplete(data) {
        // Update the AI message with the full text
        addMessage(data.text, 'ai');
    }

    // Add a message to the conversation
    function addMessage(text, sender) {
        const messageDiv = document.createElement('div');
        messageDiv.className = `message ${sender}`;
        messageDiv.textContent = text;

        const timeSpan = document.createElement('span');
        timeSpan.className = 'message-time';
        const now = new Date();
        timeSpan.textContent = `${now.getHours().toString().padStart(2, '0')}:${now.getMinutes().toString().padStart(2, '0')}`;
        messageDiv.appendChild(timeSpan);

        elements.conversation.appendChild(messageDiv);
        elements.conversation.scrollTop = elements.conversation.scrollHeight;
    }

    // Add a system message to the conversation
    function addSystemMessage(message) {
        const messageDiv = document.createElement('div');
        messageDiv.className = 'message system';
        messageDiv.textContent = message;
        elements.conversation.appendChild(messageDiv);
        elements.conversation.scrollTop = elements.conversation.scrollHeight;
    }

    // Initialize the application when DOM is fully loaded
    document.addEventListener('DOMContentLoaded', initializeApp);
</script>
<!-- Load external JavaScript file -->
<script src="voice-chat.js"></script>
</body>
</html>