<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Voice Assistant - CSM & Whisper</title>
    <script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f7fa;
            color: #333;
        }

        h1 {
            color: #2c3e50;
            text-align: center;
            margin-bottom: 30px;
        }

        #conversation {
            height: 400px;
            border: 1px solid #ddd;
            border-radius: 10px;
            padding: 20px;
            margin-bottom: 20px;
            overflow-y: auto;
            background-color: white;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }

        .message-container {
            display: flex;
            flex-direction: column;
            margin-bottom: 15px;
        }

        .user-message-container {
            align-items: flex-end;
        }

        .bot-message-container {
            align-items: flex-start;
        }

        .message {
            max-width: 80%;
            padding: 12px;
            border-radius: 18px;
            position: relative;
            word-break: break-word;
        }

        .user-message {
            background-color: #dcf8c6;
            color: #000;
            border-bottom-right-radius: 4px;
        }

        .bot-message {
            background-color: #f1f0f0;
            color: #000;
            border-bottom-left-radius: 4px;
        }

        .message-label {
            font-size: 0.8em;
            margin-bottom: 4px;
            color: #657786;
        }

        #controls {
            display: flex;
            gap: 10px;
            justify-content: center;
            margin-bottom: 15px;
        }

        button {
            padding: 12px 24px;
            font-size: 16px;
            cursor: pointer;
            border-radius: 50px;
            border: none;
            outline: none;
            transition: all 0.3s ease;
        }

        #recordButton {
            background-color: #4CAF50;
            color: white;
            width: 200px;
            box-shadow: 0 4px 8px rgba(76, 175, 80, 0.3);
        }

        #recordButton:hover {
            background-color: #45a049;
            transform: translateY(-2px);
        }

        #recordButton.recording {
            background-color: #f44336;
            animation: pulse 1.5s infinite;
            box-shadow: 0 4px 8px rgba(244, 67, 54, 0.3);
        }

        @keyframes pulse {
            0% {
                transform: scale(1);
            }
            50% {
                transform: scale(1.05);
            }
            100% {
                transform: scale(1);
            }
        }

        #status {
            text-align: center;
            margin-top: 15px;
            font-style: italic;
            color: #657786;
        }

        .audio-wave {
            display: flex;
            justify-content: center;
            align-items: center;
            height: 40px;
            gap: 3px;
        }

        .audio-wave span {
            display: block;
            width: 3px;
            height: 100%;
            background-color: #4CAF50;
            animation: wave 1.5s infinite ease-in-out;
            border-radius: 6px;
        }

        .audio-wave span:nth-child(2) {
            animation-delay: 0.2s;
        }
        .audio-wave span:nth-child(3) {
            animation-delay: 0.4s;
        }
        .audio-wave span:nth-child(4) {
            animation-delay: 0.6s;
        }
        .audio-wave span:nth-child(5) {
            animation-delay: 0.8s;
        }

        @keyframes wave {
            0%, 100% {
                height: 8px;
            }
            50% {
                height: 30px;
            }
        }

        .hidden {
            display: none;
        }

        .transcription-info {
            font-size: 0.8em;
            color: #888;
            margin-top: 4px;
            text-align: right;
        }

        .text-only-indicator {
            font-size: 0.8em;
            color: #e74c3c;
            margin-top: 4px;
            font-style: italic;
        }

        .status-message {
            text-align: center;
            padding: 8px;
            margin: 10px 0;
            background-color: #f8f9fa;
            border-radius: 5px;
            color: #666;
            font-size: 0.9em;
        }

        /* Audio visualizer styles */
        .visualizer-container {
            width: 100%;
            height: 120px;
            margin: 15px 0;
            border-radius: 10px;
            overflow: hidden;
            background-color: #000;
            position: relative;
        }

        #visualizer {
            width: 100%;
            height: 100%;
            display: block;
        }

        .visualizer-label {
            position: absolute;
            top: 10px;
            left: 10px;
            color: white;
            font-size: 0.8em;
            background-color: rgba(0, 0, 0, 0.5);
            padding: 4px 8px;
            border-radius: 4px;
        }

        /* Modern switch for visualizer toggle */
        .switch-container {
            display: flex;
            align-items: center;
            justify-content: center;
            margin-bottom: 10px;
        }

        .switch {
            position: relative;
            display: inline-block;
            width: 50px;
            height: 24px;
            margin-left: 10px;
        }

        .switch input {
            opacity: 0;
            width: 0;
            height: 0;
        }

        .slider {
            position: absolute;
            cursor: pointer;
            top: 0;
            left: 0;
            right: 0;
            bottom: 0;
            background-color: #ccc;
            transition: .4s;
            border-radius: 24px;
        }

        .slider:before {
            position: absolute;
            content: "";
            height: 16px;
            width: 16px;
            left: 4px;
            bottom: 4px;
            background-color: white;
            transition: .4s;
            border-radius: 50%;
        }

        input:checked + .slider {
            background-color: #4CAF50;
        }

        input:checked + .slider:before {
            transform: translateX(26px);
        }

        /* Streaming response indicator */
        .streaming-indicator {
            display: inline-block;
            width: 10px;
            height: 10px;
            background-color: #3498db;
            border-radius: 50%;
            margin-right: 5px;
            animation: pulse-blue 1.5s infinite;
        }

        @keyframes pulse-blue {
            0% {
                transform: scale(0.8);
                opacity: 0.8;
            }
            50% {
                transform: scale(1.2);
                opacity: 1;
            }
            100% {
                transform: scale(0.8);
                opacity: 0.8;
            }
        }
    </style>
</head>
<body>
    <h1>Voice Assistant with CSM & Whisper</h1>
    <div id="conversation"></div>

    <div class="switch-container">
        <span>Audio Visualizer</span>
        <label class="switch">
            <input type="checkbox" id="visualizerToggle" checked>
            <span class="slider"></span>
        </label>
    </div>

    <div class="visualizer-container" id="visualizerContainer">
        <canvas id="visualizer"></canvas>
        <div class="visualizer-label" id="visualizerLabel">Listening...</div>
    </div>

    <div id="controls">
        <button id="recordButton">Hold to Speak</button>
    </div>

    <div id="audioWave" class="audio-wave hidden">
        <span></span>
        <span></span>
        <span></span>
        <span></span>
        <span></span>
    </div>

    <div id="status">Connecting to server...</div>

    <script>
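        // Socket.IO protocol used by this page (as wired up below; the
        // server-side handlers are assumed to use the same event names):
        //   client -> server: start_speaking, audio_chunk, stop_speaking, request_audio_chunk
        //   server -> client: ready, transcription, audio_response, text_response,
        //                     start_streaming_response, audio_chunk, wait_for_chunk,
        //                     end_streaming, error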
        const socket = io();
        const recordButton = document.getElementById('recordButton');
        const conversation = document.getElementById('conversation');
        const status = document.getElementById('status');
        const audioWave = document.getElementById('audioWave');
        const visualizerToggle = document.getElementById('visualizerToggle');
        const visualizerContainer = document.getElementById('visualizerContainer');
        const visualizerLabel = document.getElementById('visualizerLabel');
        const canvas = document.getElementById('visualizer');
        const canvasCtx = canvas.getContext('2d');

        let mediaRecorder;
        let audioChunks = [];
        let isRecording = false;
        let audioSendInterval;
        let sessionActive = false;
        let reconnectAttempts = 0;
        let audioStream = null;
        let audioAnalyser = null;
        let visualizerActive = true;
        let visualizerAnimationId = null;
        let audioBufferSource = null;

        // Variables for streaming audio
        let audioQueue = [];
        let isPlayingAudio = false;
        let currentAudioElement = null;

        // Initialize audio context
        const audioContext = new (window.AudioContext || window.webkitAudioContext)();

        // Set up canvas size
        function setupCanvas() {
            canvas.width = visualizerContainer.offsetWidth;
            canvas.height = visualizerContainer.offsetHeight;
        }

        // Handle visualizer toggle
        visualizerToggle.addEventListener('change', function() {
            visualizerActive = this.checked;
            visualizerContainer.style.display = visualizerActive ? 'block' : 'none';

            if (!visualizerActive && visualizerAnimationId) {
                cancelAnimationFrame(visualizerAnimationId);
                visualizerAnimationId = null;
            } else if (visualizerActive && audioAnalyser) {
                drawVisualizer();
            }
        });

        // Connect to server
        socket.on('connect', () => {
            status.textContent = 'Connected to server';
            sessionActive = true;
            reconnectAttempts = 0;

            if (conversation.children.length > 0) {
                addStatusMessage("Reconnected to server");
            }
        });

        socket.on('disconnect', () => {
            status.textContent = 'Disconnected from server';
            sessionActive = false;

            addStatusMessage("Disconnected from server. Attempting to reconnect...");

            // Attempt to reconnect
            tryReconnect();
        });

        function tryReconnect() {
            if (reconnectAttempts < 5) {
                reconnectAttempts++;
                setTimeout(() => {
                    if (!sessionActive) {
                        socket.connect();
                    }
                }, 1000 * reconnectAttempts);
            } else {
                addStatusMessage("Failed to reconnect. Please refresh the page.");
            }
        }

        socket.on('ready', (data) => {
            status.textContent = data.message;
            setupAudioRecording();
        });

        socket.on('transcription', (data) => {
            addMessage('user', data.text);
            status.textContent = 'Assistant is thinking...';
            visualizerLabel.textContent = 'Processing...';
        });

        socket.on('audio_response', (data) => {
            // Remove any streaming indicator that might be present
            const indicator = document.getElementById('current-stream-indicator');
            if (indicator) indicator.remove();

            // Play audio
            status.textContent = 'Playing response...';
            visualizerLabel.textContent = 'Assistant speaking...';

            // Create audio element
            const audio = new Audio('data:audio/wav;base64,' + data.audio);
            currentAudioElement = audio;

            // Visualize assistant audio if visualizer is active
            if (visualizerActive) {
                visualizeResponseAudio(audio);
            }

            audio.onended = () => {
                status.textContent = 'Ready to record';
                visualizerLabel.textContent = 'Listening...';
                if (audioBufferSource) {
                    audioBufferSource.disconnect();
                    audioBufferSource = null;
                }
                currentAudioElement = null;
            };

            audio.onerror = () => {
                status.textContent = 'Error playing audio';
                visualizerLabel.textContent = 'Listening...';
                console.error('Error playing audio response');
                currentAudioElement = null;
            };

            audio.play().catch(err => {
                status.textContent = 'Error playing audio: ' + err.message;
                visualizerLabel.textContent = 'Listening...';
                console.error('Error playing audio:', err);
                currentAudioElement = null;
            });

            // Display the text unless the most recent bot message already shows it.
            // (querySelector('.bot-message:last-child') would return the *first*
            // matching element in document order, so look up the last one explicitly.)
            const botMessages = conversation.querySelectorAll('.bot-message');
            const lastBotMessage = botMessages[botMessages.length - 1];
            if (!lastBotMessage || !lastBotMessage.textContent.includes(data.text)) {
                addMessage('bot', data.text, false);
            }
        });

        // Visualize response audio
        function visualizeResponseAudio(audioElement) {
            try {
                // Create media element source (each element may only be connected
                // to a single MediaElementSourceNode, so this runs once per element)
                const audioSource = audioContext.createMediaElementSource(audioElement);

                // Create analyser
                const analyser = audioContext.createAnalyser();
                analyser.fftSize = 2048;

                // Connect source -> analyser -> speakers
                audioSource.connect(analyser);
                analyser.connect(audioContext.destination);

                // Keep references so the onended handlers can disconnect the source
                audioBufferSource = audioSource;
                audioAnalyser = analyser;

                // Start visualization
                drawVisualizer();
            } catch (e) {
                console.error('Error setting up audio visualization:', e);
            }
        }

        // Handle text-only responses when audio generation isn't available
        socket.on('text_response', (data) => {
            status.textContent = 'Received text response';
            visualizerLabel.textContent = 'Text only (no audio)';
            addMessage('bot', data.text, true);
            setTimeout(() => {
                status.textContent = 'Ready to record';
                visualizerLabel.textContent = 'Listening...';
            }, 1000);
        });

        socket.on('error', (data) => {
            status.textContent = 'Error: ' + data.message;
            visualizerLabel.textContent = 'Error occurred';
            console.error('Server error:', data.message);
            addStatusMessage("Error: " + data.message);
        });

        function setupAudioRecording() {
            // 'ready' is re-sent after a reconnect; skip re-initialization so the
            // recorder isn't rebuilt and the button handlers aren't attached twice
            if (mediaRecorder) {
                status.textContent = 'Ready to record';
                return;
            }

            // Check if browser supports required APIs
            if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
                status.textContent = 'Your browser does not support audio recording';
                return;
            }

            // Set up canvas
            setupCanvas();

            // Get user media
            navigator.mediaDevices.getUserMedia({ audio: true })
                .then(stream => {
                    // Store stream for visualizer
                    audioStream = stream;

                    // Create audio analyser for visualization
                    const source = audioContext.createMediaStreamSource(stream);
                    const analyser = audioContext.createAnalyser();
                    analyser.fftSize = 2048;
                    source.connect(analyser);

                    // Store analyser for visualization
                    audioAnalyser = analyser;

                    // Start visualizer if enabled
                    if (visualizerActive) {
                        drawVisualizer();
                    }

                    // Set up recording with better audio quality
                    const options = {
                        mimeType: 'audio/webm',
                        audioBitsPerSecond: 128000
                    };

                    try {
                        mediaRecorder = new MediaRecorder(stream, options);
                    } catch (e) {
                        // Fall back if the specified options aren't supported
                        mediaRecorder = new MediaRecorder(stream);
                    }

                    mediaRecorder.ondataavailable = event => {
                        if (event.data.size > 0) {
                            audioChunks.push(event.data);
                        }
                    };

                    mediaRecorder.onstop = () => {
                        processRecording();
                    };

                    // Set up button handlers with better touch handling
                    recordButton.addEventListener('mousedown', startRecording);
                    recordButton.addEventListener('touchstart', (e) => {
                        e.preventDefault(); // Prevent default touch behavior
                        startRecording();
                    });

                    recordButton.addEventListener('mouseup', stopRecording);
                    recordButton.addEventListener('touchend', (e) => {
                        e.preventDefault();
                        stopRecording();
                    });

                    recordButton.addEventListener('mouseleave', stopRecording);

                    status.textContent = 'Ready to record';
                })
                .catch(err => {
                    status.textContent = 'Error accessing microphone: ' + err.message;
                    console.error('Error accessing microphone:', err);
                });
        }

        // Draw visualizer animation
        function drawVisualizer() {
            if (!visualizerActive || !audioAnalyser) {
                return;
            }

            visualizerAnimationId = requestAnimationFrame(drawVisualizer);

            const bufferLength = audioAnalyser.frequencyBinCount;
            const dataArray = new Uint8Array(bufferLength);

            // Get frequency data
            audioAnalyser.getByteFrequencyData(dataArray);

            // Clear canvas
            canvasCtx.fillStyle = '#000';
            canvasCtx.fillRect(0, 0, canvas.width, canvas.height);

            // Draw visualization based on audio data
            const barWidth = (canvas.width / bufferLength) * 2.5;
            let x = 0;

            // Choose color based on state
            let gradient;
            if (isRecording) {
                // Red gradient for recording
                gradient = canvasCtx.createLinearGradient(0, 0, 0, canvas.height);
                gradient.addColorStop(0, 'rgba(255, 0, 0, 0.8)');
                gradient.addColorStop(1, 'rgba(255, 80, 80, 0.2)');
            } else if (visualizerLabel.textContent === 'Assistant speaking...') {
                // Blue gradient for assistant
                gradient = canvasCtx.createLinearGradient(0, 0, 0, canvas.height);
                gradient.addColorStop(0, 'rgba(0, 120, 255, 0.8)');
                gradient.addColorStop(1, 'rgba(80, 160, 255, 0.2)');
            } else {
                // Green gradient for listening
                gradient = canvasCtx.createLinearGradient(0, 0, 0, canvas.height);
                gradient.addColorStop(0, 'rgba(0, 200, 80, 0.8)');
                gradient.addColorStop(1, 'rgba(80, 255, 120, 0.2)');
            }

            canvasCtx.fillStyle = gradient;
            for (let i = 0; i < bufferLength; i++) {
                const barHeight = (dataArray[i] / 255) * canvas.height;
                canvasCtx.fillRect(x, canvas.height - barHeight, barWidth, barHeight);
                x += barWidth + 1;
            }
        }

        function startRecording() {
            if (!isRecording && sessionActive) {
                // Browsers may keep the AudioContext suspended until a user gesture;
                // resume it here so the visualizer and playback work reliably
                if (audioContext.state === 'suspended') {
                    audioContext.resume();
                }

                audioChunks = [];
                mediaRecorder.start(100); // Collect data in 100ms chunks
                recordButton.classList.add('recording');
                recordButton.textContent = 'Release to Stop';
                status.textContent = 'Recording...';
                visualizerLabel.textContent = 'Recording...';
                audioWave.classList.remove('hidden');
                isRecording = true;

                socket.emit('start_speaking');

                // Periodically flush recorded data into audioChunks
                audioSendInterval = setInterval(() => {
                    if (mediaRecorder.state === 'recording') {
                        mediaRecorder.requestData(); // Force ondataavailable to fire
                    }
                }, 300); // Flush every 300ms
            }
        }

        function stopRecording() {
            if (isRecording) {
                clearInterval(audioSendInterval);
                mediaRecorder.stop();
                recordButton.classList.remove('recording');
                recordButton.textContent = 'Hold to Speak';
                status.textContent = 'Processing speech...';
                visualizerLabel.textContent = 'Processing...';
                audioWave.classList.add('hidden');
                isRecording = false;
            }
        }

        function processRecording() {
            if (audioChunks.length === 0) {
                status.textContent = 'No audio recorded';
                visualizerLabel.textContent = 'Listening...';
                return;
            }

            const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });

            // Read the blob into an ArrayBuffer for processing
            const fileReader = new FileReader();
            fileReader.onloadend = () => {
                try {
                    const arrayBuffer = fileReader.result;
                    // Reinterpret the recorded bytes as 16-bit PCM normalized to
                    // Float32; the server is expected to decode the result accordingly
                    const audioData = convertToFloat32(arrayBuffer);

                    // Convert to base64 for sending
                    const base64String = arrayBufferToBase64(audioData.buffer);
                    socket.emit('audio_chunk', { audio: base64String });

                    // Signal end of speech
                    socket.emit('stop_speaking');
                } catch (e) {
                    console.error('Error processing audio:', e);
                    status.textContent = 'Error processing audio';
                    visualizerLabel.textContent = 'Error';
                }
            };

            fileReader.onerror = () => {
                status.textContent = 'Error reading audio data';
                visualizerLabel.textContent = 'Error';
            };

            fileReader.readAsArrayBuffer(audioBlob);
        }

        function convertToFloat32(arrayBuffer) {
            try {
                // Ensure the buffer length is even (multiple of 2 bytes for Int16)
                const bytesArray = new Uint8Array(arrayBuffer);
                const evenLength = Math.floor(bytesArray.length / 2) * 2;

                // If we had to adjust the length, create a new buffer with even length
                let bufferToProcess = arrayBuffer;
                if (bytesArray.length !== evenLength) {
                    console.warn(`Adjusting audio buffer from ${bytesArray.length} to ${evenLength} bytes`);
                    bufferToProcess = bytesArray.slice(0, evenLength).buffer;
                }

                // Get raw audio data as Int16 (common format for audio)
                const int16Array = new Int16Array(bufferToProcess);

                // Convert to Float32 (normalize between -1 and 1)
                const float32Array = new Float32Array(int16Array.length);
                for (let i = 0; i < int16Array.length; i++) {
                    float32Array[i] = int16Array[i] / 32768.0;
                }

                return float32Array;
            } catch (e) {
                console.error('Error converting audio data:', e);
                // Return an empty audio buffer to avoid breaking the app
                return new Float32Array(0);
            }
        }

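        // Note: the Int16Array view above uses the platform's native byte order
        // (little-endian on essentially all browsers), so the server is assumed
        // to expect little-endian 16-bit samples.
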
        function addMessage(sender, text, textOnly = false) {
            const containerDiv = document.createElement('div');
            containerDiv.className = sender === 'user' ? 'message-container user-message-container' : 'message-container bot-message-container';

            const labelDiv = document.createElement('div');
            labelDiv.className = 'message-label';
            labelDiv.textContent = sender === 'user' ? 'You' : 'Assistant';
            containerDiv.appendChild(labelDiv);

            const messageDiv = document.createElement('div');
            messageDiv.className = sender === 'user' ? 'message user-message' : 'message bot-message';
            messageDiv.textContent = text;
            containerDiv.appendChild(messageDiv);

            if (sender === 'user') {
                const infoDiv = document.createElement('div');
                infoDiv.className = 'transcription-info';
                infoDiv.textContent = 'Transcribed with Whisper';
                containerDiv.appendChild(infoDiv);
            } else if (textOnly) {
                // Add indicator for text-only response
                const textOnlyDiv = document.createElement('div');
                textOnlyDiv.className = 'text-only-indicator';
                textOnlyDiv.textContent = 'Text-only response (audio unavailable)';
                containerDiv.appendChild(textOnlyDiv);
            }

            conversation.appendChild(containerDiv);
            conversation.scrollTop = conversation.scrollHeight;
        }

        function addStatusMessage(message) {
            const statusDiv = document.createElement('div');
            statusDiv.className = 'status-message';
            statusDiv.textContent = message;
            conversation.appendChild(statusDiv);
            conversation.scrollTop = conversation.scrollHeight;

            // Auto-remove status messages after 10 seconds
            setTimeout(() => {
                if (conversation.contains(statusDiv)) {
                    statusDiv.style.opacity = '0';
                    statusDiv.style.transition = 'opacity 0.5s';
                    setTimeout(() => {
                        if (conversation.contains(statusDiv)) {
                            conversation.removeChild(statusDiv);
                        }
                    }, 500);
                }
            }, 10000);
        }

        function arrayBufferToBase64(buffer) {
            let binary = '';
            const bytes = new Uint8Array(buffer);
            const len = bytes.byteLength;
            for (let i = 0; i < len; i++) {
                binary += String.fromCharCode(bytes[i]);
            }
            return window.btoa(binary);
        }

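        // Building the binary string one character at a time is simple but can be
        // slow for long recordings; a chunked String.fromCharCode.apply(...) over
        // fixed-size slices would scale better if this ever becomes a bottleneck.
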
        // Handle page visibility changes to avoid issues with background tabs
        document.addEventListener('visibilitychange', () => {
            if (document.hidden && isRecording) {
                stopRecording();
            }
        });

        // Disconnect cleanly when the page is closed
        window.addEventListener('beforeunload', () => {
            if (socket && socket.connected) {
                socket.disconnect();
            }

            if (visualizerAnimationId) {
                cancelAnimationFrame(visualizerAnimationId);
            }

            if (currentAudioElement) {
                currentAudioElement.pause();
                currentAudioElement = null;
            }
        });

        // Add a reload button for debugging
        const reloadButton = document.createElement('button');
        reloadButton.textContent = '🔄 Reload';
        reloadButton.style.position = 'fixed';
        reloadButton.style.bottom = '10px';
        reloadButton.style.right = '10px';
        reloadButton.style.padding = '5px 10px';
        reloadButton.style.fontSize = '12px';
        reloadButton.style.backgroundColor = '#f5f5f5';
        reloadButton.style.border = '1px solid #ddd';
        reloadButton.style.borderRadius = '4px';
        reloadButton.style.cursor = 'pointer';

        reloadButton.addEventListener('click', () => {
            window.location.reload();
        });

        document.body.appendChild(reloadButton);

        // Update the canvas size when the window is resized
        window.addEventListener('resize', () => {
            setupCanvas();
        });

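        // Streaming flow: the server announces a response with 'start_streaming_response';
        // the client then pulls chunks one at a time via 'request_audio_chunk'. Each
        // incoming 'audio_chunk' is queued and played in order, 'wait_for_chunk' means
        // the next chunk isn't ready yet, and 'end_streaming' closes out the response.
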
        // Handlers for streamed audio responses

        // Handle start of streaming response
        socket.on('start_streaming_response', (data) => {
            // Clear any existing audio queue and set up for the new stream
            audioQueue = [];
            isPlayingAudio = false;
            if (currentAudioElement) {
                currentAudioElement.pause();
                currentAudioElement = null;
            }

            // Display the text message first
            addMessage('bot', data.text, false);

            // Update status
            status.textContent = 'Assistant is responding...';
            visualizerLabel.textContent = 'Streaming response...';

            // Add a streaming indicator to the message just added
            const lastBotMessage = document.querySelector('.bot-message-container:last-child .message');
            if (lastBotMessage) {
                const streamingIndicator = document.createElement('span');
                streamingIndicator.className = 'streaming-indicator';
                streamingIndicator.id = 'current-stream-indicator';
                lastBotMessage.insertAdjacentElement('afterbegin', streamingIndicator);
            }

            // Request the first chunk
            socket.emit('request_audio_chunk');
        });

        // Receive audio chunks
        socket.on('audio_chunk', (data) => {
            // Add the chunk to the queue
            audioQueue.push(data.audio);

            // Start playing if not already playing
            if (!isPlayingAudio) {
                playNextAudioChunk();
            }

            // If this isn't the last chunk, request the next one
            if (!data.is_last) {
                socket.emit('request_audio_chunk');
            }
        });

        // Handle "wait for chunk" messages
        socket.on('wait_for_chunk', () => {
            console.log('Waiting for more audio chunks...');
            // Ask again after a short delay
            setTimeout(() => {
                socket.emit('request_audio_chunk');
            }, 100);
        });

        // Handle end of streaming
        socket.on('end_streaming', () => {
            console.log('Audio streaming completed');

            // Remove the streaming indicator
            const indicator = document.getElementById('current-stream-indicator');
            if (indicator) indicator.remove();

            // If no more chunks are playing, update the status
            if (audioQueue.length === 0 && !isPlayingAudio) {
                status.textContent = 'Ready to record';
                visualizerLabel.textContent = 'Listening...';
            }
        });

        // Play queued audio chunks in sequence
        function playNextAudioChunk() {
            if (audioQueue.length === 0) {
                isPlayingAudio = false;
                if (!isRecording) {
                    status.textContent = 'Ready to record';
                    visualizerLabel.textContent = 'Listening...';
                }
                return;
            }

            isPlayingAudio = true;
            const audioChunk = audioQueue.shift();

            // Create audio element
            const audio = new Audio('data:audio/wav;base64,' + audioChunk);
            currentAudioElement = audio;

            // Visualize if active
            if (visualizerActive) {
                visualizeResponseAudio(audio);
            }

            // When this chunk ends, play the next one
            audio.onended = () => {
                if (audioBufferSource) {
                    audioBufferSource.disconnect();
                    audioBufferSource = null;
                }

                currentAudioElement = null;
                playNextAudioChunk();
            };

            // On error, skip to the next chunk
            audio.onerror = (err) => {
                console.error('Error playing audio chunk:', err);
                currentAudioElement = null;
                playNextAudioChunk();
            };

            // Start playback
            audio.play().catch(err => {
                console.error('Error starting audio playback:', err);
                currentAudioElement = null;
                playNextAudioChunk();
            });
        }
    </script>
</body>
</html>