Files
HooHacks-12/Backend/index.html
2025-03-30 08:59:26 -04:00

963 lines
32 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Live Voice Assistant with CSM</title>
<script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
<style>
  /* Page shell and typography */
  body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    background-color: #f5f7fa;
    color: #333;
  }
  h1 {
    color: #2c3e50;
    text-align: center;
    margin-bottom: 30px;
  }
  /* Scrollable chat history panel */
  #conversation {
    height: 400px;
    border: 1px solid #ddd;
    border-radius: 10px;
    padding: 20px;
    margin-bottom: 20px;
    overflow-y: auto;
    background-color: white;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
  }
  /* Chat bubbles: user messages align right, assistant messages align left */
  .message-container {
    display: flex;
    flex-direction: column;
    margin-bottom: 15px;
  }
  .user-message-container {
    align-items: flex-end;
  }
  .bot-message-container {
    align-items: flex-start;
  }
  .message {
    max-width: 80%;
    padding: 12px;
    border-radius: 18px;
    position: relative;
    word-break: break-word;
  }
  .user-message {
    background-color: #dcf8c6;
    color: #000;
    border-bottom-right-radius: 4px;
  }
  .bot-message {
    background-color: #f1f0f0;
    color: #000;
    border-bottom-left-radius: 4px;
  }
  /* "You" / "Assistant" caption above each bubble */
  .message-label {
    font-size: 0.8em;
    margin-bottom: 4px;
    color: #657786;
  }
  /* Push-to-talk controls */
  #controls {
    display: flex;
    gap: 10px;
    justify-content: center;
    margin-bottom: 15px;
  }
  button {
    padding: 12px 24px;
    font-size: 16px;
    cursor: pointer;
    border-radius: 50px;
    border: none;
    outline: none;
    transition: all 0.3s ease;
  }
  #talkButton {
    background-color: #4CAF50;
    color: white;
    width: 200px;
    box-shadow: 0 4px 8px rgba(76, 175, 80, 0.3);
  }
  #talkButton:hover {
    background-color: #45a049;
    transform: translateY(-2px);
  }
  /* Red pulsing state while the mic is live (class toggled from JS) */
  #talkButton.recording {
    background-color: #f44336;
    animation: pulse 1.5s infinite;
    box-shadow: 0 4px 8px rgba(244, 67, 54, 0.3);
  }
  @keyframes pulse {
    0% {
      transform: scale(1);
    }
    50% {
      transform: scale(1.05);
    }
    100% {
      transform: scale(1);
    }
  }
  #status {
    text-align: center;
    margin-top: 15px;
    font-style: italic;
    color: #657786;
  }
  .hidden {
    display: none;
  }
  /* Small annotations under messages */
  .transcription-info {
    font-size: 0.8em;
    color: #888;
    margin-top: 4px;
    text-align: right;
  }
  .text-only-indicator {
    font-size: 0.8em;
    color: #e74c3c;
    margin-top: 4px;
    font-style: italic;
  }
  .status-message {
    text-align: center;
    padding: 8px;
    margin: 10px 0;
    background-color: #f8f9fa;
    border-radius: 5px;
    color: #666;
    font-size: 0.9em;
  }
  /* Audio visualizer styles */
  .visualizer-container {
    width: 100%;
    height: 120px;
    margin: 15px 0;
    border-radius: 10px;
    overflow: hidden;
    background-color: #000;
    position: relative;
  }
  #visualizer {
    width: 100%;
    height: 100%;
    display: block;
  }
  /* Overlays positioned on top of the canvas */
  .visualizer-label {
    position: absolute;
    top: 10px;
    left: 10px;
    color: white;
    font-size: 0.8em;
    background-color: rgba(0, 0, 0, 0.5);
    padding: 4px 8px;
    border-radius: 4px;
  }
  /* Real-time transcription */
  .live-transcription {
    position: absolute;
    bottom: 10px;
    left: 10px;
    right: 10px;
    color: white;
    font-size: 0.9em;
    background-color: rgba(0, 0, 0, 0.5);
    padding: 8px;
    border-radius: 4px;
    text-align: center;
    max-height: 60px;
    overflow-y: auto;
    font-style: italic;
  }
  /* Wave animation for active speaker */
  .speaking-wave {
    display: inline-block;
    margin-left: 5px;
    vertical-align: middle;
  }
  .speaking-wave span {
    display: inline-block;
    width: 3px;
    height: 12px;
    margin: 0 1px;
    background-color: currentColor;
    border-radius: 1px;
    animation: speakingWave 1s infinite ease-in-out;
  }
  /* Stagger the four bars so they ripple like a wave */
  .speaking-wave span:nth-child(2) {
    animation-delay: 0.1s;
  }
  .speaking-wave span:nth-child(3) {
    animation-delay: 0.2s;
  }
  .speaking-wave span:nth-child(4) {
    animation-delay: 0.3s;
  }
  @keyframes speakingWave {
    0%, 100% {
      height: 4px;
    }
    50% {
      height: 12px;
    }
  }
  /* Modern switch for visualizer toggle */
  .switch-container {
    display: flex;
    align-items: center;
    justify-content: center;
    margin-bottom: 10px;
  }
  .switch {
    position: relative;
    display: inline-block;
    width: 50px;
    height: 24px;
    margin-left: 10px;
  }
  /* Hide the real checkbox; the slider is the visual control */
  .switch input {
    opacity: 0;
    width: 0;
    height: 0;
  }
  .slider {
    position: absolute;
    cursor: pointer;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: #ccc;
    transition: .4s;
    border-radius: 24px;
  }
  .slider:before {
    position: absolute;
    content: "";
    height: 16px;
    width: 16px;
    left: 4px;
    bottom: 4px;
    background-color: white;
    transition: .4s;
    border-radius: 50%;
  }
  input:checked + .slider {
    background-color: #4CAF50;
  }
  input:checked + .slider:before {
    transform: translateX(26px);
  }
  /* Toast notification for feedback */
  .toast {
    position: fixed;
    bottom: 20px;
    left: 50%;
    transform: translateX(-50%);
    background-color: #333;
    color: white;
    padding: 12px 20px;
    border-radius: 4px;
    max-width: 80%;
    z-index: 1000;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    /* fadeOut begins at 2.7s; JS removes the element at 3s */
    animation: fadeIn 0.3s, fadeOut 0.3s 2.7s forwards;
    text-align: center;
  }
  .toast.error {
    background-color: #e74c3c;
  }
  .toast.info {
    background-color: #3498db;
  }
  @keyframes fadeIn {
    from { opacity: 0; transform: translate(-50%, 20px); }
    to { opacity: 1; transform: translate(-50%, 0); }
  }
  @keyframes fadeOut {
    from { opacity: 1; transform: translate(-50%, 0); }
    to { opacity: 0; transform: translate(-50%, 20px); }
  }
</style>
</head>
<body>
  <h1>Live Voice Assistant with CSM</h1>
  <!-- Chat history: role="log" + polite live region so screen readers announce new messages -->
  <div id="conversation" role="log" aria-live="polite" aria-label="Conversation"></div>
  <div class="switch-container">
    <span>Audio Visualizer</span>
    <label class="switch">
      <!-- The visible text span is outside the label, so give the control an accessible name -->
      <input type="checkbox" id="visualizerToggle" aria-label="Toggle audio visualizer" checked>
      <span class="slider"></span>
    </label>
  </div>
  <div class="visualizer-container" id="visualizerContainer">
    <!-- Purely visual; hide the canvas from assistive technology -->
    <canvas id="visualizer" aria-hidden="true"></canvas>
    <div class="visualizer-label" id="visualizerLabel">Listening...</div>
    <div class="live-transcription" id="liveTranscription"></div>
  </div>
  <div id="controls">
    <!-- Explicit type="button": the default button type is "submit" -->
    <button id="talkButton" type="button">Press to Talk</button>
  </div>
  <!-- role="status" makes connection/progress updates announced politely -->
  <div id="status" role="status">Connecting to server...</div>
<script>
// Socket.IO connection and cached DOM handles.
const socket = io();
const talkButton = document.getElementById('talkButton');
const conversation = document.getElementById('conversation');
const status = document.getElementById('status');
const visualizerToggle = document.getElementById('visualizerToggle');
const visualizerContainer = document.getElementById('visualizerContainer');
const visualizerLabel = document.getElementById('visualizerLabel');
const liveTranscription = document.getElementById('liveTranscription');
const canvas = document.getElementById('visualizer');
const canvasCtx = canvas.getContext('2d');
// Audio processing variables
let audioContext;                  // shared AudioContext, created in initializeAudio() on connect
let mediaStream = null;            // microphone MediaStream from getUserMedia
let mediaRecorder = null;          // NOTE(review): never assigned or read in this file — confirm before removing
let audioProcessor = null;         // AudioWorkletNode or ScriptProcessorNode capturing the mic
let isStreaming = false;           // true while the user holds push-to-talk
let isAITalking = false;           // true between 'ai_response_start' and 'ai_response_end'
let sessionActive = false;         // true while the socket is connected
let reconnectAttempts = 0;         // manual reconnect counter, capped at 5 in tryReconnect()
let audioAnalyser = null;          // AnalyserNode currently feeding the visualizer
let visualizerActive = true;       // mirrors the visualizer toggle switch
let visualizerAnimationId = null;  // requestAnimationFrame handle for the visualizer loop
let streamingInterval = null;      // NOTE(review): never assigned or read in this file — confirm before removing
let audioQueue = [];               // buffer for assistant audio chunks
let audioWorkletSupported = false; // whether audioContext.audioWorklet is available
let sampleRate = 24000; // Default, will be updated from server
// Match the visualizer canvas's pixel dimensions to its container
// (a canvas needs explicit width/height to render at the right resolution).
function setupCanvas() {
  const { offsetWidth, offsetHeight } = visualizerContainer;
  canvas.width = offsetWidth;
  canvas.height = offsetHeight;
}
// Show/hide the visualizer panel and start/stop its animation loop on toggle.
visualizerToggle.addEventListener('change', function () {
  visualizerActive = this.checked;
  visualizerContainer.style.display = visualizerActive ? 'block' : 'none';
  if (visualizerActive) {
    // Resume drawing only if we already have an analyser to read from.
    if (audioAnalyser) {
      drawVisualizer();
    }
  } else if (visualizerAnimationId) {
    cancelAnimationFrame(visualizerAnimationId);
    visualizerAnimationId = null;
  }
});
// ---- Connection lifecycle ---------------------------------------------

socket.on('connect', () => {
  sessionActive = true;
  reconnectAttempts = 0;
  status.textContent = 'Connected to server';
  // Set up the AudioContext and microphone pipeline.
  initializeAudio();
  // Request runtime settings (e.g. playback sample rate) from the backend.
  socket.emit('get_config');
});

socket.on('disconnect', () => {
  sessionActive = false;
  isStreaming = false;
  isAITalking = false;
  status.textContent = 'Disconnected from server';
  showToast('Disconnected from server. Attempting to reconnect...', 'error');
  stopAudioStream();
  tryReconnect();
});

socket.on('config', (data) => {
  // The server may override the default output sample rate.
  sampleRate = data.sample_rate || sampleRate;
  status.textContent = 'Ready to talk';
});
// Initialize audio
// Creates the page AudioContext, detects AudioWorklet support, sizes the
// visualizer canvas, and kicks off microphone setup. Called once per
// socket 'connect'.
function initializeAudio() {
  try {
    // Create audio context
    // NOTE(review): capture is forced to 44.1 kHz here while `sampleRate`
    // (server playback rate) defaults to 24 kHz — presumably the server
    // resamples the incoming stream; confirm.
    audioContext = new (window.AudioContext || window.webkitAudioContext)({
      latencyHint: 'interactive',
      sampleRate: 44100
    });
    // Check for AudioWorklet support
    audioWorkletSupported = !!audioContext.audioWorklet;
    // Setup canvas
    setupCanvas();
    // Setup audio stream
    setupAudioStream();
  } catch (e) {
    console.error('Error initializing audio:', e);
    status.textContent = 'Error initializing audio';
    showToast('Error initializing audio: ' + e.message, 'error');
  }
}
// Requests microphone access, wires the mic into an AnalyserNode for the
// visualizer, installs the capture pipeline (AudioWorklet when available,
// ScriptProcessor otherwise), and binds the talk button plus the
// hold-spacebar-to-talk keyboard shortcut.
function setupAudioStream() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    status.textContent = 'Your browser does not support audio recording';
    showToast('Your browser does not support audio recording', 'error');
    return;
  }
  navigator.mediaDevices.getUserMedia({
    audio: {
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: true
    }
  })
  .then(stream => {
    mediaStream = stream;
    // Create analyzer for visualizer
    const source = audioContext.createMediaStreamSource(stream);
    audioAnalyser = audioContext.createAnalyser();
    audioAnalyser.fftSize = 2048;
    source.connect(audioAnalyser);
    // Start visualizer
    if (visualizerActive) {
      drawVisualizer();
    }
    // Create a ScriptProcessor or use AudioWorklet if supported
    if (audioWorkletSupported) {
      setupAudioWorklet(stream);
    } else {
      setupScriptProcessor(stream);
    }
    // Setup talk button
    talkButton.addEventListener('click', toggleTalking);
    // Setup keyboard shortcuts
    // Hold spacebar to talk; ignored while a form control has focus
    // (`!e.repeat` filters key auto-repeat while held).
    document.addEventListener('keydown', (e) => {
      if (e.code === 'Space' && !e.repeat && !isInputActive()) {
        e.preventDefault();
        if (!isStreaming) {
          startTalking();
        }
      }
    });
    document.addEventListener('keyup', (e) => {
      if (e.code === 'Space' && isStreaming && !isInputActive()) {
        e.preventDefault();
        stopTalking();
      }
    });
    status.textContent = 'Ready to talk (press button or spacebar)';
  })
  .catch(err => {
    console.error('Error accessing microphone:', err);
    status.textContent = 'Error accessing microphone';
    showToast('Error accessing microphone: ' + err.message, 'error');
  });
}
// Check if user is typing in an input field
// Returns true when keyboard focus is on a text-entry or form control, so the
// spacebar push-to-talk shortcut does not fire while the user interacts with
// it. Fixes: also treats <select> as active, and guards against a null
// document.activeElement.
function isInputActive() {
  const activeElement = document.activeElement;
  if (!activeElement) {
    return false; // no focused element (e.g. focus left the document)
  }
  return activeElement.tagName === 'INPUT' ||
    activeElement.tagName === 'TEXTAREA' ||
    activeElement.tagName === 'SELECT' ||
    activeElement.isContentEditable;
}
// Setup AudioWorklet for modern browsers
// Loads 'processor.js' (which must register an 'audio-processor' worklet) and
// forwards each processed buffer to the server while the user is speaking.
// Falls back to the ScriptProcessor path if the module fails to load.
async function setupAudioWorklet(stream) {
  try {
    // Create a processor worklet
    await audioContext.audioWorklet.addModule('processor.js');
    // Create worklet node
    const workletNode = new AudioWorkletNode(audioContext, 'audio-processor', {
      numberOfInputs: 1,
      numberOfOutputs: 1,
      processorOptions: {
        sampleRate: audioContext.sampleRate
      }
    });
    // Handle processed audio data
    // Only forward audio while the user holds talk and the AI is silent.
    workletNode.port.onmessage = (event) => {
      if (isStreaming && !isAITalking) {
        const audioData = event.data.audioData;
        if (audioData && audioData.length > 0) {
          sendAudioChunk(audioData);
        }
      }
    };
    // Connect nodes
    const source = audioContext.createMediaStreamSource(stream);
    source.connect(workletNode);
    // Store for later teardown in stopAudioStream()
    audioProcessor = workletNode;
  } catch (e) {
    console.error('Error setting up AudioWorklet:', e);
    // Fall back to ScriptProcessor
    setupScriptProcessor(stream);
  }
}
// Fallback to ScriptProcessor for older browsers
// ScriptProcessorNode is deprecated; it is only used when AudioWorklet is
// unavailable. Captures mono mic audio and forwards it to the server while
// the user is speaking.
function setupScriptProcessor(stream) {
  const bufferSize = 4096;
  const scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
  scriptProcessor.onaudioprocess = (audioProcessingEvent) => {
    if (isStreaming && !isAITalking) {
      const inputBuffer = audioProcessingEvent.inputBuffer;
      const audioData = inputBuffer.getChannelData(0);
      // Copy: getChannelData returns a view the node may reuse next callback.
      sendAudioChunk(new Float32Array(audioData));
    }
    // The node must produce output to stay active; emit silence rather than
    // echoing the mic. fill(0) replaces the original per-sample loop.
    const outputBuffer = audioProcessingEvent.outputBuffer;
    for (let channel = 0; channel < outputBuffer.numberOfChannels; channel++) {
      outputBuffer.getChannelData(channel).fill(0);
    }
  };
  // Connect mic -> processor -> destination (some browsers only run the
  // processor when it is connected to the destination).
  const source = audioContext.createMediaStreamSource(stream);
  source.connect(scriptProcessor);
  scriptProcessor.connect(audioContext.destination);
  // Store for later teardown in stopAudioStream()
  audioProcessor = scriptProcessor;
}
// Flip between the listening and idle push-to-talk states.
function toggleTalking() {
  if (!isStreaming) {
    startTalking();
  } else {
    stopTalking();
  }
}
// Begin a user utterance: switch the UI into recording mode and tell the
// server a stream of audio chunks is about to start.
function startTalking() {
  // Can't talk while disconnected or while the assistant is speaking.
  if (!sessionActive || isAITalking) {
    return;
  }
  isStreaming = true;
  // Browsers keep a fresh AudioContext suspended until a user gesture.
  if (audioContext.state === 'suspended') {
    audioContext.resume();
  }
  socket.emit('start_speaking');
  // Recording-mode UI.
  talkButton.classList.add('recording');
  talkButton.textContent = 'Release to Stop';
  status.textContent = 'Listening...';
  visualizerLabel.textContent = 'You are speaking...';
  // Reset and reveal the live transcription overlay.
  liveTranscription.textContent = '';
  liveTranscription.classList.remove('hidden');
}
// End the user utterance: leave recording mode and ask the server to process
// what was captured.
function stopTalking() {
  if (!isStreaming) {
    return;
  }
  isStreaming = false;
  socket.emit('stop_speaking');
  // Idle-mode UI.
  talkButton.classList.remove('recording');
  talkButton.textContent = 'Press to Talk';
  status.textContent = 'Processing...';
  // Hide the overlay until the next live transcription arrives.
  liveTranscription.classList.add('hidden');
}
// Serialize one Float32 PCM chunk as little-endian bytes, base64-encode it,
// and emit it to the server along with the capture sample rate.
function sendAudioChunk(audioData) {
  const byteBuffer = new ArrayBuffer(audioData.length * 4);
  const writer = new DataView(byteBuffer);
  let offset = 0;
  for (const sample of audioData) {
    writer.setFloat32(offset, sample, true); // true => little-endian
    offset += 4;
  }
  socket.emit('audio_stream', {
    audio: arrayBufferToBase64(byteBuffer),
    sampleRate: audioContext.sampleRate
  });
}
// Convert ArrayBuffer to base64
// Builds the intermediate binary string in fixed-size chunks:
// per-byte `+=` concatenation (the original) is quadratic on large buffers,
// and a single String.fromCharCode.apply over the whole buffer can exceed
// the engine's maximum-argument limit.
function arrayBufferToBase64(buffer) {
  const bytes = new Uint8Array(buffer);
  const CHUNK = 0x8000; // 32 KiB — comfortably under max-arguments limits
  const parts = [];
  for (let i = 0; i < bytes.length; i += CHUNK) {
    parts.push(String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK)));
  }
  return btoa(parts.join(''));
}
// Tear down the capture pipeline: release the microphone, disconnect the
// processing node, and halt the visualizer animation loop.
function stopAudioStream() {
  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop();
    }
    mediaStream = null;
  }
  if (audioProcessor) {
    if (audioProcessor.disconnect) {
      audioProcessor.disconnect();
    }
    audioProcessor = null;
  }
  if (visualizerAnimationId) {
    cancelAnimationFrame(visualizerAnimationId);
    visualizerAnimationId = null;
  }
}
// ---- Server -> client response events ---------------------------------

// Partial transcript while the user is still speaking.
socket.on('live_transcription', (data) => {
  liveTranscription.textContent = data.text || '...';
  liveTranscription.classList.remove('hidden');
});

// Final transcript of the finished utterance.
socket.on('transcription', (data) => {
  addMessage('user', data.text);
  status.textContent = 'Assistant is thinking...';
  visualizerLabel.textContent = 'Waiting for response...';
});

// The assistant's reply text has arrived; audio chunks will follow.
socket.on('ai_response_start', (data) => {
  isAITalking = true;
  status.textContent = 'Assistant is responding...';
  visualizerLabel.textContent = 'Assistant is speaking...';
  addMessage('bot', data.text);
  // Attach an animated "speaking" indicator to the bubble just added.
  const bubble = document.querySelector('.bot-message-container:last-child .message');
  const wave = document.createElement('span');
  wave.className = 'speaking-wave';
  wave.id = 'current-speaking-wave';
  wave.innerHTML = '<span></span><span></span><span></span><span></span>';
  bubble.appendChild(wave);
});

// One base64 WAV chunk of the assistant's synthesized speech.
socket.on('ai_audio_chunk', (data) => {
  playAudioChunk(data.audio);
});

// Assistant finished speaking; return the UI to idle.
socket.on('ai_response_end', () => {
  isAITalking = false;
  status.textContent = 'Ready to talk';
  visualizerLabel.textContent = 'Listening...';
  const wave = document.getElementById('current-speaking-wave');
  if (wave) {
    wave.remove();
  }
  // Re-enable talk button if it was disabled
  talkButton.disabled = false;
});

// Legacy path: text-only reply with no synthesized audio.
socket.on('text_response', (data) => {
  addMessage('bot', data.text, true);
  isAITalking = false;
  status.textContent = 'Ready to talk';
  visualizerLabel.textContent = 'Listening...';
});
// Play audio chunk
// Chunks are queued and played back to back. The original played each chunk
// the moment it arrived via an independent Audio element, so chunks could
// overlap and garble the response. `audioQueue` (declared at the top of the
// script, previously unused) holds chunks awaiting playback.
function playAudioChunk(base64Audio) {
  audioQueue.push(base64Audio);
  if (!playAudioChunk._busy) {
    _drainAudioQueue();
  }
}

// Private helper: play the next queued chunk, re-invoking itself when the
// chunk ends (or fails) until the queue is empty.
function _drainAudioQueue() {
  if (audioQueue.length === 0) {
    playAudioChunk._busy = false;
    return;
  }
  playAudioChunk._busy = true;
  const base64Audio = audioQueue.shift();
  // 'ended', 'error' and a rejected play() can all fire for one chunk;
  // advance exactly once.
  let advanced = false;
  const advance = () => {
    if (!advanced) {
      advanced = true;
      _drainAudioQueue();
    }
  };
  try {
    const audio = new Audio('data:audio/wav;base64,' + base64Audio);
    // Visualize if active
    if (visualizerActive) {
      visualizeResponseAudio(audio);
    }
    audio.addEventListener('ended', advance);
    audio.addEventListener('error', advance);
    audio.play().catch(err => {
      console.error('Error playing audio chunk:', err);
      advance();
    });
  } catch (e) {
    console.error('Error playing audio chunk:', e);
    advance();
  }
}
// Visualize audio response
// Routes the response <audio> element through a fresh AnalyserNode so the
// visualizer reacts to the assistant's voice. Fix: the source/analyser pair
// is now disconnected when playback ends — the original left every pair
// connected, leaking one node chain per audio chunk.
async function visualizeResponseAudio(audioElement) {
  try {
    // A media element can only ever have one MediaElementSource, but each
    // chunk is a brand-new element, so creating one here is safe.
    const audioSource = audioContext.createMediaElementSource(audioElement);
    const analyser = audioContext.createAnalyser();
    analyser.fftSize = 2048;
    audioSource.connect(analyser);
    analyser.connect(audioContext.destination);
    // NOTE(review): this replaces the microphone's analyser, so after the
    // response the visualizer no longer tracks the mic — confirm intended.
    audioAnalyser = analyser;
    // Release the graph nodes once this chunk has finished playing.
    audioElement.addEventListener('ended', () => {
      audioSource.disconnect();
      analyser.disconnect();
    });
    // Start visualization if not already running
    if (visualizerActive && !visualizerAnimationId) {
      drawVisualizer();
    }
  } catch (e) {
    console.error('Error setting up visualization:', e);
  }
}
// Draw visualizer
// requestAnimationFrame loop: renders the analyser's frequency spectrum as
// vertical bars, color-coded by speaker (green = user, blue = assistant,
// purple = idle).
function drawVisualizer() {
  if (!visualizerActive || !audioAnalyser) {
    return;
  }
  visualizerAnimationId = requestAnimationFrame(drawVisualizer);
  const bufferLength = audioAnalyser.frequencyBinCount;
  // Reuse one scratch buffer across frames — the original allocated a fresh
  // Uint8Array at ~60 fps, churning the GC. Re-allocate only when the
  // analyser's FFT size changes.
  if (!drawVisualizer._data || drawVisualizer._data.length !== bufferLength) {
    drawVisualizer._data = new Uint8Array(bufferLength);
  }
  const dataArray = drawVisualizer._data;
  audioAnalyser.getByteFrequencyData(dataArray);
  // Clear canvas
  canvasCtx.fillStyle = '#000';
  canvasCtx.fillRect(0, 0, canvas.width, canvas.height);
  // Calculate bar width based on canvas size and buffer length
  const barWidth = (canvas.width / (bufferLength * 0.5)) - 1;
  // Choose color based on who is speaking (built once per frame, not per bar)
  const gradient = canvasCtx.createLinearGradient(0, 0, 0, canvas.height);
  if (isStreaming && !isAITalking) {
    // User speaking - green
    gradient.addColorStop(0, 'rgba(0, 255, 0, 0.8)');
    gradient.addColorStop(1, 'rgba(0, 100, 0, 0.2)');
  } else if (isAITalking) {
    // AI speaking - blue
    gradient.addColorStop(0, 'rgba(0, 100, 255, 0.8)');
    gradient.addColorStop(1, 'rgba(0, 40, 100, 0.2)');
  } else {
    // Idle - purple/gray
    gradient.addColorStop(0, 'rgba(100, 100, 200, 0.8)');
    gradient.addColorStop(1, 'rgba(40, 40, 80, 0.2)');
  }
  canvasCtx.fillStyle = gradient;
  // Draw bars until we run off the right edge of the canvas.
  let x = 0;
  for (let i = 0; i < bufferLength && x <= canvas.width; i++) {
    const barHeight = dataArray[i] * (canvas.height / 255) * 0.8;
    canvasCtx.fillRect(x, canvas.height - barHeight, barWidth, barHeight);
    x += barWidth + 1;
  }
}
// Append a labelled chat bubble to the conversation panel.
// sender: 'user' | 'bot'. When textOnly is true, a "no audio" notice is
// shown under the bot message.
function addMessage(sender, text, textOnly = false) {
  const fromUser = sender === 'user';

  const container = document.createElement('div');
  container.className = fromUser
    ? 'message-container user-message-container'
    : 'message-container bot-message-container';

  const label = document.createElement('div');
  label.className = 'message-label';
  label.textContent = fromUser ? 'You' : 'Assistant';
  container.appendChild(label);

  const bubble = document.createElement('div');
  bubble.className = fromUser ? 'message user-message' : 'message bot-message';
  bubble.textContent = text;
  container.appendChild(bubble);

  if (fromUser) {
    const info = document.createElement('div');
    info.className = 'transcription-info';
    info.textContent = 'Transcribed with Whisper';
    container.appendChild(info);
  } else if (textOnly) {
    const notice = document.createElement('div');
    notice.className = 'text-only-indicator';
    notice.textContent = 'Text-only response (audio unavailable)';
    container.appendChild(notice);
  }

  conversation.appendChild(container);
  // Keep the newest message scrolled into view.
  conversation.scrollTop = conversation.scrollHeight;
}
// Show a transient toast notification ('info' or 'error'). Any toast already
// on screen is replaced; the element is removed after 3 s, matching the CSS
// fadeOut animation timing.
function showToast(message, type = 'info') {
  document.querySelectorAll('.toast').forEach(old => old.remove());

  const toast = document.createElement('div');
  toast.className = `toast ${type}`;
  toast.textContent = message;
  document.body.appendChild(toast);

  setTimeout(() => {
    // It may already have been replaced by a newer toast.
    if (document.body.contains(toast)) {
      document.body.removeChild(toast);
    }
  }, 3000);
}
// Try to reconnect to the server: up to 5 attempts with a linearly
// increasing delay (1 s, 2 s, ... 5 s), then give up with an error toast.
function tryReconnect() {
  if (reconnectAttempts >= 5) {
    showToast('Failed to reconnect. Please refresh the page.', 'error');
    return;
  }
  reconnectAttempts++;
  setTimeout(() => {
    // Skip if a connection was re-established in the meantime.
    if (!sessionActive) {
      socket.connect();
    }
  }, reconnectAttempts * 1000);
}
// Surface server-reported errors in the status line and as a toast.
socket.on('error', (data) => {
  status.textContent = 'Error: ' + data.message;
  visualizerLabel.textContent = 'Error occurred';
  showToast(data.message, 'error');
  console.error('Server error:', data.message);
});

// Stop capturing if the user switches away from the tab mid-utterance.
document.addEventListener('visibilitychange', () => {
  if (document.hidden && isStreaming) {
    stopTalking();
  }
});

// Release the microphone and close the socket when the page is torn down.
window.addEventListener('beforeunload', () => {
  stopAudioStream();
  if (socket && socket.connected) {
    socket.disconnect();
  }
});

// Keep the canvas matched to its container as the window resizes.
window.addEventListener('resize', setupCanvas);

// Initial canvas sizing.
setupCanvas();
</script>
</body>
</html>