HooHacks-12/Backend/index.html

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Voice Chat</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 0;
background-color: #f5f5f5;
color: #333;
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
header {
text-align: center;
margin-bottom: 30px;
}
h1 {
color: #2c3e50;
}
.status-bar {
background-color: #2c3e50;
color: white;
padding: 10px;
border-radius: 5px;
margin-bottom: 20px;
display: flex;
justify-content: space-between;
align-items: center;
}
.status-indicator {
display: flex;
align-items: center;
}
.status-dot {
height: 10px;
width: 10px;
border-radius: 50%;
margin-right: 8px;
}
.status-dot.connected { background-color: #2ecc71; }
.status-dot.connecting { background-color: #f39c12; }
.status-dot.disconnected { background-color: #e74c3c; }
.conversation {
background-color: white;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
height: 400px;
padding: 20px;
overflow-y: auto;
margin-bottom: 20px;
}
.message {
margin-bottom: 15px;
padding: 10px 15px;
border-radius: 18px;
max-width: 80%;
word-wrap: break-word;
}
.user-message {
background-color: #e3f2fd;
margin-left: auto;
border-bottom-right-radius: 5px;
}
.ai-message {
background-color: #f0f0f0;
margin-right: auto;
border-bottom-left-radius: 5px;
}
.controls {
display: flex;
justify-content: center;
gap: 15px;
margin-bottom: 20px;
}
button {
background-color: #2c3e50;
color: white;
border: none;
padding: 12px 24px;
border-radius: 25px;
cursor: pointer;
font-size: 16px;
transition: all 0.2s;
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
}
button:hover {
background-color: #1a252f;
}
button:disabled {
background-color: #95a5a6;
cursor: not-allowed;
}
.button-icon {
width: 20px;
height: 20px;
}
.mic-animation {
width: 60px;
height: 60px;
border-radius: 50%;
background-color: rgba(231, 76, 60, 0.2);
display: flex;
align-items: center;
justify-content: center;
animation: pulse 1.5s infinite;
margin: 0 auto 15px;
}
@keyframes pulse {
0% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(231, 76, 60, 0.5);
}
70% {
transform: scale(1);
box-shadow: 0 0 0 15px rgba(231, 76, 60, 0);
}
100% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(231, 76, 60, 0);
}
}
.settings {
margin-top: 20px;
padding: 15px;
background-color: white;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.settings h3 {
margin-top: 0;
color: #2c3e50;
border-bottom: 1px solid #eee;
padding-bottom: 10px;
}
.settings-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
}
.setting-item {
padding: 10px;
background-color: #f9f9f9;
border-radius: 5px;
}
.audio-visualizer {
height: 50px;
width: 100%;
background-color: #f0f0f0;
margin-top: 10px;
border-radius: 5px;
overflow: hidden;
}
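/* Note: .audio-visualizer is defined here but not referenced by any element in this page. */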
.info-message {
text-align: center;
color: #7f8c8d;
margin: 10px 0;
font-style: italic;
}
.loading {
text-align: center;
margin: 20px 0;
}
.spinner {
border: 4px solid rgba(0, 0, 0, 0.1);
border-radius: 50%;
border-top: 4px solid #2c3e50;
width: 30px;
height: 30px;
animation: spin 1s linear infinite;
margin: 0 auto 10px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
footer {
text-align: center;
margin-top: 30px;
padding: 20px;
color: #7f8c8d;
font-size: 14px;
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>AI Voice Assistant</h1>
</header>
<div class="status-bar">
<div class="status-indicator">
<div class="status-dot disconnected" id="connection-dot"></div>
<span id="connection-status">Disconnected</span>
</div>
<div id="runtime-info">
<span id="models-status"></span>
</div>
</div>
<div class="conversation" id="conversation">
<div class="info-message">Your conversation will appear here.</div>
</div>
<div id="mic-animation" class="mic-animation" style="display: none;">
<svg width="24" height="24" viewBox="0 0 24 24" fill="white">
<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm5.91-3c-.49 0-.9.36-.98.85C16.52 14.2 14.47 16 12 16s-4.52-1.8-4.93-4.15c-.08-.49-.49-.85-.98-.85-.61 0-1.09.54-1 1.14.49 3 2.89 5.35 5.91 5.78V20c0 .55.45 1 1 1s1-.45 1-1v-2.08c3.02-.43 5.42-2.78 5.91-5.78.1-.6-.39-1.14-1-1.14z"></path>
</svg>
</div>
<div class="controls">
<button id="start-button" disabled>
<svg class="button-icon" viewBox="0 0 24 24" fill="white">
<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm5.91-3c-.49 0-.9.36-.98.85C16.52 14.2 14.47 16 12 16s-4.52-1.8-4.93-4.15c-.08-.49-.49-.85-.98-.85-.61 0-1.09.54-1 1.14.49 3 2.89 5.35 5.91 5.78V20c0 .55.45 1 1 1s1-.45 1-1v-2.08c3.02-.43 5.42-2.78 5.91-5.78.1-.6-.39-1.14-1-1.14z"></path>
</svg>
Start Listening
</button>
<button id="interrupt-button" disabled>
<svg class="button-icon" viewBox="0 0 24 24" fill="white">
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-2h2v2zm0-4h-2V7h2v6z"></path>
</svg>
Interrupt
</button>
</div>
<div id="loading" class="loading" style="display: none;">
<div class="spinner"></div>
<p id="loading-text">Processing your speech...</p>
</div>
<div class="settings">
<h3>Status</h3>
<div class="settings-grid">
<div class="setting-item">
<div><strong>Whisper Model:</strong> <span id="whisper-status">Loading...</span></div>
</div>
<div class="setting-item">
<div><strong>CSM Audio Model:</strong> <span id="csm-status">Loading...</span></div>
</div>
<div class="setting-item">
<div><strong>LLM Model:</strong> <span id="llm-status">Loading...</span></div>
</div>
<div class="setting-item">
<div><strong>WebRTC:</strong> <span id="webrtc-status">Not Connected</span></div>
</div>
</div>
</div>
</div>
<footer>
<p>AI Voice Assistant | Using Fast Whisper, Llama 3.2, and CSM Audio Models</p>
</footer>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.6.1/socket.io.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', () => {
// DOM Elements
const startButton = document.getElementById('start-button');
const interruptButton = document.getElementById('interrupt-button');
const conversationDiv = document.getElementById('conversation');
const connectionDot = document.getElementById('connection-dot');
const connectionStatus = document.getElementById('connection-status');
const whisperStatus = document.getElementById('whisper-status');
const csmStatus = document.getElementById('csm-status');
const llmStatus = document.getElementById('llm-status');
const micAnimation = document.getElementById('mic-animation');
const loadingDiv = document.getElementById('loading');
const loadingText = document.getElementById('loading-text');
// State variables
let socket;
let isConnected = false;
let isListening = false;
let isAiSpeaking = false;
let audioContext;
let mediaStream;
let audioProcessor;
// Audio playback
let audioQueue = [];
let isPlaying = false;
let currentSource = null; // AudioBufferSourceNode currently playing, tracked so it can be stopped on interrupt
// Configuration variables
let serverSampleRate = 24000;
let clientSampleRate = 44100;
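// Defaults only; both values are overwritten by the server's 'session_ready' payload.
// serverSampleRate is stored for reference and not otherwise used by this client.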
// Initialize the application
initApp();
// Main initialization function
function initApp() {
updateConnectionStatus('connecting');
setupSocketConnection();
setupEventListeners();
}
// Set up Socket.IO connection with server
function setupSocketConnection() {
socket = io();
socket.on('connect', () => {
console.log('Connected to server');
updateConnectionStatus('connected');
isConnected = true;
});
socket.on('disconnect', () => {
console.log('Disconnected from server');
updateConnectionStatus('disconnected');
isConnected = false;
cleanupAudio();
});
socket.on('session_ready', (data) => {
console.log('Session ready:', data);
updateModelStatus(data);
clientSampleRate = data.client_sample_rate;
serverSampleRate = data.server_sample_rate;
// Enable start button if models are available
if (data.whisper_available && data.csm_available) {
startButton.disabled = false;
addInfoMessage('Ready for conversation. Click "Start Listening" to begin.');
} else {
addInfoMessage('Some models are not available. Voice chat might not work properly.');
}
});
socket.on('ready_for_speech', (data) => {
console.log('Ready for speech:', data);
startButton.disabled = false;
addInfoMessage('Ready for conversation. Click "Start Listening" to begin.');
});
socket.on('transcription', (data) => {
console.log('Transcription:', data);
addUserMessage(data.text);
loadingDiv.style.display = 'none';
});
socket.on('ai_response_text', (data) => {
console.log('AI response text:', data);
addAIMessage(data.text);
loadingDiv.style.display = 'none';
});
socket.on('ai_speech_start', () => {
console.log('AI started speaking');
isAiSpeaking = true;
interruptButton.disabled = false;
});
socket.on('ai_speech_chunk', (data) => {
console.log('Received AI speech chunk');
playAudioChunk(data.audio, data.is_last);
});
socket.on('ai_speech_end', () => {
console.log('AI stopped speaking');
isAiSpeaking = false;
interruptButton.disabled = true;
});
socket.on('user_speech_start', () => {
console.log('User speech detected');
showSpeakingIndicator(true);
});
socket.on('processing_speech', () => {
console.log('Processing speech');
showSpeakingIndicator(false);
showLoadingIndicator('Processing your speech...');
});
socket.on('no_speech_detected', () => {
console.log('No speech detected');
hideLoadingIndicator();
addInfoMessage('No speech detected. Please try again.');
});
socket.on('ai_interrupted', () => {
console.log('AI interrupted');
clearAudioQueue();
isAiSpeaking = false;
interruptButton.disabled = true;
});
socket.on('ai_interrupted_by_user', () => {
console.log('AI interrupted by user');
clearAudioQueue();
isAiSpeaking = false;
interruptButton.disabled = true;
addInfoMessage('AI interrupted by your speech');
});
socket.on('error', (data) => {
console.error('Server error:', data);
hideLoadingIndicator();
addInfoMessage(`Error: ${data.message}`);
});
}
// Set up UI event listeners
function setupEventListeners() {
startButton.addEventListener('click', toggleListening);
interruptButton.addEventListener('click', interruptAI);
}
// Update UI connection status
function updateConnectionStatus(status) {
connectionDot.className = 'status-dot ' + status;
switch (status) {
case 'connected':
connectionStatus.textContent = 'Connected';
break;
case 'connecting':
connectionStatus.textContent = 'Connecting...';
break;
case 'disconnected':
connectionStatus.textContent = 'Disconnected';
startButton.disabled = true;
interruptButton.disabled = true;
break;
}
}
// Update model status indicators
function updateModelStatus(data) {
whisperStatus.textContent = data.whisper_available ? 'Available' : 'Not Available';
whisperStatus.style.color = data.whisper_available ? 'green' : 'red';
csmStatus.textContent = data.csm_available ? 'Available' : 'Not Available';
csmStatus.style.color = data.csm_available ? 'green' : 'red';
llmStatus.textContent = data.llm_available ? 'Available' : 'Not Available';
llmStatus.style.color = data.llm_available ? 'green' : 'red';
}
// Toggle audio listening
function toggleListening() {
if (isListening) {
stopListening();
} else {
startListening();
}
}
// Start listening for audio
async function startListening() {
if (!isConnected) return;
try {
await initAudio();
isListening = true;
startButton.innerHTML = `
<svg class="button-icon" viewBox="0 0 24 24" fill="white">
<path d="M6 6h12v12H6z"></path>
</svg>
Stop Listening
`;
} catch (error) {
console.error('Error starting audio:', error);
addInfoMessage('Error accessing microphone. Please check permissions.');
}
}
// Stop listening for audio
function stopListening() {
cleanupAudio();
isListening = false;
startButton.innerHTML = `
<svg class="button-icon" viewBox="0 0 24 24" fill="white">
<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm5.91-3c-.49 0-.9.36-.98.85C16.52 14.2 14.47 16 12 16s-4.52-1.8-4.93-4.15c-.08-.49-.49-.85-.98-.85-.61 0-1.09.54-1 1.14.49 3 2.89 5.35 5.91 5.78V20c0 .55.45 1 1 1s1-.45 1-1v-2.08c3.02-.43 5.42-2.78 5.91-5.78.1-.6-.39-1.14-1-1.14z"></path>
</svg>
Start Listening
`;
showSpeakingIndicator(false);
}
// Initialize audio capture
async function initAudio() {
// Request microphone access
mediaStream = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: clientSampleRate,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
}
});
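// Note: browsers may ignore the sampleRate constraint above; the effective capture rate is audioContext.sampleRate.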
// Initialize AudioContext
audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: clientSampleRate
});
// Create audio source from stream
const source = audioContext.createMediaStreamSource(mediaStream);
// Create ScriptProcessor for audio processing
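// Note: ScriptProcessorNode is deprecated in the Web Audio API; AudioWorkletNode is the modern replacement, but the simpler node is kept here.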
const bufferSize = 4096;
audioProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
// Process audio data
audioProcessor.onaudioprocess = (event) => {
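// Mic samples are dropped while the AI is speaking, presumably so its own output is not streamed back to the server.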
if (!isListening || isAiSpeaking) return;
const input = event.inputBuffer.getChannelData(0);
const audioData = convertFloat32ToInt16(input);
sendAudioChunk(audioData);
};
// Connect the nodes
source.connect(audioProcessor);
audioProcessor.connect(audioContext.destination);
}
// Clean up audio resources
function cleanupAudio() {
if (audioProcessor) {
audioProcessor.disconnect();
audioProcessor = null;
}
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop());
mediaStream = null;
}
if (audioContext && audioContext.state !== 'closed') {
audioContext.close().catch(error => console.error('Error closing AudioContext:', error));
}
}
// Convert Float32Array to Int16Array for sending to server
function convertFloat32ToInt16(float32Array) {
const int16Array = new Int16Array(float32Array.length);
for (let i = 0; i < float32Array.length; i++) {
// Convert float [-1.0, 1.0] to int16 [-32768, 32767]
int16Array[i] = Math.max(-32768, Math.min(32767, Math.floor(float32Array[i] * 32768)));
}
return int16Array;
}
// Send audio chunk to server
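// The payload is raw 16-bit PCM at the capture sample rate, base64-encoded; the server is assumed to decode it in that format.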
function sendAudioChunk(audioData) {
if (!isConnected || !isListening) return;
// Convert to base64 for transmission
const base64Audio = arrayBufferToBase64(audioData.buffer);
// Send via Socket.IO
socket.emit('audio_stream', { audio: base64Audio });
}
// Play audio chunk received from server
function playAudioChunk(base64Audio, isLast) {
const audioData = base64ToArrayBuffer(base64Audio);
// Add to queue
audioQueue.push({
data: audioData,
isLast: isLast
});
// Start playing if not already playing
if (!isPlaying) {
playNextAudioChunk();
}
}
// Play the next audio chunk in the queue
function playNextAudioChunk() {
if (audioQueue.length === 0) {
isPlaying = false;
return;
}
isPlaying = true;
const chunk = audioQueue.shift();
try {
// Create audio context if needed
if (!audioContext || audioContext.state === 'closed') {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
}
// Resume audio context if suspended
if (audioContext.state === 'suspended') {
audioContext.resume();
}
// Decode the WAV data
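// Assumes each chunk from the server is a self-contained audio file (e.g. a complete WAV) that decodeAudioData can parse on its own; raw PCM fragments would fail to decode.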
audioContext.decodeAudioData(chunk.data, (buffer) => {
const source = audioContext.createBufferSource();
source.buffer = buffer;
source.connect(audioContext.destination);
// Track the active source so clearAudioQueue() can stop it on interrupt
currentSource = source;
// When playback ends, play the next chunk
source.onended = () => {
currentSource = null;
playNextAudioChunk();
};
source.start(0);
// If it's the last chunk, update UI
if (chunk.isLast) {
setTimeout(() => {
isAiSpeaking = false;
interruptButton.disabled = true;
}, buffer.duration * 1000);
}
}, (error) => {
console.error('Error decoding audio data:', error);
playNextAudioChunk(); // Skip this chunk and try the next
});
} catch (error) {
console.error('Error playing audio chunk:', error);
playNextAudioChunk(); // Try the next chunk
}
}
// Clear the audio queue (used when interrupting)
function clearAudioQueue() {
audioQueue.length = 0;
isPlaying = false;
// Stop the currently playing source so interrupted audio cannot resume later
if (currentSource) {
currentSource.onended = null;
try { currentSource.stop(); } catch (e) { /* already stopped */ }
currentSource = null;
}
}
// Send interrupt signal to server
function interruptAI() {
if (!isConnected || !isAiSpeaking) return;
socket.emit('interrupt_ai');
clearAudioQueue();
}
// Convert ArrayBuffer to Base64 string
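// Builds the intermediate binary string one character at a time; adequate for the small chunks used here, but large buffers would be slow.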
function arrayBufferToBase64(buffer) {
const bytes = new Uint8Array(buffer);
let binaryString = '';
const len = bytes.byteLength;
for (let i = 0; i < len; i++) {
binaryString += String.fromCharCode(bytes[i]);
}
return window.btoa(binaryString);
}
// Convert Base64 string to ArrayBuffer
function base64ToArrayBuffer(base64) {
const binaryString = window.atob(base64);
const len = binaryString.length;
const bytes = new Uint8Array(len);
for (let i = 0; i < len; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes.buffer;
}
// Add user message to conversation
function addUserMessage(text) {
const messageDiv = document.createElement('div');
messageDiv.className = 'message user-message';
messageDiv.textContent = text;
conversationDiv.appendChild(messageDiv);
conversationDiv.scrollTop = conversationDiv.scrollHeight;
}
// Add AI message to conversation
function addAIMessage(text) {
const messageDiv = document.createElement('div');
messageDiv.className = 'message ai-message';
messageDiv.textContent = text;
conversationDiv.appendChild(messageDiv);
conversationDiv.scrollTop = conversationDiv.scrollHeight;
}
// Add info message to conversation
function addInfoMessage(text) {
const messageDiv = document.createElement('div');
messageDiv.className = 'info-message';
messageDiv.textContent = text;
conversationDiv.appendChild(messageDiv);
conversationDiv.scrollTop = conversationDiv.scrollHeight;
}
// Show/hide speaking indicator
function showSpeakingIndicator(show) {
micAnimation.style.display = show ? 'flex' : 'none';
}
// Show loading indicator
function showLoadingIndicator(text) {
loadingText.textContent = text || 'Processing...';
loadingDiv.style.display = 'block';
}
// Hide loading indicator
function hideLoadingIndicator() {
loadingDiv.style.display = 'none';
}
});
</script>
</body>
</html>