Audio recording

This commit is contained in:
suraj.shenoy.b@gmail.com
2025-01-25 11:31:54 -06:00
parent eb2816b4e5
commit 163444e3ae
3 changed files with 63 additions and 18 deletions

View File

@@ -8,6 +8,7 @@ const AudioTranscriber: React.FC = () => {
const [transcription, setTranscription] = useState<string | null>(null); const [transcription, setTranscription] = useState<string | null>(null);
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [recording, setRecording] = useState(false); const [recording, setRecording] = useState(false);
const [error, setError] = useState<string | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null); const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]); const audioChunksRef = useRef<Blob[]>([]);
@@ -15,27 +16,42 @@ const AudioTranscriber: React.FC = () => {
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => { const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
if (event.target.files && event.target.files.length > 0) { if (event.target.files && event.target.files.length > 0) {
setFile(event.target.files[0]); setFile(event.target.files[0]);
console.log("File selected:", event.target.files[0].name);
} }
}; };
// Handle file transcription // Handle file transcription
const handleTranscription = async (audioFile: File) => { const handleTranscription = async (audioFile: File) => {
if (!audioFile) return alert("No audio file to transcribe!"); if (!audioFile) {
alert("No audio file to transcribe!");
return;
}
console.log("Starting transcription for:", audioFile.name);
const formData = new FormData(); const formData = new FormData();
formData.append("file", audioFile); formData.append("file", audioFile);
setLoading(true); setLoading(true);
setError(null); // Clear previous errors
try { try {
const response = await axios.post("http://localhost:8000/transcribe", formData, { const response = await axios.post("http://localhost:8000/transcribe", formData, {
headers: { headers: {
"Content-Type": "multipart/form-data", "Content-Type": "multipart/form-data",
}, },
}); });
setTranscription(response.data.transcription);
console.log("Transcription response:", response.data);
if (response.data && response.data.transcription) {
setTranscription(response.data.transcription);
} else {
setError("Unexpected response format. Check backend API.");
console.error("Invalid response format:", response.data);
}
} catch (error) { } catch (error) {
console.error("Error transcribing audio:", error); console.error("Error transcribing audio:", error);
alert("Failed to transcribe audio. Please try again."); setError("Failed to transcribe audio. Please try again.");
} finally { } finally {
setLoading(false); setLoading(false);
} }
@@ -45,11 +61,14 @@ const AudioTranscriber: React.FC = () => {
const startRecording = async () => { const startRecording = async () => {
try { try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorderRef.current = new MediaRecorder(stream); console.log("Microphone access granted.");
mediaRecorderRef.current = new MediaRecorder(stream);
audioChunksRef.current = []; // Reset audio chunks audioChunksRef.current = []; // Reset audio chunks
mediaRecorderRef.current.ondataavailable = (event) => { mediaRecorderRef.current.ondataavailable = (event) => {
if (event.data.size > 0) { if (event.data.size > 0) {
console.log("Audio chunk received:", event.data);
audioChunksRef.current.push(event.data); audioChunksRef.current.push(event.data);
} }
}; };
@@ -57,24 +76,27 @@ const AudioTranscriber: React.FC = () => {
mediaRecorderRef.current.onstop = async () => { mediaRecorderRef.current.onstop = async () => {
const audioBlob = new Blob(audioChunksRef.current, { type: "audio/mp3" }); const audioBlob = new Blob(audioChunksRef.current, { type: "audio/mp3" });
const audioFile = new File([audioBlob], "recording.mp3", { type: "audio/mp3" }); const audioFile = new File([audioBlob], "recording.mp3", { type: "audio/mp3" });
setFile(audioFile); // Save the recorded file
// Transcribe the recorded audio console.log("Recording stopped. Blob created:", audioBlob);
setTranscription("Transcribing the recorded audio...");
await handleTranscription(audioFile); setFile(audioFile); // Save the recorded file
setTranscription("Processing transcription for recorded audio...");
await handleTranscription(audioFile); // Automatically transcribe
}; };
mediaRecorderRef.current.start(); mediaRecorderRef.current.start();
console.log("Recording started.");
setRecording(true); setRecording(true);
} catch (error) { } catch (error) {
console.error("Error starting recording:", error); console.error("Error starting recording:", error);
alert("Failed to start recording. Please check microphone permissions."); setError("Failed to start recording. Please check microphone permissions.");
} }
}; };
// Stop recording audio // Stop recording audio
const stopRecording = () => { const stopRecording = () => {
if (mediaRecorderRef.current) { if (mediaRecorderRef.current) {
console.log("Stopping recording...");
mediaRecorderRef.current.stop(); mediaRecorderRef.current.stop();
setRecording(false); setRecording(false);
} }
@@ -115,6 +137,13 @@ const AudioTranscriber: React.FC = () => {
<p>No transcription available yet.</p> <p>No transcription available yet.</p>
)} )}
</div> </div>
{/* Error Message */}
{error && (
<div style={{ color: "red" }}>
<strong>Error:</strong> {error}
</div>
)}
</div> </div>
); );
}; };

View File

@@ -1,15 +1,19 @@
from fastapi import FastAPI, File, UploadFile from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
import whisper import whisper
import os import os
import tempfile import tempfile
import logging
# Set up logging
logging.basicConfig(level=logging.INFO)
app = FastAPI() app = FastAPI()
model = whisper.load_model("base") # Load the model once for efficiency model = whisper.load_model("base") # Load the model once for efficiency
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=["http://localhost:3000"], # Frontend origin (adjust as needed) allow_origins=["*"], # Frontend origin (adjust as needed)
allow_credentials=True, allow_credentials=True,
allow_methods=["*"], # Allow all HTTP methods (GET, POST, etc.) allow_methods=["*"], # Allow all HTTP methods (GET, POST, etc.)
allow_headers=["*"], # Allow all headers (Authorization, Content-Type, etc.) allow_headers=["*"], # Allow all headers (Authorization, Content-Type, etc.)
@@ -17,17 +21,29 @@ app.add_middleware(
@app.post("/transcribe") @app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)): async def transcribe_audio(file: UploadFile = File(...)):
# Save the uploaded file to a temporary location # Check the file extension
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file: file_extension = file.filename.split('.')[-1].lower()
temp_file.write(await file.read()) if file_extension not in ["mp3", "wav", "flac", "m4a"]:
temp_path = temp_file.name raise HTTPException(status_code=400, detail="Invalid audio file format. Only mp3, wav, flac, or m4a are supported.")
try: try:
# Transcribe the audio # Save the uploaded file to a temporary location
with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as temp_file:
temp_file.write(await file.read())
temp_path = temp_file.name
logging.info(f"Audio file saved at: {temp_path}")
# Transcribe the audio using Whisper
result = model.transcribe(temp_path) result = model.transcribe(temp_path)
transcription = result["text"] transcription = result["text"]
finally:
# Clean up temporary file # Clean up temporary file
os.remove(temp_path) os.remove(temp_path)
logging.info(f"Temporary file {temp_path} removed after transcription.")
return {"transcription": transcription} return {"transcription": transcription}
except Exception as e:
logging.error(f"Error during transcription: {e}")
raise HTTPException(status_code=500, detail="Internal server error during transcription.")