From 163444e3aedb28b6b0be6be66131a86c93ce55b5 Mon Sep 17 00:00:00 2001 From: "suraj.shenoy.b@gmail.com" Date: Sat, 25 Jan 2025 11:31:54 -0600 Subject: [PATCH] Audio recording --- src/app/(web)/transcribe/page.tsx | 47 ++++++++++++++---- .../transcribe/__pycache__/app.cpython-39.pyc | Bin 1005 -> 1573 bytes src/app/api/transcribe/app.py | 34 +++++++++---- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/src/app/(web)/transcribe/page.tsx b/src/app/(web)/transcribe/page.tsx index 74017c5..7a8571b 100644 --- a/src/app/(web)/transcribe/page.tsx +++ b/src/app/(web)/transcribe/page.tsx @@ -8,6 +8,7 @@ const AudioTranscriber: React.FC = () => { const [transcription, setTranscription] = useState(null); const [loading, setLoading] = useState(false); const [recording, setRecording] = useState(false); + const [error, setError] = useState(null); const mediaRecorderRef = useRef(null); const audioChunksRef = useRef([]); @@ -15,27 +16,42 @@ const AudioTranscriber: React.FC = () => { const handleFileChange = (event: React.ChangeEvent) => { if (event.target.files && event.target.files.length > 0) { setFile(event.target.files[0]); + console.log("File selected:", event.target.files[0].name); } }; // Handle file transcription const handleTranscription = async (audioFile: File) => { - if (!audioFile) return alert("No audio file to transcribe!"); + if (!audioFile) { + alert("No audio file to transcribe!"); + return; + } + + console.log("Starting transcription for:", audioFile.name); const formData = new FormData(); formData.append("file", audioFile); setLoading(true); + setError(null); // Clear previous errors try { const response = await axios.post("http://localhost:8000/transcribe", formData, { headers: { "Content-Type": "multipart/form-data", }, }); - setTranscription(response.data.transcription); + + console.log("Transcription response:", response.data); + + if (response.data && response.data.transcription) { + setTranscription(response.data.transcription); + } else { + setError("Unexpected response format. Check backend API."); + console.error("Invalid response format:", response.data); + } } catch (error) { console.error("Error transcribing audio:", error); - alert("Failed to transcribe audio. Please try again."); + setError("Failed to transcribe audio. Please try again."); } finally { setLoading(false); } @@ -45,11 +61,14 @@ const AudioTranscriber: React.FC = () => { const startRecording = async () => { try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorderRef.current = new MediaRecorder(stream); + console.log("Microphone access granted."); + mediaRecorderRef.current = new MediaRecorder(stream); audioChunksRef.current = []; // Reset audio chunks + mediaRecorderRef.current.ondataavailable = (event) => { if (event.data.size > 0) { + console.log("Audio chunk received:", event.data); audioChunksRef.current.push(event.data); } }; @@ -57,24 +76,27 @@ const AudioTranscriber: React.FC = () => { mediaRecorderRef.current.onstop = async () => { const audioBlob = new Blob(audioChunksRef.current, { type: "audio/mp3" }); const audioFile = new File([audioBlob], "recording.mp3", { type: "audio/mp3" }); - setFile(audioFile); // Save the recorded file - // Transcribe the recorded audio - setTranscription("Transcribing the recorded audio..."); - await handleTranscription(audioFile); + console.log("Recording stopped. Blob created:", audioBlob); + + setFile(audioFile); // Save the recorded file + setTranscription("Processing transcription for recorded audio..."); + await handleTranscription(audioFile); // Automatically transcribe }; mediaRecorderRef.current.start(); + console.log("Recording started."); setRecording(true); } catch (error) { console.error("Error starting recording:", error); - alert("Failed to start recording. Please check microphone permissions."); + setError("Failed to start recording. Please check microphone permissions."); } }; // Stop recording audio const stopRecording = () => { if (mediaRecorderRef.current) { + console.log("Stopping recording..."); mediaRecorderRef.current.stop(); setRecording(false); } @@ -115,6 +137,13 @@ const AudioTranscriber: React.FC = () => {

No transcription available yet.

)} + + {/* Error Message */} + {error && ( +
+ Error: {error} +
+ )} ); }; diff --git a/src/app/api/transcribe/__pycache__/app.cpython-39.pyc b/src/app/api/transcribe/__pycache__/app.cpython-39.pyc index 9002252327ecc518a5bff6611661b0e51222ec2a..331039037df7f1a4aec1afb3585dabde0eed6e65 100644 GIT binary patch literal 1573 zcmZ`(OK)366rQ;++t-gcp)C|z=?aRdOJqWUO@t6th;bGq5lIzwWXYO%#?FoI!czVEn>wsv15lg*x0SejXmOJV>`-`9N6qAPx9HnKnmI4 zCqDO2i_lBVxx^T8e1S-r=Vle=&M{fx<pi({A-_cGZjJWon_M?=tY+P<09`0|Sb$DNe zX;Q(cfsUFe8?m198<|PesoxKNSQ`aI%=mP?1hOWs|&RwIOB;5 zX(X>bVy;e7wp33zWn9Q{@gpT@B0C~H=5<)Y=|W;1Z1TNohHA4JzeAV6B$}X8G{X$f zF|+QXnKegKJi*K!V)0Pf%waC`F0JqJ_-Eyux(Y=a&5KOy#r4vEu^Xb1F9Y4{w-8gK6;LmGuFpaT}B zfuT3(rXr@Q9_%I2c>s~?0Yt6`CUQMUMF7D8YjTxhhfAO*Pa}7yW!dR_LG~1Bpq^CS#1W|QT$AeR}Og$kW`#6G8x|w1dr3qtE;#O zu1`aD3hTQ3nm)d_Ed=Dp1|m#)ulsHW;a{fS4|fv4NoW*EE{1@}|2L#wTh)0(NJ3+- zUD=O91!p`RL5K^dP3IM4YaI02F1WB|@(p8iMUBofhRw}i(C|MVl*G;=wmbIKPC!JKxnr+~VClW3rDVRLk#&|XCQ*~ls&vo_2)XvV%9YB`>2xQ;Fd0etA>kht!A6Qk}$9d>$mTy&Y6^r-5r}hTP zb>X!EaAAQiY$F^a47cRXtI2x2d}8XIR7l~O+1%(PlzkxOz6rQ(#M>Zr5uTo~vy~>@ z5J=wHZR}|q@+H1`$J!a);+_GhPzL%EnmGMbsw}aLrB}oQgPZ>xVh0ib!n{24T@Y{K X5)_$zcE$3rZ#me)i|+j6<@5MIc;vw^ delta 607 zcmY*VOKTKC5bo;kdF;5mu6Y?>Q9+Q|gISGUf&@fZ%|(PL!3GIqx;Hz^%b`a7`)FOJb3Z!$%{Y0`}_kB{s*h$$u8>a`l_po`sz#bkJpY`t$^V9b>Z{D4nJ*Q z9eqG1iG+ltARwiJrc5xfIhyd)5zbl65?8pGV~Hocv)dQGf@2~Y%B=#)=OE6h#;BJg3e*a3MTm_GUL0% zc$IH+_@im#$Nxx=NT=X2pgi|kf6DKq*+xifSE@uARlE7&U=SbG+^8c{w~UU)`M!=b z6HmrrqjdSb@(Ew_LPz_kp!~?M`%MhK61`G>=dT|uSaBT$!aA@7w*Wa?LF8a?iQewd)by+hLq%O6x`Y&_bwLlw~5Y``$|@IHg`Fe>j)#+je^8 cf4FGt{fAg~n5Zp4maic!K@)syWyf9q3!~77%m4rY diff --git a/src/app/api/transcribe/app.py b/src/app/api/transcribe/app.py index d039ad1..fa6e596 100644 --- a/src/app/api/transcribe/app.py +++ b/src/app/api/transcribe/app.py @@ -1,15 +1,19 @@ -from fastapi import FastAPI, File, UploadFile +from fastapi import FastAPI, File, UploadFile, HTTPException from fastapi.middleware.cors import CORSMiddleware import whisper import os import tempfile +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO) app = FastAPI() model = whisper.load_model("base") # Load the model once for efficiency app.add_middleware( CORSMiddleware, - allow_origins=["http://localhost:3000"], # Frontend origin (adjust as needed) + allow_origins=["*"], # Frontend origin (adjust as needed) allow_credentials=True, allow_methods=["*"], # Allow all HTTP methods (GET, POST, etc.) allow_headers=["*"], # Allow all headers (Authorization, Content-Type, etc.) @@ -17,17 +21,29 @@ app.add_middleware( @app.post("/transcribe") async def transcribe_audio(file: UploadFile = File(...)): - # Save the uploaded file to a temporary location - with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file: - temp_file.write(await file.read()) - temp_path = temp_file.name + # Check the file extension + file_extension = file.filename.split('.')[-1].lower() + if file_extension not in ["mp3", "wav", "flac", "m4a"]: + raise HTTPException(status_code=400, detail="Invalid audio file format. Only mp3, wav, flac, or m4a are supported.") try: - # Transcribe the audio + # Save the uploaded file to a temporary location + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as temp_file: + temp_file.write(await file.read()) + temp_path = temp_file.name + + logging.info(f"Audio file saved at: {temp_path}") + + # Transcribe the audio using Whisper result = model.transcribe(temp_path) transcription = result["text"] - finally: + # Clean up temporary file os.remove(temp_path) + logging.info(f"Temporary file {temp_path} removed after transcription.") - return {"transcription": transcription} + return {"transcription": transcription} + + except Exception as e: + logging.error(f"Error during transcription: {e}") + raise HTTPException(status_code=500, detail="Internal server error during transcription.")