From fe8e1faed86ec0a44795a9075af6782ea1f93a10 Mon Sep 17 00:00:00 2001
From: "suraj.shenoy.b@gmail.com" <suraj.shenoy.b@gmail.com>
Date: Sat, 25 Jan 2025 18:40:30 -0600
Subject: [PATCH] transcript

---
 next.config.ts                    |   7 +-
 package.json                      |   8 +-
 src/app/(web)/transcribe/page.tsx |  62 +++++----------
 src/app/api/transcribe/app.py     |  49 ------------
 src/app/api/transcribe/app.ts     | 128 ++++++++++++++++++++++++++++++
 vercel.json                       |  16 ++++
 6 files changed, 177 insertions(+), 93 deletions(-)
 delete mode 100644 src/app/api/transcribe/app.py
 create mode 100644 src/app/api/transcribe/app.ts
 create mode 100644 vercel.json

diff --git a/next.config.ts b/next.config.ts
index db0a372..b5a7d3b 100644
--- a/next.config.ts
+++ b/next.config.ts
@@ -1,6 +1,11 @@
-import type { NextConfig } from "next";
+import { NextConfig } from 'next';
 
 const nextConfig: NextConfig = {
+  experimental: {
+    serverActions: {
+      bodySizeLimit: '10mb', // Adjust the size limit as needed
+    },
+  },
 };
 
 export default nextConfig;
diff --git a/package.json b/package.json
index 9f9df44..330e87f 100644
--- a/package.json
+++ b/package.json
@@ -25,7 +25,9 @@
     "axios": "^1.7.9",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
-    "date-fns": "^4.1.0",
+    "date-fns": "^2.28.0",
+    "dotenv": "^16.4.7",
+    "formidable": "^3.5.2",
     "hoyahax-2025": "file:",
     "https-localhost": "^4.7.1",
     "lucide-react": "^0.474.0",
@@ -33,9 +35,10 @@
     "multer": "1.4.5-lts.1",
     "next": "15.1.6",
     "next-themes": "^0.4.4",
+    "openai": "^4.80.1",
     "openai-whisper": "^1.0.2",
     "react": "^19.0.0",
-    "react-day-picker": "8.10.1",
+    "react-day-picker": "^9.5.0",
     "react-dom": "^19.0.0",
     "recharts": "^2.15.0",
     "svix": "^1.45.1",
@@ -44,6 +47,7 @@
   },
   "devDependencies": {
     "@eslint/eslintrc": "^3.2.0",
+    "@types/formidable": "^3.4.5",
     "@types/multer": "^1.4.12",
     "@types/node": "^20.17.16",
     "@types/react": "^19.0.8",
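Note on the size limits above: `experimental.serverActions.bodySizeLimit` only governs Server Action payloads. The transcription API route added below disables Next's body parser entirely, so its uploads are bounded by formidable's `maxFileSize` (50 MB) instead. If oversized files should be rejected before they leave the browser, a small client-side guard can mirror that cap. A minimal sketch; `validateAudioFile`, `MAX_UPLOAD_BYTES`, and the MIME list are illustrative and not part of this patch:

```ts
// Sketch only: pre-flight validation before uploading.
// MAX_UPLOAD_BYTES mirrors the route's formidable cap and must be kept in sync by hand.
const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;

function validateAudioFile(file: File): string | null {
  const allowedTypes = ["audio/mpeg", "audio/wav", "audio/mp4", "audio/webm"];
  if (!allowedTypes.includes(file.type)) {
    return "Unsupported audio format.";
  }
  if (file.size > MAX_UPLOAD_BYTES) {
    return "File exceeds the upload size limit.";
  }
  return null; // file looks acceptable
}
```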
diff --git a/src/app/(web)/transcribe/page.tsx b/src/app/(web)/transcribe/page.tsx
index 7a8571b..d4512b9 100644
--- a/src/app/(web)/transcribe/page.tsx
+++ b/src/app/(web)/transcribe/page.tsx
@@ -16,59 +16,48 @@ const AudioTranscriber: React.FC = () => {
   const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
     if (event.target.files && event.target.files.length > 0) {
       setFile(event.target.files[0]);
-      console.log("File selected:", event.target.files[0].name);
     }
   };
 
-  // Handle file transcription
+  // Send the file to the backend for transcription
   const handleTranscription = async (audioFile: File) => {
-    if (!audioFile) {
-      alert("No audio file to transcribe!");
-      return;
-    }
-
-    console.log("Starting transcription for:", audioFile.name);
-
     const formData = new FormData();
     formData.append("file", audioFile);
 
     setLoading(true);
-    setError(null); // Clear previous errors
+    setError(null);
 
     try {
-      const response = await axios.post("http://localhost:8000/transcribe", formData, {
+      const response = await axios.post("/api/transcribe", formData, {
         headers: {
           "Content-Type": "multipart/form-data",
         },
       });
 
-      console.log("Transcription response:", response.data);
-
+      // Handle missing transcription property in the response
       if (response.data && response.data.transcription) {
        setTranscription(response.data.transcription);
       } else {
-        setError("Unexpected response format. Check backend API.");
-        console.error("Invalid response format:", response.data);
+        setError("No transcription available.");
       }
     } catch (error) {
-      console.error("Error transcribing audio:", error);
+      console.error("Error during transcription:", error);
       setError("Failed to transcribe audio. Please try again.");
     } finally {
       setLoading(false);
     }
   };
 
+  // Start recording audio
   const startRecording = async () => {
     try {
       const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      console.log("Microphone access granted.");
-
       mediaRecorderRef.current = new MediaRecorder(stream);
-      audioChunksRef.current = []; // Reset audio chunks
+      audioChunksRef.current = [];
 
       mediaRecorderRef.current.ondataavailable = (event) => {
         if (event.data.size > 0) {
-          console.log("Audio chunk received:", event.data);
           audioChunksRef.current.push(event.data);
         }
       };
@@ -77,15 +66,11 @@ const AudioTranscriber: React.FC = () => {
       const audioBlob = new Blob(audioChunksRef.current, { type: "audio/mp3" });
       const audioFile = new File([audioBlob], "recording.mp3", { type: "audio/mp3" });
 
-      console.log("Recording stopped. Blob created:", audioBlob);
-
       setFile(audioFile); // Save the recorded file
-      setTranscription("Processing transcription for recorded audio...");
       await handleTranscription(audioFile); // Automatically transcribe
     };
 
     mediaRecorderRef.current.start();
-    console.log("Recording started.");
     setRecording(true);
   } catch (error) {
     console.error("Error starting recording:", error);
@@ -96,7 +81,6 @@ const AudioTranscriber: React.FC = () => {
   // Stop recording audio
   const stopRecording = () => {
     if (mediaRecorderRef.current) {
-      console.log("Stopping recording...");
       mediaRecorderRef.current.stop();
       setRecording(false);
     }
@@ -104,17 +88,19 @@ const AudioTranscriber: React.FC = () => {
   return (
-    <div>
-      <h1>Audio Transcription</h1>
+    <div>
+      <h1>Audio Transcription Tool</h1>
+      <div>
+        <h2>Upload or Record Audio</h2>
+        <input type="file" accept="audio/*" onChange={handleFileChange} />
+      </div>
-      {/* File Upload */}
-      <input type="file" onChange={handleFileChange} />
-      {/* Recording Controls */}
+      <div>
         <h2>Record Audio</h2>
         {!recording ? (
@@ -126,19 +112,13 @@ const AudioTranscriber: React.FC = () => {
         )}
       </div>
-      {/* Transcription Result */}
-      <div>
-        <h3>Transcription:</h3>
-        {loading ? (
-          <p>Processing transcription...</p>
-        ) : transcription ? (
+      {transcription && (
+        <div>
+          <h3>Transcription:</h3>
+          <p>{transcription}</p>
+        </div>
+      )}
-        ) : (
-          <p>No transcription available yet.</p>
-        )}
-      </div>
-      {/* Error Message */}
       {error && (
         <p>Error: {error}</p>
       )}
     </div>
   );
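A caveat on the recording path above: constructing the Blob with `type: "audio/mp3"` only labels the bytes, it does not transcode them. MediaRecorder in most browsers emits WebM/Opus (Safari emits MP4/AAC), so `recording.mp3` is generally not real MP3. A sketch of container negotiation, using a hypothetical `createRecorder` helper that is not part of this patch; if recordings come out as WebM, the API route's extension allow-list would also need to accept `webm`:

```ts
// Sketch: ask the browser which audio container it can actually produce,
// rather than assuming MP3. createRecorder is a hypothetical helper.
async function createRecorder(): Promise<{ recorder: MediaRecorder; mimeType: string }> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"];
  const mimeType = candidates.find((t) => MediaRecorder.isTypeSupported(t)) ?? "";
  const recorder = mimeType ? new MediaRecorder(stream, { mimeType }) : new MediaRecorder(stream);
  return { recorder, mimeType };
}
```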
diff --git a/src/app/api/transcribe/app.py b/src/app/api/transcribe/app.py
deleted file mode 100644
index 1c11e65..0000000
--- a/src/app/api/transcribe/app.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-import whisper
-import os
-import tempfile
-import logging
-
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-
-app = FastAPI()
-model = whisper.load_model("turbo")  # Load the model once for efficiency
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Frontend origin (adjust as needed)
-    allow_credentials=True,
-    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
-    allow_headers=["*"],  # Allow all headers (Authorization, Content-Type, etc.)
-)
-
-@app.post("/transcribe")
-async def transcribe_audio(file: UploadFile = File(...)):
-    # Check the file extension
-    file_extension = file.filename.split('.')[-1].lower()
-    if file_extension not in ["mp3", "wav", "flac", "m4a"]:
-        raise HTTPException(status_code=400, detail="Invalid audio file format. Only mp3, wav, flac, or m4a are supported.")
-
-    try:
-        # Save the uploaded file to a temporary location
-        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as temp_file:
-            temp_file.write(await file.read())
-            temp_path = temp_file.name
-
-        logging.info(f"Audio file saved at: {temp_path}")
-
-        # Transcribe the audio using Whisper
-        result = model.transcribe(temp_path)
-        transcription = result["text"]
-
-        # Clean up temporary file
-        os.remove(temp_path)
-        logging.info(f"Temporary file {temp_path} removed after transcription.")
-
-        return {"transcription": transcription}
-
-    except Exception as e:
-        logging.error(f"Error during transcription: {e}")
-        raise HTTPException(status_code=500, detail="Internal server error during transcription.")
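The deleted FastAPI service ran Whisper locally (`whisper.load_model("turbo")`); the TypeScript route that replaces it below delegates to OpenAI's hosted `whisper-1` model instead. Reduced to its essentials, the SDK call looks like this; a minimal sketch against `openai@4`, where the `transcribe` wrapper is illustrative rather than part of the patch:

```ts
import { createReadStream } from "fs";
import { OpenAI } from "openai";

// Sketch: the hosted-Whisper call that replaces the local model.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function transcribe(audioPath: string): Promise<string> {
  const result = await openai.audio.transcriptions.create({
    file: createReadStream(audioPath), // streamed from disk, as in the route below
    model: "whisper-1",
  });
  return result.text;
}
```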
diff --git a/src/app/api/transcribe/app.ts b/src/app/api/transcribe/app.ts
new file mode 100644
index 0000000..b5b807a
--- /dev/null
+++ b/src/app/api/transcribe/app.ts
@@ -0,0 +1,128 @@
+import type { NextApiRequest, NextApiResponse } from "next";
+import { createReadStream, unlinkSync } from "fs";
+import path from "path";
+import { IncomingMessage } from "http";
+import dotenv from "dotenv";
+import formidable, { File } from "formidable";
+import { OpenAI } from "openai";
+
+// Load environment variables
+dotenv.config();
+
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
+
+if (!OPENAI_API_KEY) {
+  throw new Error("OpenAI API key is missing. Set OPENAI_API_KEY in your .env.local file.");
+}
+
+// Initialize OpenAI client
+const openaiClient = new OpenAI({
+  apiKey: OPENAI_API_KEY,
+});
+
+// Next.js only honors this when it is exported as `config`
+export const config = {
+  api: {
+    bodyParser: false, // Disable default body parsing so formidable can read the raw stream
+  },
+};
+
+// Helper to parse multipart form data
+async function parseMultipartForm(req: IncomingMessage): Promise<{ filePath: string; originalFilename: string }> {
+  const form = formidable({
+    multiples: false, // Single file upload
+    uploadDir: "/tmp", // Temporary directory
+    keepExtensions: true,
+    maxFileSize: 50 * 1024 * 1024, // 50 MB
+  });
+
+  return new Promise((resolve, reject) => {
+    form.parse(req, (err, fields, files) => {
+      if (err) {
+        reject(err);
+        return;
+      }
+
+      // formidable v3 returns an array of files per field
+      const uploaded = files.file;
+      const file: File | undefined = Array.isArray(uploaded) ? uploaded[0] : uploaded;
+      if (!file) {
+        reject(new Error("No file found in the upload."));
+        return;
+      }
+
+      resolve({
+        filePath: file.filepath,
+        originalFilename: file.originalFilename || "unknown",
+      });
+    });
+  });
+}
+
+// Main handler
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  if (req.method !== "POST") {
+    return res.status(405).json({ error: "Method not allowed. Use POST." });
+  }
+
+  let filePath: string | null = null;
+
+  try {
+    // Parse file upload
+    const { filePath: tempFilePath, originalFilename } = await parseMultipartForm(req);
+    filePath = tempFilePath;
+
+    // Log file details
+    console.log("Uploaded file path:", filePath);
+    console.log("Original filename:", originalFilename);
+
+    // Validate file extension
+    const allowedExtensions = ["mp3", "wav", "m4a"];
+    const fileExtension = path.extname(originalFilename).toLowerCase().replace(".", "");
+    if (!allowedExtensions.includes(fileExtension)) {
+      return res.status(400).json({
+        error: `Invalid file format. Only ${allowedExtensions.join(", ")} are supported.`,
+      });
+    }
+
+    // Send to OpenAI's hosted Whisper model
+    console.log("Sending file to OpenAI Whisper...");
+    const transcription = await openaiClient.audio.transcriptions.create({
+      file: createReadStream(filePath),
+      model: "whisper-1",
+    });
+
+    console.log("Transcription received from OpenAI.");
+
+    // Send response back to client; the temporary file is cleaned up in `finally`
+    return res.status(200).json({ transcription: transcription.text });
+  } catch (error) {
+    console.error("Error during transcription:", error);
+
+    if (error instanceof OpenAI.APIError) {
+      return res.status(error.status || 500).json({
+        error: error.message || "OpenAI API Error.",
+      });
+    }
+
+    return res.status(500).json({
+      error: "An unexpected error occurred.",
+    });
+  } finally {
+    // Clean up the temporary file exactly once
+    if (filePath) {
+      try {
+        unlinkSync(filePath);
+      } catch (err) {
+        console.error("Failed to clean up temporary file:", err);
+      }
+    }
+  }
+}
diff --git a/vercel.json b/vercel.json
new file mode 100644
index 0000000..f45d933
--- /dev/null
+++ b/vercel.json
@@ -0,0 +1,16 @@
+{
+  "version": 2,
+  "builds": [
+    {
+      "src": "api/index.py",
+      "use": "@vercel/python"
+    }
+  ],
+  "routes": [
+    {
+      "src": "/(.*)",
+      "dest": "api/index.py"
+    }
+  ]
+}
\ No newline at end of file
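Two usage notes. The handler is written in the pages-router style (`NextApiRequest`/`NextApiResponse`) but lives at `src/app/api/transcribe/app.ts`; Next.js only serves such handlers from `pages/api/transcribe.ts`, or as an App Router `route.ts` after a rewrite, so the mounting path used by the client is an assumption. Also note that `vercel.json` still routes all traffic to `api/index.py` under `@vercel/python`, even though this patch deletes the Python backend. Assuming the route ends up at `/api/transcribe`, a client call could look like the following sketch; `requestTranscription` is illustrative, not part of the patch:

```ts
// Sketch: posting an audio file to the transcription endpoint.
// The /api/transcribe path is an assumption about where the handler is mounted.
async function requestTranscription(file: File): Promise<string> {
  const body = new FormData();
  body.append("file", file); // the route reads the "file" field

  const res = await fetch("/api/transcribe", { method: "POST", body });
  if (!res.ok) {
    throw new Error(`Transcription request failed with status ${res.status}`);
  }
  const data = (await res.json()) as { transcription: string };
  return data.transcription;
}
```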