transcript

2025-01-25 18:40:30 -06:00
parent 9f8fbccdfe
commit fe8e1faed8
6 changed files with 177 additions and 93 deletions
--- a/next.config.ts
+++ b/next.config.ts
@@ -1,6 +1,11 @@
-import type { NextConfig } from "next";
+import { NextConfig } from 'next';
 // Next.js configuration object, exported as this module's default export.
 // Its only setting is the experimental Server Actions body size limit.
 const nextConfig: NextConfig = {
  experimental: {
    serverActions: {
      bodySizeLimit: '10mb', // Adjust the size limit as needed
    },
  },
 };
 export default nextConfig;
--- a/package.json
+++ b/package.json
@@ -25,7 +25,9 @@
    "axios": "^1.7.9",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
-    "date-fns": "^4.1.0",
+    "date-fns": "^2.28.0",
    "dotenv": "^16.4.7",
    "formidable": "^3.5.2",
    "hoyahax-2025": "file:",
    "https-localhost": "^4.7.1",
    "lucide-react": "^0.474.0",
@@ -33,9 +35,10 @@
    "multer": "1.4.5-lts.1",
    "next": "15.1.6",
    "next-themes": "^0.4.4",
    "openai": "^4.80.1",
    "openai-whisper": "^1.0.2",
    "react": "^19.0.0",
-    "react-day-picker": "8.10.1",
+    "react-day-picker": "^9.5.0",
    "react-dom": "^19.0.0",
    "recharts": "^2.15.0",
    "svix": "^1.45.1",
@@ -44,6 +47,7 @@
  },
  "devDependencies": {
    "@eslint/eslintrc": "^3.2.0",
    "@types/formidable": "^3.4.5",
    "@types/multer": "^1.4.12",
    "@types/node": "^20.17.16",
    "@types/react": "^19.0.8",
--- a/src/app/(web)/transcribe/page.tsx
+++ b/src/app/(web)/transcribe/page.tsx
@@ -16,24 +16,17 @@ const AudioTranscriber: React.FC = () => {
  // Remember the first file picked in the <input type="file"> element, if any.
  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const picked = event.target.files?.[0];
    if (!picked) {
      // Nothing selected (dialog cancelled or empty list): keep the current state.
      return;
    }
    setFile(picked);
    console.log("File selected:", picked.name);
  };
-  // Handle file transcription
+  // Send the file to the backend for transcription
  const handleTranscription = async (audioFile: File) => {
    if (!audioFile) {
      alert("No audio file to transcribe!");
      return;
    }
    console.log("Starting transcription for:", audioFile.name);
    const formData = new FormData();
    formData.append("file", audioFile);
    setLoading(true);
-    setError(null); // Clear previous errors
+    setError(null);
    try {
      const response = await axios.post("http://localhost:8000/transcribe", formData, {
        headers: {
@@ -41,34 +34,30 @@ const AudioTranscriber: React.FC = () => {
        },
      });
-      console.log("Transcription response:", response.data);
+      // Handle missing transcription property in the response
      if (response.data && response.data.transcription) {
        setTranscription(response.data.transcription);
      } else {
-        setError("Unexpected response format. Check backend API.");
+        setError("No transcription available.");
        console.error("Invalid response format:", response.data);
      }
    } catch (error) {
-      console.error("Error transcribing audio:", error);
+      console.error("Error during transcription:", error);
      setError("Failed to transcribe audio. Please try again.");
    } finally {
      setLoading(false);
    }
  };
  // Start recording audio
  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      console.log("Microphone access granted.");
      mediaRecorderRef.current = new MediaRecorder(stream);
-      audioChunksRef.current = []; // Reset audio chunks
+      audioChunksRef.current = [];
      mediaRecorderRef.current.ondataavailable = (event) => {
        if (event.data.size > 0) {
          console.log("Audio chunk received:", event.data);
          audioChunksRef.current.push(event.data);
        }
      };
@@ -77,15 +66,11 @@ const AudioTranscriber: React.FC = () => {
        const audioBlob = new Blob(audioChunksRef.current, { type: "audio/mp3" });
        const audioFile = new File([audioBlob], "recording.mp3", { type: "audio/mp3" });
        console.log("Recording stopped. Blob created:", audioBlob);
        setFile(audioFile); // Save the recorded file
        setTranscription("Processing transcription for recorded audio...");
        await handleTranscription(audioFile); // Automatically transcribe
      };
      mediaRecorderRef.current.start();
      console.log("Recording started.");
      setRecording(true);
    } catch (error) {
      console.error("Error starting recording:", error);
@@ -96,7 +81,6 @@ const AudioTranscriber: React.FC = () => {
  // Stop recording audio
  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      console.log("Stopping recording...");
      mediaRecorderRef.current.stop();
      setRecording(false);
    }
@@ -104,17 +88,19 @@ const AudioTranscriber: React.FC = () => {
  return (
    <div>
-      <h1>Audio Transcription</h1>
+      <h1>Audio Transcription Tool</h1>
      <div>
        <h2>Upload or Record Audio</h2>
        {/* File Upload */}
        <input type="file" accept="audio/*" onChange={handleFileChange} />
-        <button onClick={() => file && handleTranscription(file)} disabled={loading || !file}>
+        <button
          onClick={() => file && handleTranscription(file)}
          disabled={loading || !file}
        >
          {loading ? "Transcribing..." : "Transcribe"}
        </button>
      </div>
      {/* Recording Controls */}
      <div>
        <h2>Record Audio</h2>
        {!recording ? (
@@ -126,19 +112,13 @@ const AudioTranscriber: React.FC = () => {
        )}
      </div>
-      {/* Transcription Result */}
+      {transcription && (
        <div>
          <h2>Transcription:</h2>
        {loading ? (
          <p>Processing transcription...</p>
        ) : transcription ? (
          <p>{transcription}</p>
        ) : (
          <p>No transcription available yet.</p>
        )}
        </div>
      )}
      {/* Error Message */}
      {error && (
        <div style={{ color: "red" }}>
          <strong>Error:</strong> {error}
--- a/src/app/api/transcribe/app.py
+++ b/src/app/api/transcribe/app.py
@@ -1,49 +0,0 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 import whisper
 import os
 import tempfile
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 app = FastAPI()
 model = whisper.load_model("turbo")  # Load the model once for efficiency
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Frontend origin (adjust as needed)
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
    allow_headers=["*"],  # Allow all headers (Authorization, Content-Type, etc.)
 )
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is written to a temporary file (Whisper is given a path, not
    bytes), transcribed, and the temporary file is always removed afterwards.

    Args:
        file: The uploaded audio file. Only the extensions mp3, wav, flac and
            m4a are accepted.

    Returns:
        A dict of the form ``{"transcription": <text>}``.

    Raises:
        HTTPException: 400 when the extension is not supported, 500 when
            saving or transcribing the file fails.
    """
    # Check the file extension. A missing filename is treated as an empty
    # name so the request is rejected with a 400 instead of crashing.
    file_extension = (file.filename or "").split('.')[-1].lower()
    if file_extension not in ["mp3", "wav", "flac", "m4a"]:
        raise HTTPException(status_code=400, detail="Invalid audio file format. Only mp3, wav, flac, or m4a are supported.")

    temp_path = None
    try:
        # Save the uploaded file to a temporary location. The path is recorded
        # before writing so a failed write can still be cleaned up.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())
        logging.info(f"Audio file saved at: {temp_path}")

        # Transcribe the audio using Whisper
        result = model.transcribe(temp_path)
        return {"transcription": result["text"]}
    except Exception as e:
        logging.error(f"Error during transcription: {e}")
        raise HTTPException(status_code=500, detail="Internal server error during transcription.")
    finally:
        # Clean up the temporary file on every path, including failures, so
        # failed transcriptions do not leave files behind.
        if temp_path is not None:
            try:
                os.remove(temp_path)
                logging.info(f"Temporary file {temp_path} removed after transcription.")
            except OSError as cleanup_error:
                logging.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")
--- a/src/app/api/transcribe/app.ts
+++ b/src/app/api/transcribe/app.ts
@@ -0,0 +1,128 @@
 import type { NextApiRequest, NextApiResponse } from "next";
 import { createReadStream, unlinkSync } from "fs";
 import path from "path";
 import { IncomingMessage } from "http";
 import { config } from "dotenv";
 import formidable, { File } from "formidable";
 import { AxiosError } from "axios";
 import { OpenAI } from 'openai';
 // Load environment variables with dotenv before process.env is read below.
 // NOTE(review): the error message below points at `.env.local`; confirm that
 // this config() call (no arguments) actually reads that file in this setup.
 config();
 const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
 // Fail fast at module load time when the key is missing, so the client below
 // is never constructed without credentials.
 if (!OPENAI_API_KEY) {
  throw new Error("OpenAI API key is missing. Set OPENAI_API_KEY in your .env.local file.");
 }
 // Initialize OpenAI client
 const openaiClient = new OpenAI({
  apiKey: OPENAI_API_KEY,
 });
 // Route options: turn off the built-in body parser so the raw multipart stream
 // can be handed to formidable.
 // NOTE(review): Next.js API routes presumably look for an export named
 // `config`; this one is called `apiconfig` (the dotenv import already uses
 // `config`) — confirm the framework actually picks it up.
 export const apiconfig = {
  api: {
    bodyParser: false, // Disable default body parsing
  },
 };
 /**
  * Parses a multipart/form-data request with formidable and returns the
  * upload stored under the form field named "file".
  *
  * Uploads are written to /tmp with their extension kept and are limited to
  * 50 MB.
  *
  * @param req - The incoming HTTP request whose body has not been consumed yet.
  * @returns The temporary path of the saved upload and its original filename
  *   ("unknown" when the client did not send one).
  * @throws Rejects with formidable's parse error, or with
  *   "No file found in the upload." when the "file" field is missing or empty.
  */
 async function parseMultipartForm(req: IncomingMessage): Promise<{ filePath: string; originalFilename: string }> {
  const form = formidable({
    multiples: false, // Single file upload
    uploadDir: "/tmp", // Temporary directory
    keepExtensions: true,
    maxFileSize: 50 * 1024 * 1024, // 50 MB
  });
  return new Promise((resolve, reject) => {
    form.parse(req, (err, _fields, files) => {
      if (err) {
        reject(err);
        return;
      }
      // formidable v3 reports every field's uploads as an array, while older
      // versions could hand back a single File. Accept both shapes and take
      // the first upload instead of blindly casting.
      const uploaded: File | File[] | undefined = files.file;
      const file = Array.isArray(uploaded) ? uploaded[0] : uploaded;
      if (!file) {
        reject(new Error("No file found in the upload."));
        return;
      }
      resolve({
        filePath: file.filepath,
        originalFilename: file.originalFilename || "unknown",
      });
    });
  });
 }
 /**
  * API handler: accepts a multipart audio upload via POST, sends it to the
  * OpenAI transcription endpoint (model "whisper-1") and responds with
  * `{ transcription }`.
  *
  * Responses:
  * - 405 when the method is not POST.
  * - 400 when the uploaded file's extension is not mp3, wav or m4a.
  * - The upstream status (or 500) when the OpenAI request fails.
  * - 500 for any other error.
  *
  * The temporary upload is removed exactly once, in the `finally` block, on
  * every path after parsing succeeded.
  *
  * @param req - Incoming API request carrying the multipart body.
  * @param res - API response used to send the JSON result.
  */
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  if (req.method !== "POST") {
    return res.status(405).json({ error: "Method not allowed. Use POST." });
  }
  let filePath: string | null = null;
  try {
    // Parse file upload
    const { filePath: tempFilePath, originalFilename } = await parseMultipartForm(req);
    filePath = tempFilePath;
    // Log file details
    console.log("Uploaded file path:", filePath);
    console.log("Original filename:", originalFilename);
    // Validate file extension
    const allowedExtensions = ["mp3", "wav", "m4a"];
    const fileExtension = path.extname(originalFilename).toLowerCase().replace(".", "");
    if (!allowedExtensions.includes(fileExtension)) {
      // No unlink here: the finally block removes the temporary file.
      return res.status(400).json({
        error: `Invalid file format. Only ${allowedExtensions.join(", ")} are supported.`,
      });
    }
    // Create file stream
    const audioFile = createReadStream(filePath);
    console.log("File stream created for:", audioFile.path);
    // Send to OpenAI Whisper API. Transcription lives under
    // audio.transcriptions in the v4 SDK; the result exposes `text` directly.
    console.log("Sending file to OpenAI Whisper...");
    const response = await openaiClient.audio.transcriptions.create({
      model: "whisper-1",
      file: audioFile,
    });
    console.log("OpenAI response:", response);
    // Send response back to client
    return res.status(200).json({ transcription: response.text });
  } catch (error) {
    console.error("Error during transcription:", error);
    // The v4 OpenAI SDK throws its own APIError carrying the HTTP status.
    if (error instanceof OpenAI.APIError) {
      console.error("OpenAI API error:", error.message);
      return res.status(error.status ?? 500).json({
        error: error.message || "OpenAI API Error.",
      });
    }
    // Kept for any axios-based failure that may still surface here.
    if (error instanceof AxiosError) {
      console.error("OpenAI API error:", error.response?.data || error.message);
      return res.status(error.response?.status || 500).json({
        error: error.response?.data?.error?.message || "OpenAI API Error.",
      });
    }
    return res.status(500).json({
      error: "An unexpected error occurred.",
    });
  } finally {
    // Single cleanup point for the temporary upload; runs after every return.
    if (filePath) {
      try {
        unlinkSync(filePath);
      } catch (err) {
        console.error("Failed to clean up temporary file:", err);
      }
    }
  }
 }
--- a/vercel.json
+++ b/vercel.json
@@ -0,0 +1,16 @@
 {
    "version": 2,
    "builds": [
      {
        "src": "api/index.py",
        "use": "@vercel/python"
      }
    ],
    "routes": [
      {
        "src": "/(.*)",
        "dest": "api/index.py"
      }
    ]
  }