UI and Audio Processing Update

2026-01-07 04:09:35 +00:00
parent 864ccabc6e
commit 585830103b
18 changed files with 2069 additions and 481 deletions

View File

@@ -1,11 +1,13 @@
import os
import time
from flask import Flask, request, send_file, jsonify
import json
from flask import Flask, request, send_file, jsonify, send_from_directory, Response
from flask_cors import CORS
from werkzeug.utils import secure_filename
from processor import AudioImageProcessor
app = Flask(__name__)
# Serve the build folder from the parent directory
app = Flask(__name__, static_folder='../build', static_url_path='')
CORS(app) # Allow Svelte to communicate
# Configuration
@@ -19,10 +21,55 @@ def save_upload(file_obj):
file_obj.save(path)
return path
# --- Frontend Routes ---
@app.route('/')
def index():
return send_from_directory(app.static_folder, 'index.html')
@app.errorhandler(404)
def not_found(e):
# If the path starts with /api, return actual 404
if request.path.startswith('/api/'):
return jsonify({"error": "Not found"}), 404
# Otherwise return index.html for SPA routing
return send_from_directory(app.static_folder, 'index.html')
@app.route('/health', methods=['GET'])
def health():
return jsonify({"status": "ok", "max_mb": 40})
# --- Background Cleanup ---
import threading
def cleanup_task():
"""Background thread to clean up old files."""
expiration_seconds = 600 # 10 minutes
while True:
try:
now = time.time()
if os.path.exists(UPLOAD_FOLDER):
for filename in os.listdir(UPLOAD_FOLDER):
filepath = os.path.join(UPLOAD_FOLDER, filename)
if os.path.isfile(filepath):
# check creation time
if now - os.path.getctime(filepath) > expiration_seconds:
try:
os.remove(filepath)
print(f"Cleaned up: {filename}")
except Exception as e:
print(f"Error cleaning {filename}: {e}")
except Exception as e:
print(f"Cleanup Error: {e}")
time.sleep(60) # Run every minute
# Start cleanup thread safely
if os.environ.get('WERKZEUG_RUN_MAIN') == 'true' or not os.environ.get('WERKZEUG_RUN_MAIN'):
# Best-effort guard: under the debug reloader this condition is also true in the parent process, so two cleanup threads can still start
t = threading.Thread(target=cleanup_task, daemon=True)
t.start()
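
Because WERKZEUG_RUN_MAIN is unset in the reloader's parent process and 'true' only in the serving child, the or-condition above passes in both, which is why the guard is only best-effort. A stricter sketch, assuming the caller passes in whether debug mode is active (start_cleanup_once and its debug parameter are illustrative names, not part of this commit):

import os
import threading

def start_cleanup_once(target, debug=False):
    """Start the cleanup thread exactly once (hypothetical helper).

    With the Werkzeug reloader (debug=True) the module is imported twice; only the
    child process that actually serves requests has WERKZEUG_RUN_MAIN set to 'true'.
    Without the reloader the variable is never set, so we start unconditionally.
    """
    if debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        return None  # reloader parent process: let the reloaded child start the thread
    t = threading.Thread(target=target, daemon=True)
    t.start()
    return t

# e.g. start_cleanup_once(cleanup_task, debug=app.debug)
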
# --- Endpoint 1: Create Art (Optional: Embed Audio in it) ---
@app.route('/api/generate-art', methods=['POST'])
def generate_art():
@@ -32,35 +79,48 @@ def generate_art():
audio_file = request.files['audio']
should_embed = request.form.get('embed', 'false').lower() == 'true'
audio_path = None
art_path = None
try:
# 1. Save Audio
audio_path = save_upload(audio_file)
# 2. Generate Art
art_path = processor.generate_spectrogram(audio_path)
min_pixels = 0
if should_embed:
# Calculate required pixels: File Bytes * 8 (bits) / 3 (channels)
# Add 5% buffer for header and safety
file_size = os.path.getsize(audio_path)
min_pixels = int((file_size * 8 / 3) * 1.05)
art_path = processor.generate_spectrogram(audio_path, min_pixels=min_pixels)
# 3. If Embed requested, run Steganography immediately using the art as host
final_path = art_path
if should_embed:
# art_path becomes the host, audio_path is the data
final_path = processor.encode_stego(audio_path, art_path)
# encode_stego writes a new stego file, so the plain art image is only an intermediate
# and can be removed once the stego version exists (the input audio is cleaned up in finally)
if art_path != final_path:
try: os.remove(art_path)
except: pass
return send_file(final_path, mimetype='image/png')
except ValueError as e:
return jsonify({"error": str(e)}), 400
except Exception as e:
return jsonify({"error": str(e)}), 500
finally:
# Cleanup Inputs
if audio_path and os.path.exists(audio_path):
try: os.remove(audio_path)
except: pass
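
The embed branch above sizes the spectrogram from the LSB capacity rule: one payload bit per colour channel, three channels per pixel, plus a 5% margin for the header. A standalone sketch of that arithmetic (required_pixels is an illustrative helper, not part of the commit):

def required_pixels(audio_bytes: int, bits_per_pixel: int = 3, overhead: float = 0.05) -> int:
    """Minimum host-image pixels needed to hide audio_bytes with 1 LSB per RGB channel."""
    return int((audio_bytes * 8 / bits_per_pixel) * (1 + overhead))

# Example: a 3 MB file needs about 8.8 million host pixels (roughly 2970 x 2970 if square).
print(required_pixels(3 * 1024 * 1024))  # -> 8808038
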
# --- Endpoint 2: Format Shift (Audio -> Static) ---
@app.route('/api/shift', methods=['POST'])
def shift_format():
if 'file' not in request.files:
return jsonify({"error": "No file provided"}), 400
try:
f_path = save_upload(request.files['file'])
img_path = processor.encode_shift(f_path)
return send_file(img_path, mimetype='image/png')
except Exception as e:
return jsonify({"error": str(e)}), 500
# --- Endpoint 3: Steganography (Audio + Custom Host) ---
@app.route('/api/hide', methods=['POST'])
@@ -68,14 +128,25 @@ def hide_data():
if 'data' not in request.files or 'host' not in request.files:
return jsonify({"error": "Requires 'data' and 'host' files"}), 400
data_path = None
host_path = None
try:
data_path = save_upload(request.files['data'])
host_path = save_upload(request.files['host'])
stego_path = processor.encode_stego(data_path, host_path)
return send_file(stego_path, mimetype='image/png')
except ValueError as e:
return jsonify({"error": str(e)}), 400
except Exception as e:
return jsonify({"error": str(e)}), 500
finally:
if data_path and os.path.exists(data_path):
try: os.remove(data_path)
except: pass
if host_path and os.path.exists(host_path):
try: os.remove(host_path)
except: pass
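
A minimal client for /api/hide using the requests library; the URL, port, and file names are assumptions for illustration, not part of the commit:

import requests

# Hide an audio file inside a user-supplied host image via /api/hide.
with open("voice_memo.mp3", "rb") as data_f, open("cover_photo.png", "rb") as host_f:
    resp = requests.post(
        "http://localhost:5000/api/hide",
        files={"data": data_f, "host": host_f},
        timeout=120,
    )
resp.raise_for_status()
with open("stego.png", "wb") as out:
    out.write(resp.content)  # the endpoint streams back a PNG
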
# --- Endpoint 4: Decode (Universal) ---
@app.route('/api/decode', methods=['POST'])
@@ -83,6 +154,7 @@ def decode():
if 'image' not in request.files:
return jsonify({"error": "No image provided"}), 400
img_path = None
try:
img_path = save_upload(request.files['image'])
restored_path = processor.decode_image(img_path)
@@ -90,8 +162,118 @@ def decode():
# Determine mimetype based on extension for browser friendliness
filename = os.path.basename(restored_path)
return send_file(restored_path, as_attachment=True, download_name=filename)
except ValueError as e:
return jsonify({"error": str(e)}), 400
except Exception as e:
return jsonify({"error": str(e)}), 500
finally:
if img_path and os.path.exists(img_path):
try: os.remove(img_path)
except: pass
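
Since /api/decode answers with a file attachment on success but a JSON error body on failure, a client should branch on the status code. A hedged sketch (URL and file names are assumptions):

import requests

with open("stego.png", "rb") as f:
    resp = requests.post("http://localhost:5000/api/decode", files={"image": f}, timeout=120)

if resp.ok:
    # The server sets download_name via Content-Disposition; fall back to a generic name.
    disposition = resp.headers.get("Content-Disposition", "")
    filename = disposition.split("filename=")[-1].strip('"') if "filename=" in disposition else "restored.bin"
    with open(filename, "wb") as out:
        out.write(resp.content)
else:
    print("Decode failed:", resp.json().get("error"))
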
# --- Endpoint 5: Visualizer SSE Stream ---
@app.route('/api/visualize', methods=['POST'])
def visualize():
"""
SSE endpoint that streams the spectrogram generation process.
Returns step-by-step updates for visualization.
"""
if 'audio' not in request.files:
return jsonify({"error": "No audio file provided"}), 400
audio_file = request.files['audio']
audio_path = None
try:
audio_path = save_upload(audio_file)
file_size = os.path.getsize(audio_path)
min_pixels = int((file_size * 8 / 3) * 1.05)
def generate_steps():
art_path = None
final_path = None
try:
import base64
# Step 1: Audio loaded
yield f"data: {json.dumps({'step': 1, 'status': 'loading', 'message': 'Loading audio file...', 'progress': 10})}\n\n"
time.sleep(0.8)
yield f"data: {json.dumps({'step': 1, 'status': 'complete', 'message': f'Audio loaded: {audio_file.filename}', 'progress': 20, 'fileSize': file_size})}\n\n"
time.sleep(0.5)
# Step 2: Analyzing audio
yield f"data: {json.dumps({'step': 2, 'status': 'loading', 'message': 'Analyzing audio frequencies...', 'progress': 30})}\n\n"
time.sleep(1.0)
yield f"data: {json.dumps({'step': 2, 'status': 'complete', 'message': 'Frequency analysis complete', 'progress': 40})}\n\n"
time.sleep(0.5)
# Step 3: Generating spectrogram
yield f"data: {json.dumps({'step': 3, 'status': 'loading', 'message': 'Generating spectrogram image...', 'progress': 50})}\n\n"
print(f"[VISUALIZE] Starting spectrogram generation for {audio_path}")
art_path = processor.generate_spectrogram(audio_path, min_pixels=min_pixels)
print(f"[VISUALIZE] Spectrogram generated at {art_path}")
# Read the spectrogram image and encode as base64
with open(art_path, 'rb') as img_file:
spectrogram_b64 = base64.b64encode(img_file.read()).decode('utf-8')
print(f"[VISUALIZE] Spectrogram base64 length: {len(spectrogram_b64)}")
yield f"data: {json.dumps({'step': 3, 'status': 'complete', 'message': 'Spectrogram generated!', 'progress': 70, 'spectrogramImage': f'data:image/png;base64,{spectrogram_b64}'})}\n\n"
print("[VISUALIZE] Sent spectrogram image")
time.sleep(2.0) # Pause to let user see the spectrogram
# Step 4: Embedding audio
yield f"data: {json.dumps({'step': 4, 'status': 'loading', 'message': 'Embedding audio into image (LSB steganography)...', 'progress': 80})}\n\n"
final_path = processor.encode_stego(audio_path, art_path)
# Read the final image and encode as base64
with open(final_path, 'rb') as img_file:
final_b64 = base64.b64encode(img_file.read()).decode('utf-8')
yield f"data: {json.dumps({'step': 4, 'status': 'complete', 'message': 'Audio embedded successfully!', 'progress': 95, 'finalImage': f'data:image/png;base64,{final_b64}'})}\n\n"
time.sleep(2.0) # Pause to let user see the final image
# Step 5: Complete - send the result URL
result_id = os.path.basename(final_path)
yield f"data: {json.dumps({'step': 5, 'status': 'complete', 'message': 'Process complete!', 'progress': 100, 'resultId': result_id})}\n\n"
except Exception as e:
yield f"data: {json.dumps({'step': 0, 'status': 'error', 'message': str(e), 'progress': 0})}\n\n"
finally:
# Clean up intermediate files (but keep final)
if art_path and art_path != final_path and os.path.exists(art_path):
try: os.remove(art_path)
except: pass
if audio_path and os.path.exists(audio_path):
try: os.remove(audio_path)
except: pass
response = Response(generate_steps(), mimetype='text/event-stream')
response.headers['Cache-Control'] = 'no-cache'
response.headers['X-Accel-Buffering'] = 'no'
response.headers['Connection'] = 'keep-alive'
return response
except Exception as e:
if audio_path and os.path.exists(audio_path):
try: os.remove(audio_path)
except: pass
return jsonify({"error": str(e)}), 500
@app.route('/api/result/<result_id>', methods=['GET'])
def get_result(result_id):
"""Serve the result image by ID."""
# Sanitize the user-supplied ID to block path traversal before joining paths
result_id = secure_filename(result_id)
result_path = os.path.join(app.config['UPLOAD_FOLDER'], result_id)
if os.path.exists(result_path):
return send_file(result_path, mimetype='image/png', as_attachment=False)
return jsonify({"error": "Result not found"}), 404
if __name__ == '__main__':
# threaded=True is important so image processing doesn't block other requests

View File

@@ -1,4 +1,5 @@
import os
import time
import struct
import math
import numpy as np
@@ -36,49 +37,90 @@ class AudioImageProcessor:
return struct.pack(HEADER_FMT, signature, file_size, len(ext_bytes)) + ext_bytes
# --- Feature 1: Spectrogram Art ---
def generate_spectrogram(self, audio_path):
def generate_spectrogram(self, audio_path, min_pixels=0):
"""Generates a visual spectrogram from audio."""
try:
import torch
import torchaudio
has_torch = True
except ImportError:
has_torch = False
if has_torch and torch.cuda.is_available():
try:
# GPU Accelerated Path
device = "cuda"
waveform, sr = torchaudio.load(audio_path)
waveform = waveform.to(device)
# Create transformation
# Mimic librosa defaults roughly: n_fft=2048, hop_length=512
n_fft = 2048
win_length = n_fft
hop_length = 512
n_mels = 128
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
sample_rate=sr,
n_fft=n_fft,
win_length=win_length,
hop_length=hop_length,
n_mels=n_mels,
f_max=8000
).to(device)
S = mel_spectrogram(waveform)
S_dB = torchaudio.transforms.AmplitudeToDB()(S)
# Back to CPU for plotting
S_dB = S_dB.cpu().numpy()[0] # Take first channel
# Librosa display expects numpy
except Exception as e:
# Fallback to CPU/Librosa if any error occurs
print(f"GPU processing failed, falling back to CPU: {e}")
return self._generate_spectrogram_cpu(audio_path, min_pixels)
else:
return self._generate_spectrogram_cpu(audio_path, min_pixels)
# Plotting (Common)
return self._plot_spectrogram(S_dB, sr, min_pixels)
def _generate_spectrogram_cpu(self, audio_path, min_pixels=0):
y, sr = librosa.load(audio_path)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=256, fmax=8000)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
S_dB = librosa.power_to_db(S, ref=np.max)
plt.figure(figsize=(12, 6))
plt.axis('off')
plt.margins(0, 0)
plt.gca().xaxis.set_major_locator(plt.NullLocator())
plt.gca().yaxis.set_major_locator(plt.NullLocator())
return self._plot_spectrogram(S_dB, sr, min_pixels)
# 'magma' is a nice default, but you could parameterize this
librosa.display.specshow(S_dB, sr=sr, fmax=8000, cmap='magma')
def _plot_spectrogram(self, S_dB, sr, min_pixels=0):
# Calculate DPI dynamically to ensure we have enough pixels for steganography
dpi = 300
if min_pixels > 0:
# Figure is 12x6 inches. Area = 72 sq inches.
# Total Pixels = 72 * dpi^2
required_dpi = math.ceil((min_pixels / 72) ** 0.5)
# Add a small buffer
dpi = max(dpi, int(required_dpi * 1.05))
output_path = os.path.join(self.upload_folder, f"art_{os.path.basename(audio_path)}.png")
plt.savefig(output_path, bbox_inches='tight', pad_inches=0, dpi=300)
# Use exact dimensions without margins
width_in = 12
height_in = 6
fig = plt.figure(figsize=(width_in, height_in))
# Add axes covering the entire figure [left, bottom, width, height]
ax = plt.axes([0, 0, 1, 1], frameon=False)
ax.set_axis_off()
# 'magma' is a nice default
librosa.display.specshow(S_dB, sr=sr, fmax=8000, cmap='magma', ax=ax)
output_path = os.path.join(self.upload_folder, f"art_{int(time.time())}.png")
# Save at the computed DPI; avoid bbox_inches='tight', which would shrink the canvas below the pixel budget
plt.savefig(output_path, dpi=dpi)
plt.close()
return output_path
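
The dynamic DPI above inverts total_pixels = width_in * height_in * dpi**2 for the fixed 12 x 6 inch (72 square inch) canvas and adds its own 5% cushion. A standalone check of that arithmetic (dpi_for is an illustrative helper, not part of the commit):

import math

def dpi_for(min_pixels: int, width_in: float = 12, height_in: float = 6, floor: int = 300) -> int:
    """DPI needed so width_in * height_in * dpi**2 >= min_pixels, with a 5% buffer."""
    area = width_in * height_in  # 72 square inches for the default figure
    required = math.ceil((min_pixels / area) ** 0.5)
    return max(floor, int(required * 1.05))

# Example: ~8.8 million pixels (a ~3 MB payload) -> 367 DPI; small payloads stay at the 300 DPI floor.
print(dpi_for(8_808_038))  # -> 367
print(dpi_for(100_000))    # -> 300
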
# --- Feature 2: Format Shift (Raw Data to Image) ---
def encode_shift(self, file_path):
file_data = self._get_bytes(file_path)
file_size = len(file_data)
header = self._create_header(SIG_SHIFT, file_size, file_path)
payload = header + file_data
# Calculate size
pixels = math.ceil(len(payload) / 3)
side = math.ceil(math.sqrt(pixels))
padding = (side * side * 3) - len(payload)
# Pad and Reshape
arr = np.frombuffer(payload, dtype=np.uint8)
if padding > 0:
arr = np.pad(arr, (0, padding), 'constant')
img = Image.fromarray(arr.reshape((side, side, 3)), 'RGB')
output_path = os.path.join(self.upload_folder, f"shift_{os.path.basename(file_path)}.png")
img.save(output_path, "PNG")
return output_path
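
For the format shift, the payload (header plus raw file bytes) is packed three bytes per RGB pixel into the smallest square image that fits, and the tail is zero-padded. A quick worked example of that geometry (shift_geometry is an illustrative helper, not part of the commit):

import math

def shift_geometry(payload_len: int):
    """Square side length and zero-padding for a payload of payload_len bytes."""
    pixels = math.ceil(payload_len / 3)      # 3 payload bytes per RGB pixel
    side = math.ceil(math.sqrt(pixels))      # smallest square that holds them
    padding = side * side * 3 - payload_len  # zero bytes appended before reshape
    return side, padding

# Example: a 1,000,000-byte payload becomes a 578 x 578 image with 2,252 bytes of padding.
print(shift_geometry(1_000_000))  # -> (578, 2252)
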
# --- Feature 3: Steganography (Embed in Host) ---
def encode_stego(self, data_path, host_path):

View File

@@ -4,3 +4,5 @@ numpy
Pillow
librosa
matplotlib
torch
torchaudio