UI and Audio Processing Update

This commit is contained in:
2026-01-07 04:09:35 +00:00
parent 864ccabc6e
commit 585830103b
18 changed files with 2069 additions and 481 deletions

View File

@@ -1,4 +1,5 @@
import os
import time
import struct
import math
import numpy as np
@@ -36,49 +37,90 @@ class AudioImageProcessor:
return struct.pack(HEADER_FMT, signature, file_size, len(ext_bytes)) + ext_bytes
# --- Feature 1: Spectrogram Art ---
def generate_spectrogram(self, audio_path, min_pixels=0):
    """Generate a visual mel spectrogram from an audio file.

    When ``torch``/``torchaudio`` import cleanly and CUDA is available, the
    mel spectrogram is computed on the GPU. In every other case -- the
    libraries are missing or broken, no CUDA device is present, or the GPU
    path raises -- the work is delegated to ``_generate_spectrogram_cpu``.

    Args:
        audio_path: Path of the audio file to render.
        min_pixels: Minimum pixel count forwarded to the plotting step;
            0 (the default) leaves the resolution at its default.

    Returns:
        The value returned by ``_plot_spectrogram`` (GPU path) or by
        ``_generate_spectrogram_cpu`` (fallback path).
    """
    try:
        import torch
        import torchaudio
    except (ImportError, OSError):
        # OSError is caught as well as ImportError so that a broken
        # install that fails at import time still reaches the CPU path
        # (assumes such failures surface as OSError -- TODO confirm).
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    if not torch.cuda.is_available():
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    try:
        # GPU accelerated path.
        device = "cuda"
        waveform, sr = torchaudio.load(audio_path)
        waveform = waveform.to(device)

        # Mimic librosa defaults roughly: n_fft=2048, hop_length=512.
        n_fft = 2048
        mel_spectrogram = torchaudio.transforms.MelSpectrogram(
            sample_rate=sr,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=512,
            n_mels=128,
            f_max=8000,
        ).to(device)

        S = mel_spectrogram(waveform)
        # top_db=80 mirrors the default dynamic-range floor of
        # librosa.power_to_db used by the CPU path, so both paths render
        # with a comparable colour range.
        S_dB = torchaudio.transforms.AmplitudeToDB(top_db=80.0)(S)

        # Back to CPU for plotting; only the first channel is drawn.
        S_dB = S_dB.cpu().numpy()[0]
    except Exception as e:
        # Deliberate best-effort: any GPU-side failure falls back to CPU.
        print(f"GPU processing failed, falling back to CPU: {e}")
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    # Plotting (common to both paths).
    return self._plot_spectrogram(S_dB, sr, min_pixels)
def _generate_spectrogram_cpu(self, audio_path, min_pixels=0):
    """Compute a mel spectrogram with librosa and hand it to the plotter.

    Args:
        audio_path: Path of the audio file to load with ``librosa.load``.
        min_pixels: Minimum pixel count forwarded to ``_plot_spectrogram``.

    Returns:
        The value returned by ``_plot_spectrogram``.
    """
    y, sr = librosa.load(audio_path)
    # 128 mel bands, matching the n_mels used by the GPU path.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    # Express power in dB relative to the loudest bin.
    S_dB = librosa.power_to_db(S, ref=np.max)
    return self._plot_spectrogram(S_dB, sr, min_pixels)
def _plot_spectrogram(self, S_dB, sr, min_pixels=0):
    """Render a dB-scaled spectrogram to a borderless PNG and return its path.

    Args:
        S_dB: Spectrogram in decibels, passed straight to
            ``librosa.display.specshow`` (presumably a 2-D numpy array --
            verify against callers).
        sr: Sample rate passed to ``specshow``.
        min_pixels: Minimum total pixel count the image must have. When
            greater than 0 the DPI is raised as needed; it is never
            lowered below 300.

    Returns:
        Path of the PNG written inside ``self.upload_folder``.
    """
    # Use exact dimensions without margins.
    width_in = 12
    height_in = 6

    # Calculate DPI dynamically to ensure we have enough pixels for
    # steganography: total pixels = (width_in * height_in) * dpi^2.
    dpi = 300
    if min_pixels > 0:
        required_dpi = math.ceil(math.sqrt(min_pixels / (width_in * height_in)))
        # Add a small (5%) buffer on top of the strict minimum.
        dpi = max(dpi, int(required_dpi * 1.05))

    # Nanosecond timestamp keeps the "art_<number>.png" naming while
    # avoiding collisions between images rendered within the same second.
    output_path = os.path.join(self.upload_folder, f"art_{time.time_ns()}.png")

    fig = plt.figure(figsize=(width_in, height_in))
    try:
        # Axes covering the entire figure: [left, bottom, width, height].
        ax = fig.add_axes([0, 0, 1, 1], frameon=False)
        ax.set_axis_off()

        # 'magma' is a nice default.
        librosa.display.specshow(S_dB, sr=sr, fmax=8000, cmap='magma', ax=ax)

        # Specific DPI, no bbox_inches='tight' (which shrinks the image).
        fig.savefig(output_path, dpi=dpi)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)

    return output_path
# --- Feature 2: Format Shift (Raw Data to Image) ---
def encode_shift(self, file_path):
    """Pack a file's raw bytes into a square RGB PNG and return its path.

    The payload is a header built by ``_create_header`` followed by the
    file contents. It is laid out three bytes per pixel in the smallest
    square image that can hold it; unused trailing bytes are filled by
    ``np.pad`` in constant mode.

    Args:
        file_path: Path of the file to encode.

    Returns:
        Path of the PNG written inside ``self.upload_folder``.
    """
    raw = self._get_bytes(file_path)
    payload = self._create_header(SIG_SHIFT, len(raw), file_path) + raw

    # Smallest square whose side*side pixels (3 bytes each) fit the payload.
    pixel_count = math.ceil(len(payload) / 3)
    side = math.ceil(math.sqrt(pixel_count))
    shortfall = (side * side * 3) - len(payload)

    # Flatten to bytes, then pad out to the full square if needed.
    flat = np.frombuffer(payload, dtype=np.uint8)
    if shortfall > 0:
        flat = np.pad(flat, (0, shortfall), 'constant')

    img = Image.fromarray(flat.reshape((side, side, 3)), 'RGB')

    out_name = f"shift_{os.path.basename(file_path)}.png"
    output_path = os.path.join(self.upload_folder, out_name)
    img.save(output_path, "PNG")
    return output_path
# --- Feature 3: Steganography (Embed in Host) ---
def encode_stego(self, data_path, host_path):