UI and Audio Processing Update
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import time
|
||||
import struct
|
||||
import math
|
||||
import numpy as np
|
||||
@@ -36,49 +37,90 @@ class AudioImageProcessor:
|
||||
return struct.pack(HEADER_FMT, signature, file_size, len(ext_bytes)) + ext_bytes
|
||||
|
||||
# --- Feature 1: Spectrogram Art ---
|
||||
def generate_spectrogram(self, audio_path, min_pixels=0):
    """Generate a mel-spectrogram image from an audio file.

    Uses torchaudio on a CUDA device when torch, torchaudio and a GPU are
    all available; otherwise, or if the GPU path raises for any reason,
    delegates to ``self._generate_spectrogram_cpu``.

    Args:
        audio_path: Path of the audio file to visualise.
        min_pixels: Minimum number of pixels the rendered image should
            contain; passed through unchanged to the rendering helpers.
            ``0`` means no requirement.

    Returns:
        Whatever the rendering helper returns (the path of the saved image).
    """
    try:
        import torch
        import torchaudio
    except ImportError:
        # torch/torchaudio are optional; without them only the CPU path exists.
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    if not torch.cuda.is_available():
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    try:
        # GPU accelerated path
        device = "cuda"
        waveform, sr = torchaudio.load(audio_path)
        # Only the first channel is rendered, so drop the others before
        # moving data to the GPU and running the transform.
        waveform = waveform[:1].to(device)

        # Mimic librosa defaults roughly: n_fft=2048, hop_length=512
        n_fft = 2048
        hop_length = 512
        n_mels = 128

        mel_spectrogram = torchaudio.transforms.MelSpectrogram(
            sample_rate=sr,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            # The file's native sample rate is used here, so keep the upper
            # mel edge at or below Nyquist for low-sample-rate audio.
            f_max=min(8000.0, sr / 2),
        ).to(device)

        S = mel_spectrogram(waveform)
        # Clamp to an 80 dB range, the default of librosa.power_to_db that
        # the CPU path relies on, so both paths share the same dynamic range.
        S_dB = torchaudio.transforms.AmplitudeToDB(stype="power", top_db=80.0)(S)

        # Back to CPU as a numpy array for plotting (first channel).
        S_dB = S_dB.cpu().numpy()[0]
    except Exception as e:
        # Deliberate best-effort: any GPU-side failure falls back to CPU.
        print(f"GPU processing failed, falling back to CPU: {e}")
        return self._generate_spectrogram_cpu(audio_path, min_pixels)

    # Plotting (common)
    return self._plot_spectrogram(S_dB, sr, min_pixels)
|
||||
|
||||
def _generate_spectrogram_cpu(self, audio_path, min_pixels=0):
    """Compute a mel spectrogram with librosa and render it.

    Args:
        audio_path: Path of the audio file to load with ``librosa.load``.
        min_pixels: Passed through unchanged to ``self._plot_spectrogram``.

    Returns:
        The value returned by ``self._plot_spectrogram``.
    """
    y, sr = librosa.load(audio_path)
    # 128 mel bands, the same count the GPU path uses.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    # Power -> dB, referenced to the loudest bin.
    S_dB = librosa.power_to_db(S, ref=np.max)

    # All figure handling lives in the shared plotting helper.
    return self._plot_spectrogram(S_dB, sr, min_pixels)
|
||||
def _plot_spectrogram(self, S_dB, sr, min_pixels=0):
    """Render a dB-scaled spectrogram to a borderless PNG.

    Args:
        S_dB: Spectrogram array handed to ``librosa.display.specshow``.
        sr: Sample rate forwarded to ``librosa.display.specshow``.
        min_pixels: Minimum total pixel count wanted in the output image
            (e.g. for steganography capacity). ``0`` keeps the 300 DPI default.

    Returns:
        Path of the saved PNG inside ``self.upload_folder``.
    """
    # Exact figure dimensions; the axes fill the figure so nothing is trimmed.
    width_in = 12
    height_in = 6

    # Calculate DPI dynamically to ensure we have enough pixels.
    dpi = 300
    if min_pixels > 0:
        # Total pixels = (width_in * height_in) * dpi^2, solved for dpi.
        required_dpi = math.ceil((min_pixels / (width_in * height_in)) ** 0.5)
        # Add a small buffer, never going below the default.
        dpi = max(dpi, int(required_dpi * 1.05))

    # Nanosecond timestamp so calls within the same second do not overwrite
    # each other's output.
    output_path = os.path.join(self.upload_folder, f"art_{time.time_ns()}.png")

    fig = plt.figure(figsize=(width_in, height_in))
    try:
        # Axes covering the entire figure: [left, bottom, width, height].
        ax = fig.add_axes([0, 0, 1, 1], frameon=False)
        ax.set_axis_off()

        # 'magma' is a nice default
        librosa.display.specshow(S_dB, sr=sr, fmax=8000, cmap='magma', ax=ax)

        # Specific DPI, no bbox_inches='tight' (which shrinks the image).
        fig.savefig(output_path, dpi=dpi)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return output_path
|
||||
|
||||
# --- Feature 2: Format Shift (Raw Data to Image) ---
|
||||
def encode_shift(self, file_path):
    """Pack a file's raw bytes into a square RGB PNG.

    The file contents are prefixed with a header built by
    ``self._create_header`` and laid out three bytes per pixel on the
    smallest square that holds them; unused trailing bytes are zero.

    Args:
        file_path: Path of the file to encode.

    Returns:
        Path of the PNG written to ``self.upload_folder``.
    """
    raw = self._get_bytes(file_path)
    blob = self._create_header(SIG_SHIFT, len(raw), file_path) + raw
    total = len(blob)

    # Smallest square whose side*side pixels (3 bytes each) fit the payload.
    pixel_count = math.ceil(total / 3)
    side = math.ceil(math.sqrt(pixel_count))
    pad_len = (side * side * 3) - total

    flat = np.frombuffer(blob, dtype=np.uint8)
    if pad_len > 0:
        # Zero-fill the tail so the buffer reshapes cleanly.
        flat = np.pad(flat, (0, pad_len), 'constant')
    pixels = flat.reshape((side, side, 3))

    img = Image.fromarray(pixels, 'RGB')
    output_path = os.path.join(
        self.upload_folder,
        f"shift_{os.path.basename(file_path)}.png",
    )
    img.save(output_path, "PNG")
    return output_path
|
||||
|
||||
|
||||
# --- Feature 3: Steganography (Embed in Host) ---
|
||||
def encode_stego(self, data_path, host_path):
|
||||
|
||||
Reference in New Issue
Block a user