chore(phase-0): stabilise foundation for Studio build

- Extract WAV assembly (buildWav, mergeFloat32Arrays, decodeFloat32Chunk, SAMPLE_RATE) into web/lib/audio/wav.ts so it can be reused by the Studio playback engine and library waveform previews
- Add server/waveform.py with compute_peaks() / write_peaks() — reads any WAV, mixes to mono, returns min/max peak arrays matching the WaveformPeaks TypeScript type
- Add server/ids.py with prefixed URL-safe ID helpers (gen_id, proj_id, asset_id, etc.) using stdlib secrets — no new dependency
- Add docs/studio-build-plan.md — full execution spec covering stack decisions, data models, API contract, component hierarchy, phase breakdown and acceptance criteria
- Ignore data/ directory (generated audio, waveforms, SQLite DB)
2026-06-13 03:58:07 +00:00 · 2026-05-02 17:24:45 +01:00
parent 0236807928
commit 47e0c7e512
6 changed files with 1098 additions and 48 deletions
@@ -0,0 +1,77 @@
+"""Waveform peak generation for VibePod.
+
+Reads a WAV file and produces min/max peak arrays suitable for canvas rendering.
+The output format matches the WaveformPeaks TypeScript type in the frontend.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import soundfile as sf
+
+
+def compute_peaks(
+    audio_path: str | Path,
+    samples_per_pixel: int = 256,
+) -> dict:
+    """Compute min/max waveform peaks from a WAV file.
+
+    Args:
+        audio_path: Path to a WAV file (any bit depth, any channel count).
+        samples_per_pixel: How many audio samples are condensed into one peak pair.
+                           256 is a good default for a ~1000px wide waveform at
+                           standard podcast lengths.
+
+    Returns:
+        A dict matching the WaveformPeaks TypeScript type:
+        {
+            "sampleRate": int,
+            "durationSecs": float,
+            "channels": int,
+            "samplesPerPixel": int,
+            "length": int,           # number of peak pairs
+            "data": {
+                "min": [float, ...], # values in [-1.0, 0.0]
+                "max": [float, ...], # values in [0.0, 1.0]
+            }
+        }
+    """
+    samples, sample_rate = sf.read(str(audio_path), dtype="float32", always_2d=True)
+
+    # Mix to mono by averaging channels
+    mono = samples.mean(axis=1)
+    total_samples = len(mono)
+    duration_secs = total_samples / sample_rate
+    channels = samples.shape[1]
+
+    # Pad so total_samples is divisible by samples_per_pixel
+    remainder = total_samples % samples_per_pixel
+    if remainder:
+        pad = samples_per_pixel - remainder
+        mono = np.concatenate([mono, np.zeros(pad, dtype=np.float32)])
+
+    frames = mono.reshape(-1, samples_per_pixel)
+    peak_min = frames.min(axis=1).tolist()
+    peak_max = frames.max(axis=1).tolist()
+    length = len(peak_min)
+
+    return {
+        "sampleRate": int(sample_rate),
+        "durationSecs": round(duration_secs, 4),
+        "channels": int(channels),
+        "samplesPerPixel": samples_per_pixel,
+        "length": length,
+        "data": {
+            "min": [round(float(v), 5) for v in peak_min],
+            "max": [round(float(v), 5) for v in peak_max],
+        },
+    }
+
+
+def write_peaks(audio_path: str | Path, output_path: str | Path, samples_per_pixel: int = 256) -> None:
+    """Compute peaks and write them to a JSON file."""
+    peaks = compute_peaks(audio_path, samples_per_pixel)
+    Path(output_path).write_text(json.dumps(peaks, separators=(",", ":")), encoding="utf-8")