chore(phase-0): stabilise foundation for Studio build

- Extract WAV assembly (buildWav, mergeFloat32Arrays, decodeFloat32Chunk,
  SAMPLE_RATE) into web/lib/audio/wav.ts so it can be reused by the
  Studio playback engine and library waveform previews
- Add server/waveform.py with compute_peaks() / write_peaks() — reads
  any WAV, mixes to mono, returns min/max peak arrays matching the
  WaveformPeaks TypeScript type
- Add server/ids.py with prefixed URL-safe ID helpers (gen_id, proj_id,
  asset_id, etc.) using stdlib secrets — no new dependency
- Add docs/studio-build-plan.md — full execution spec covering stack
  decisions, data models, API contract, component hierarchy, phase
  breakdown and acceptance criteria
- Ignore data/ directory (generated audio, waveforms, SQLite DB)
This commit is contained in:
2026-05-02 17:24:45 +01:00
parent 0236807928
commit 47e0c7e512
6 changed files with 1098 additions and 48 deletions
+35
View File
@@ -0,0 +1,35 @@
"""Stable, URL-safe ID generation for VibePod entities."""
import secrets
def _make_id(prefix: str) -> str:
return f"{prefix}_{secrets.token_urlsafe(8)}"
def gen_id() -> str:
    """Return a new random ID carrying the ``gen`` prefix."""
    return _make_id(prefix="gen")
def proj_id() -> str:
    """Return a new random ID carrying the ``proj`` prefix."""
    return _make_id(prefix="proj")
def asset_id() -> str:
    """Return a new random ID carrying the ``asset`` prefix."""
    return _make_id(prefix="asset")
def track_id() -> str:
    """Return a new random ID carrying the ``track`` prefix."""
    return _make_id(prefix="track")
def clip_id() -> str:
    """Return a new random ID carrying the ``clip`` prefix."""
    return _make_id(prefix="clip")
def block_id() -> str:
    """Return a new random ID carrying the ``block`` prefix."""
    return _make_id(prefix="block")
def take_id() -> str:
    """Return a new random ID carrying the ``take`` prefix."""
    return _make_id(prefix="take")
+77
View File
@@ -0,0 +1,77 @@
"""Waveform peak generation for VibePod.
Reads a WAV file and produces min/max peak arrays suitable for canvas rendering.
The output format matches the WaveformPeaks TypeScript type in the frontend.
"""
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
import soundfile as sf
def compute_peaks(
    audio_path: str | Path,
    samples_per_pixel: int = 256,
) -> dict:
    """Compute min/max waveform peaks from a WAV file.

    The audio is decoded as float32, mixed to mono by averaging the channels,
    and cut into consecutive windows of ``samples_per_pixel`` samples. Each
    window contributes one (min, max) pair.

    Args:
        audio_path: Path to a WAV file (any bit depth, any channel count).
        samples_per_pixel: How many audio samples are condensed into one peak
            pair. Must be positive. 256 is a good default for a ~1000px wide
            waveform at standard podcast lengths.

    Returns:
        A dict matching the WaveformPeaks TypeScript type:
        {
            "sampleRate": int,
            "durationSecs": float,      # rounded to 4 decimals
            "channels": int,            # channel count of the source file
            "samplesPerPixel": int,
            "length": int,              # number of peak pairs
            "data": {
                "min": [float, ...],    # lowest mono sample per window
                "max": [float, ...],    # highest mono sample per window
            }
        }
        Peak values are rounded to 5 decimals and are NOT clamped: a window
        whose samples are all positive has a positive "min", and a window
        whose samples are all negative has a negative "max".

    Raises:
        ValueError: If ``samples_per_pixel`` is not positive.
    """
    # Validate before decoding: a zero value would otherwise surface as a
    # ZeroDivisionError (and a negative one as an opaque numpy error) only
    # after the whole file had been read.
    if samples_per_pixel <= 0:
        raise ValueError(
            f"samples_per_pixel must be a positive integer, got {samples_per_pixel!r}"
        )

    # always_2d=True keeps a channel axis even for mono files, so the channel
    # count and the mono mix-down below work the same for every input.
    samples, sample_rate = sf.read(str(audio_path), dtype="float32", always_2d=True)
    channels = samples.shape[1]

    # Mix to mono by averaging channels.
    mono = samples.mean(axis=1)
    total_samples = len(mono)
    duration_secs = total_samples / sample_rate

    # Zero-pad so the sample count is divisible by samples_per_pixel and the
    # signal can be reshaped into whole windows. The padding takes part in the
    # last window's min/max, so that pair always spans 0.0 when padding exists.
    remainder = total_samples % samples_per_pixel
    if remainder:
        pad = samples_per_pixel - remainder
        mono = np.concatenate([mono, np.zeros(pad, dtype=np.float32)])

    # One row per window; reduce each row to its extremes.
    frames = mono.reshape(-1, samples_per_pixel)
    # .tolist() already yields Python floats; rounding keeps the JSON compact.
    peak_min = [round(v, 5) for v in frames.min(axis=1).tolist()]
    peak_max = [round(v, 5) for v in frames.max(axis=1).tolist()]

    return {
        "sampleRate": int(sample_rate),
        "durationSecs": round(duration_secs, 4),
        "channels": int(channels),
        "samplesPerPixel": samples_per_pixel,
        "length": len(peak_min),
        "data": {
            "min": peak_min,
            "max": peak_max,
        },
    }
def write_peaks(audio_path: str | Path, output_path: str | Path, samples_per_pixel: int = 256) -> None:
    """Compute the peaks for ``audio_path`` and store them as compact JSON.

    The result of ``compute_peaks`` is serialised without whitespace between
    tokens and written to ``output_path`` as UTF-8, replacing any existing
    file content.
    """
    payload = compute_peaks(audio_path, samples_per_pixel)
    destination = Path(output_path)
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, separators=(",", ":"))