From 13085166fb84989c2f650d6d5da8f512c0133fcc Mon Sep 17 00:00:00 2001 From: LyAhn Date: Sat, 2 May 2026 23:05:11 +0100 Subject: [PATCH] feat(phase-1): persistent generation library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Save every completed generation to SQLite (generation_store.py) with WAV and waveform peaks written to data/generations/<id>/ - Deferred DB write until success — cancelled/errored generations never touch the DB and never appear in the library - Fixed cancel+regenerate IndexError: _reset_scheduler_caches() now directly resets scheduler._step_index and running state in addition to clearing VibePod cache dicts; same explicit resets added in the fresh path of prepare_noise_scheduler as belt-and-suspenders - Added /library page with GenerationCard, WaveformPreview, waveform fetch, play/pause, download, delete, pagination, empty + error states - Added generation API routes (list, single, audio stream, waveform, delete) proxying to Python server - Added Library nav link to Header with active state - Persist script/speaker/CFG to localStorage so generate page state survives navigation - Updated build plan: Phase 0+1 ticked off, better-sqlite3 moved to Phase 2, architectural note on Python owning all persistence --- docs/studio-build-plan.md | 46 ++--- server/generation_store.py | 133 +++++++++++++ server/vibevoice_server.py | 183 +++++++++++++++++- web/app/api/generations/[id]/audio/route.ts | 23 +++ web/app/api/generations/[id]/route.ts | 27 +++ .../api/generations/[id]/waveform/route.ts | 16 ++ web/app/api/generations/route.ts | 19 ++ web/app/library/page.tsx | 162 ++++++++++++++++ web/app/page.tsx | 33 +++- web/components/GenerationCard.tsx | 180 +++++++++++++++++ web/components/Header.tsx | 29 +++ web/components/WaveformPreview.tsx | 57 ++++++ web/lib/types/generation.ts | 34 ++++ 13 files changed, 913 insertions(+), 29 deletions(-) create mode 100644 server/generation_store.py create mode 100644 
web/app/api/generations/[id]/audio/route.ts create mode 100644 web/app/api/generations/[id]/route.ts create mode 100644 web/app/api/generations/[id]/waveform/route.ts create mode 100644 web/app/api/generations/route.ts create mode 100644 web/app/library/page.tsx create mode 100644 web/components/GenerationCard.tsx create mode 100644 web/components/WaveformPreview.tsx create mode 100644 web/lib/types/generation.ts diff --git a/docs/studio-build-plan.md b/docs/studio-build-plan.md index 210a04a..d70e46d 100644 --- a/docs/studio-build-plan.md +++ b/docs/studio-build-plan.md @@ -137,7 +137,7 @@ Build inside the existing stack. New packages require a written reason in this d |---|---|---| | `zustand` | Studio editor state | Phase 2 | | `@dnd-kit/core` + `@dnd-kit/utilities` | Clip drag-and-drop in timeline | Phase 2 | -| `better-sqlite3` | SQLite for job and project persistence | Phase 1 | +| `better-sqlite3` | SQLite for project/studio persistence (Next.js layer) | Phase 2 | **Conditionally approved (evaluate at phase start):** @@ -712,11 +712,11 @@ Render runs in a background thread. The client polls `GET /api/projects/:id/rend **Goal:** Clean foundation. No new features. Tasks: -- [ ] Extract WAV assembly from `useStreamingGeneration.ts` into `web/lib/audio/wav.ts` -- [ ] Extract waveform peak generation into `server/waveform.py` -- [ ] Confirm generation cancellation works cleanly (stream abort + server cancel_event) -- [ ] Add `nanoid` to backend for stable generation IDs -- [ ] Add `data/` directory to `.gitignore` +- [x] Extract WAV assembly from `useStreamingGeneration.ts` into `web/lib/audio/wav.ts` +- [x] Extract waveform peak generation into `server/waveform.py` +- [x] Confirm generation cancellation works cleanly (stream abort + server cancel_event) +- [x] Add stable generation IDs to backend (`server/ids.py` via `secrets.token_urlsafe`) +- [x] Add `data/` directory to `.gitignore` **Acceptance:** WAV assembly is a pure function with unit tests. 
Generation IDs are stable. @@ -727,25 +727,23 @@ Tasks: **Goal:** Every generation is saved. Users can browse, play, and download past generations. **Backend tasks:** -- [ ] Add SQLite setup (`data/db/vibepod.db`, schema migration 001) -- [ ] `generations` table: `id, created_at, status, script, speaker, cfg_scale, inference_steps, duration_secs, sample_rate, audio_path, waveform_path, error_message` -- [ ] On generation complete: save WAV to `data/generations/<id>/audio.wav` -- [ ] On generation complete: compute and save waveform peaks to `data/generations/<id>/waveform.json` -- [ ] Implement `GET /api/generations` (list, paginated) -- [ ] Implement `GET /api/generations/:id` (single) -- [ ] Implement `GET /api/generations/:id/audio` (stream file) -- [ ] Implement `GET /api/generations/:id/waveform` (peaks JSON) -- [ ] Implement `DELETE /api/generations/:id` (delete row + files) +- [x] Add SQLite setup (`data/db/vibepod.db`) +- [x] `generations` table: `id, created_at, status, script, speaker, cfg_scale, inference_steps, duration_secs, sample_rate, audio_path, waveform_path, error_message` +- [x] On generation complete: save WAV to `data/generations/<id>/audio.wav` +- [x] On generation complete: compute and save waveform peaks to `data/generations/<id>/waveform.json` +- [x] Implement `GET /api/generations` (list, paginated) +- [x] Implement `GET /api/generations/:id` (single) +- [x] Implement `GET /api/generations/:id/audio` (stream file) +- [x] Implement `GET /api/generations/:id/waveform` (peaks JSON) +- [x] Implement `DELETE /api/generations/:id` (delete row + files) **Frontend tasks:** -- [ ] Install `better-sqlite3` + types -- [ ] Create `web/lib/db/` — schema, migration runner, query helpers -- [ ] Create `/library` route and `LibraryPage` component -- [ ] `GenerationCard` component: waveform preview canvas, metadata, play/download/delete actions -- [ ] `WaveformPreview` component: draws peaks on canvas (static, no playback) -- [ ] Mini audio player for library card 
playback (reuse `useAudioPlayer` hook) -- [ ] Link "Open in Studio" button (navigates to `/studio/new?fromGeneration=`) -- [ ] Add "Library" link to `Header` +- [x] Create `/library` route and `LibraryPage` component +- [x] `GenerationCard` component: waveform preview canvas, metadata, play/download/delete actions +- [x] `WaveformPreview` component: draws peaks on canvas (static, no playback) +- [x] Mini audio player for library card playback +- [ ] Link "Open in Studio" button (navigates to `/studio/new?fromGeneration=`) — deferred to Phase 2 +- [x] Add "Library" link to `Header` **Acceptance:** - Generate audio → close browser → reopen → generation appears in library with waveform @@ -771,6 +769,8 @@ Tasks: - [ ] Implement basic render endpoint (single voice track, WAV out only) **Frontend tasks:** +- [ ] Install `better-sqlite3` + types +- [ ] Create `web/lib/db/` — schema, migration runner, query helpers - [ ] Install `zustand`, `@dnd-kit/core`, `@dnd-kit/utilities` - [ ] Create Studio Zustand store (`web/stores/studioStore.ts`) - Project state, selected clip, playhead time, zoom, isPlaying, undo stack diff --git a/server/generation_store.py b/server/generation_store.py new file mode 100644 index 0000000..801d088 --- /dev/null +++ b/server/generation_store.py @@ -0,0 +1,133 @@ +"""SQLite persistence for VibePod generation jobs. + +Schema lives here. The database is created on first use at: + /data/db/vibepod.db + +All writes go through this module. The Next.js layer reads the same file +via better-sqlite3 for project-level data in later phases. +""" + +from __future__ import annotations + +import json +import shutil +import sqlite3 +from datetime import datetime, timezone +from pathlib import Path + +# Paths relative to the repo root (one level up from this file's directory). 
+_REPO_ROOT = Path(__file__).parent.parent +DATA_DIR = _REPO_ROOT / "data" +DB_PATH = DATA_DIR / "db" / "vibepod.db" +GENERATIONS_DIR = DATA_DIR / "generations" + +_CREATE_GENERATIONS = """ +CREATE TABLE IF NOT EXISTS generations ( + id TEXT PRIMARY KEY, + created_at TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'generating', + script TEXT NOT NULL, + speaker TEXT NOT NULL, + cfg_scale REAL NOT NULL, + inference_steps INTEGER, + duration_secs REAL, + sample_rate INTEGER, + audio_path TEXT, + waveform_path TEXT, + error_message TEXT +) +""" + + +def _connect() -> sqlite3.Connection: + conn = sqlite3.connect(str(DB_PATH)) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys=ON") + return conn + + +def init_db() -> None: + """Create the database directory, database file, and tables if they don't exist.""" + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + GENERATIONS_DIR.mkdir(parents=True, exist_ok=True) + with _connect() as conn: + conn.execute(_CREATE_GENERATIONS) + + +def save_completed_job( + job_id: str, + script: str, + speaker: str, + cfg_scale: float, + inference_steps: int | None, + duration_secs: float, + sample_rate: int, + audio_path: str, + waveform_path: str, +) -> None: + """Insert a completed generation in a single write — no intermediate 'generating' row.""" + created_at = datetime.now(timezone.utc).isoformat() + with _connect() as conn: + conn.execute( + """ + INSERT INTO generations + (id, created_at, status, script, speaker, cfg_scale, inference_steps, + duration_secs, sample_rate, audio_path, waveform_path) + VALUES (?, ?, 'complete', ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + job_id, created_at, script, speaker, cfg_scale, inference_steps, + round(duration_secs, 3), sample_rate, audio_path, waveform_path, + ), + ) + + +def cancel_job(job_id: str) -> None: + with _connect() as conn: + conn.execute( + "UPDATE generations SET status = 'cancelled' WHERE id = ?", + (job_id,), + ) + + +def fail_job(job_id: str, error_message: str) -> None: + with _connect() as conn: + conn.execute( + "UPDATE generations SET status = 'error', error_message = ? WHERE id = ?", + (error_message[:2000], job_id), + ) + + +def list_jobs(limit: int = 50, offset: int = 0) -> list[dict]: + with _connect() as conn: + rows = conn.execute( + "SELECT * FROM generations ORDER BY created_at DESC LIMIT ? OFFSET ?", + (limit, offset), + ).fetchall() + return [dict(row) for row in rows] + + +def get_job(job_id: str) -> dict | None: + with _connect() as conn: + row = conn.execute( + "SELECT * FROM generations WHERE id = ?", (job_id,) + ).fetchone() + return dict(row) if row else None + + +def delete_job(job_id: str) -> bool: + """Delete the job record and its files. 
Returns True if the record existed.""" + job_dir = GENERATIONS_DIR / job_id + if job_dir.exists(): + shutil.rmtree(job_dir) + + with _connect() as conn: + result = conn.execute( + "DELETE FROM generations WHERE id = ?", (job_id,) + ) + return result.rowcount > 0 + + +def job_dir(job_id: str) -> Path: + return GENERATIONS_DIR / job_id diff --git a/server/vibevoice_server.py b/server/vibevoice_server.py index 0c32e6c..160de7d 100644 --- a/server/vibevoice_server.py +++ b/server/vibevoice_server.py @@ -22,6 +22,7 @@ import asyncio import base64 import concurrent.futures import copy +import struct import functools import importlib.util import json @@ -37,10 +38,16 @@ from contextlib import asynccontextmanager from pathlib import Path from typing import Literal +import numpy as np +import soundfile as sf import torch from fastapi import FastAPI, HTTPException, Request -from fastapi.responses import StreamingResponse +from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel, Field, field_validator + +import generation_store +import ids +import waveform as waveform_module from tqdm import tqdm as _BaseTqdm logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") @@ -49,6 +56,32 @@ logger = logging.getLogger(__name__) MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B" SAMPLE_RATE = 24_000 + +def _write_float32_wav(path: Path, samples: np.ndarray, sample_rate: int) -> None: + """Write a mono float32 WAV without relying on libsndfile. + + Uses the same RIFF/IEEE-float layout as the browser's buildWav(), so the + file is playable by anything that understands IEEE-float WAV (codec tag 3). + """ + flat = samples.flatten().astype(np.float32) + data = flat.tobytes() + data_size = len(data) + with open(path, "wb") as f: + f.write(b"RIFF") + f.write(struct.pack(" None: + """Clear VibePod scheduler caches and reset all scheduler running state. 
+ + Called on every cancel/timeout path so the next generation starts from a + completely clean slate. We do two things: + + 1. Clear the VibePod cache dicts so prepare_noise_scheduler takes the fresh + path and calls set_timesteps(), which re-initialises sigmas/timesteps. + + 2. Directly zero out the scheduler's running counters (_step_index, + model_outputs, lower_order_nums, _begin_index). This is belt-and- + suspenders: VibeVoice's set_timesteps() *does* reset these fields, but + if a cancelled thread left _step_index=N and the new generation's + _init_step_index guard (``if self.step_index is None``) sees a non-None + value it skips initialisation entirely, causing an out-of-bounds access + on sigmas[step_index + 1] at the very first step. + """ + if _model is None: + return + for attr in ("_vibepod_scheduler_cache", "_vibepod_t_batch_cache"): + if hasattr(_model, attr): + setattr(_model, attr, {}) + try: + scheduler = _model.model.noise_scheduler + scheduler._step_index = None + scheduler._begin_index = None + scheduler.model_outputs = [None] * scheduler.config.solver_order + scheduler.lower_order_nums = 0 + except Exception: + pass + + # Config defaults (can be overridden by env vars) # These are populated in _load_model_sync once the device is known. _config = { @@ -446,6 +512,14 @@ def _install_generation_optimizations(model: object) -> None: if cached is None: scheduler.set_timesteps(self.ddpm_inference_steps) + # Belt-and-suspenders: explicitly reset running state even though + # set_timesteps() should do it, because a prior cancelled generation + # may have left _step_index non-None, causing _init_step_index to + # be skipped and triggering an out-of-bounds access in step(). 
+ scheduler._step_index = None + scheduler._begin_index = None + scheduler.model_outputs = [None] * scheduler.config.solver_order + scheduler.lower_order_nums = 0 cached = { "num_inference_steps": scheduler.num_inference_steps, "timesteps": scheduler.timesteps, @@ -664,6 +738,7 @@ def _load_model_sync() -> None: @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: + generation_store.init_db() thread = threading.Thread(target=_load_model_sync, daemon=True, name="model-loader") thread.start() yield @@ -839,12 +914,14 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: ) self.finished_flags[idx] = True + job_id = ids.gen_id() start = time.monotonic() streamer = NonBlockingAudioStreamer(batch_size=1) cancel_event = threading.Event() accum_size = max(1, _config["chunk_accum"]) accumulated_chunks = [] + all_save_chunks: list[torch.Tensor] = [] chunk_count = 0 audio_samples = 0 first_chunk_at: float | None = None @@ -866,14 +943,22 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: chunk = await asyncio.wait_for(streamer.audio_queues[0].get(), timeout=120.0) except asyncio.TimeoutError: cancel_event.set() - future.cancel() + try: + await asyncio.wait_for(asyncio.wrap_future(future), timeout=15.0) + except Exception: + pass + _reset_scheduler_caches() yield _sse({"type": "error", "message": "Generation timed out"}) return if await request.is_disconnected(): cancel_event.set() - future.cancel() - logger.info("Generation client disconnected; stream cancelled.") + logger.info("Client disconnected; waiting for inference thread to stop.") + try: + await asyncio.wait_for(asyncio.wrap_future(future), timeout=15.0) + except Exception: + pass + _reset_scheduler_caches() return if chunk is None: # stop signal @@ -895,6 +980,7 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: .to("cpu", dtype=torch.float32) .contiguous() ) + 
all_save_chunks.append(combined) chunk_count += 1 audio_samples += combined.numel() pcm_b64 = base64.b64encode(combined.numpy().tobytes()).decode() @@ -916,6 +1002,7 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: .to("cpu", dtype=torch.float32) .contiguous() ) + all_save_chunks.append(combined) chunk_count += 1 audio_samples += combined.numel() pcm_b64 = base64.b64encode(combined.numpy().tobytes()).decode() @@ -924,7 +1011,13 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: try: speaker = await future except asyncio.CancelledError: - logger.info("Generation cancelled.") + cancel_event.set() + logger.info("Generation cancelled; waiting for inference thread to stop.") + try: + await asyncio.wait_for(asyncio.wrap_future(future), timeout=15.0) + except Exception: + pass + _reset_scheduler_caches() yield _sse({"type": "cancelled"}) return except Exception as exc: @@ -944,8 +1037,38 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: if profile is not None: logger.info("Generation profile: %s", profile) logger.info("Generation complete in %.1fs", elapsed) + + # Persist audio and waveform peaks after streaming is done. 
+ audio_path: str | None = None + waveform_path: str | None = None + try: + out_dir = generation_store.job_dir(job_id) + out_dir.mkdir(parents=True, exist_ok=True) + wav_path = out_dir / "audio.wav" + peaks_path = out_dir / "waveform.json" + if all_save_chunks: + all_audio = torch.cat(all_save_chunks).numpy() + _write_float32_wav(wav_path, all_audio, SAMPLE_RATE) + waveform_module.write_peaks(wav_path, peaks_path) + audio_path = str(wav_path) + waveform_path = str(peaks_path) + generation_store.save_completed_job( + job_id, + script=req.text, + speaker=speaker, + cfg_scale=req.cfg_scale, + inference_steps=req.inference_steps, + duration_secs=audio_secs, + sample_rate=SAMPLE_RATE, + audio_path=audio_path or "", + waveform_path=waveform_path or "", + ) + except Exception: + logger.exception("Failed to persist generation %s", job_id) + complete_event = { "type": "complete", + "job_id": job_id, "elapsed": elapsed, "speaker": speaker, "audio_secs": round(audio_secs, 2), @@ -969,3 +1092,53 @@ async def generate(req: GenerateRequest, request: Request) -> StreamingResponse: "X-Content-Type-Options": "nosniff", }, ) + + +# ── Generation library endpoints ──────────────────────────────────────────────── + + +@app.get("/generations") +async def list_generations(limit: int = 50, offset: int = 0) -> dict: + jobs = generation_store.list_jobs(limit=min(limit, 200), offset=offset) + return {"items": jobs, "limit": limit, "offset": offset} + + +@app.get("/generations/{job_id}") +async def get_generation(job_id: str) -> dict: + job = generation_store.get_job(job_id) + if not job: + raise HTTPException(status_code=404, detail="Generation not found") + return job + + +@app.get("/generations/{job_id}/audio") +async def get_generation_audio(job_id: str) -> FileResponse: + job = generation_store.get_job(job_id) + if not job or not job.get("audio_path"): + raise HTTPException(status_code=404, detail="Audio not found") + audio_path = Path(job["audio_path"]) + if not audio_path.exists(): + 
raise HTTPException(status_code=404, detail="Audio file missing from disk") + return FileResponse( + str(audio_path), + media_type="audio/wav", + filename=f"{job_id}.wav", + ) + + +@app.get("/generations/{job_id}/waveform") +async def get_generation_waveform(job_id: str) -> dict: + job = generation_store.get_job(job_id) + if not job or not job.get("waveform_path"): + raise HTTPException(status_code=404, detail="Waveform not found") + peaks_path = Path(job["waveform_path"]) + if not peaks_path.exists(): + raise HTTPException(status_code=404, detail="Waveform file missing from disk") + return json.loads(peaks_path.read_text(encoding="utf-8")) + + +@app.delete("/generations/{job_id}", status_code=204) +async def delete_generation(job_id: str) -> None: + deleted = generation_store.delete_job(job_id) + if not deleted: + raise HTTPException(status_code=404, detail="Generation not found") diff --git a/web/app/api/generations/[id]/audio/route.ts b/web/app/api/generations/[id]/audio/route.ts new file mode 100644 index 0000000..fcf5b0b --- /dev/null +++ b/web/app/api/generations/[id]/audio/route.ts @@ -0,0 +1,23 @@ +import { NextRequest, NextResponse } from "next/server"; + +export const dynamic = "force-dynamic"; + +const pythonUrl = () => process.env.VIBEVOICE_SERVER_URL ?? 
"http://localhost:8000"; + +export async function GET(_: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params; + try { + const res = await fetch(`${pythonUrl()}/generations/${id}/audio`); + if (!res.ok) return NextResponse.json({ error: "Audio not found" }, { status: res.status }); + return new NextResponse(res.body, { + status: 200, + headers: { + "Content-Type": "audio/wav", + "Content-Disposition": `attachment; filename="${id}.wav"`, + "Cache-Control": "public, max-age=31536000, immutable", + }, + }); + } catch { + return NextResponse.json({ error: "Failed to reach server" }, { status: 502 }); + } +} diff --git a/web/app/api/generations/[id]/route.ts b/web/app/api/generations/[id]/route.ts new file mode 100644 index 0000000..2fc875c --- /dev/null +++ b/web/app/api/generations/[id]/route.ts @@ -0,0 +1,27 @@ +import { NextRequest, NextResponse } from "next/server"; + +const pythonUrl = () => process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000"; + +export async function GET(_: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params; + try { + const res = await fetch(`${pythonUrl()}/generations/${id}`, { cache: "no-store" }); + if (!res.ok) return NextResponse.json({ error: "Not found" }, { status: res.status }); + return NextResponse.json(await res.json()); + } catch { + return NextResponse.json({ error: "Failed to reach server" }, { status: 502 }); + } +} + +export async function DELETE(_: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params; + try { + const res = await fetch(`${pythonUrl()}/generations/${id}`, { method: "DELETE" }); + if (res.status === 404) + return NextResponse.json({ error: "Not found" }, { status: 404 }); + if (!res.ok) return NextResponse.json({ error: "Upstream error" }, { status: res.status }); + return new NextResponse(null, { status: 204 }); + } catch { + return NextResponse.json({ error: "Failed to reach server" 
}, { status: 502 }); + } +} diff --git a/web/app/api/generations/[id]/waveform/route.ts b/web/app/api/generations/[id]/waveform/route.ts new file mode 100644 index 0000000..4d940da --- /dev/null +++ b/web/app/api/generations/[id]/waveform/route.ts @@ -0,0 +1,16 @@ +import { NextRequest, NextResponse } from "next/server"; + +const pythonUrl = () => process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000"; + +export async function GET(_: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params; + try { + const res = await fetch(`${pythonUrl()}/generations/${id}/waveform`, { cache: "no-store" }); + if (!res.ok) return NextResponse.json({ error: "Waveform not found" }, { status: res.status }); + return NextResponse.json(await res.json(), { + headers: { "Cache-Control": "public, max-age=31536000, immutable" }, + }); + } catch { + return NextResponse.json({ error: "Failed to reach server" }, { status: 502 }); + } +} diff --git a/web/app/api/generations/route.ts b/web/app/api/generations/route.ts new file mode 100644 index 0000000..9e041b8 --- /dev/null +++ b/web/app/api/generations/route.ts @@ -0,0 +1,19 @@ +import { NextRequest, NextResponse } from "next/server"; + +const pythonUrl = () => process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000"; + +export async function GET(request: NextRequest) { + const { searchParams } = new URL(request.url); + const limit = searchParams.get("limit") ?? "50"; + const offset = searchParams.get("offset") ?? 
"0"; + + try { + const res = await fetch(`${pythonUrl()}/generations?limit=${limit}&offset=${offset}`, { + cache: "no-store", + }); + if (!res.ok) return NextResponse.json({ error: "Upstream error" }, { status: res.status }); + return NextResponse.json(await res.json()); + } catch { + return NextResponse.json({ error: "Failed to reach server" }, { status: 502 }); + } +} diff --git a/web/app/library/page.tsx b/web/app/library/page.tsx new file mode 100644 index 0000000..aacb627 --- /dev/null +++ b/web/app/library/page.tsx @@ -0,0 +1,162 @@ +"use client"; + +import { useCallback, useEffect, useState } from "react"; +import Header from "@/components/Header"; +import GenerationCard from "@/components/GenerationCard"; +import type { GenerationJob, GenerationsListResponse } from "@/lib/types/generation"; + +const PAGE_SIZE = 24; + +export default function LibraryPage() { + const [jobs, setJobs] = useState([]); + const [offset, setOffset] = useState(0); + const [hasMore, setHasMore] = useState(true); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const fetchJobs = useCallback(async (currentOffset: number, replace: boolean) => { + setLoading(true); + setError(null); + try { + const res = await fetch( + `/api/generations?limit=${PAGE_SIZE}&offset=${currentOffset}`, + { cache: "no-store" } + ); + if (!res.ok) throw new Error(`Server returned ${res.status}`); + const data = (await res.json()) as GenerationsListResponse; + setJobs((prev) => (replace ? data.items : [...prev, ...data.items])); + setHasMore(data.items.length === PAGE_SIZE); + } catch (err) { + setError(err instanceof Error ? 
err.message : "Failed to load generations"); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { + fetchJobs(0, true); + }, [fetchJobs]); + + function handleDelete(id: string) { + setJobs((prev) => prev.filter((j) => j.id !== id)); + } + + function handleLoadMore() { + const next = offset + PAGE_SIZE; + setOffset(next); + fetchJobs(next, false); + } + + return ( +
+
+ +
+ {/* Page header */} +
+
+

+ Generation Library +

+

+ Every completed generation is saved here. +

+
+ + + New Generation + +
+ + {/* Error state */} + {error && ( +
+ {error} —{" "} + +
+ )} + + {/* Empty state */} + {!loading && jobs.length === 0 && !error && ( +
+

🎙

+

+ No generations yet +

+

+ Generate some audio and it will appear here automatically. +

+ + Go generate something + +
+ )} + + {/* Grid */} + {jobs.length > 0 && ( +
+ {jobs.map((job) => ( + + ))} +
+ )} + + {/* Load more */} + {hasMore && !loading && jobs.length > 0 && ( +
+ +
+ )} + + {/* Loading spinner */} + {loading && ( +
+ Loading… +
+ )} +
+
+ ); +} diff --git a/web/app/page.tsx b/web/app/page.tsx index 128824a..a04ca3a 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -171,8 +171,39 @@ const initialState: AppState = { serverConfig: null, }; +const STORAGE_KEY = "vibepod_form"; + +function loadSavedForm(): Partial> { + if (typeof window === "undefined") return {}; + try { + const raw = localStorage.getItem(STORAGE_KEY); + if (!raw) return {}; + return JSON.parse(raw) as Partial>; + } catch { + return {}; + } +} + export default function HomePage() { - const [state, dispatch] = useReducer(reducer, initialState); + const [state, dispatch] = useReducer(reducer, initialState, (base) => { + const saved = loadSavedForm(); + return { + ...base, + ...(saved.script !== undefined && { script: saved.script }), + ...(saved.speaker !== undefined && { speaker: saved.speaker }), + ...(typeof saved.cfgScale === "number" && { cfgScale: saved.cfgScale }), + }; + }); + + // Persist user-editable form fields across navigation. + useEffect(() => { + try { + localStorage.setItem( + STORAGE_KEY, + JSON.stringify({ script: state.script, speaker: state.speaker, cfgScale: state.cfgScale }) + ); + } catch {} + }, [state.script, state.speaker, state.cfgScale]); const wordCount = state.script.trim() === "" ? 0 : state.script.trim().split(/\s+/).length; diff --git a/web/components/GenerationCard.tsx b/web/components/GenerationCard.tsx new file mode 100644 index 0000000..0c91dab --- /dev/null +++ b/web/components/GenerationCard.tsx @@ -0,0 +1,180 @@ +"use client"; + +import { useEffect, useRef, useState } from "react"; +import type { GenerationJob, WaveformPeaks } from "@/lib/types/generation"; +import WaveformPreview from "./WaveformPreview"; + +interface GenerationCardProps { + job: GenerationJob; + onDelete: (id: string) => void; +} + +function formatDuration(secs: number | null): string { + if (secs === null) return "—"; + const m = Math.floor(secs / 60); + const s = Math.floor(secs % 60); + return m > 0 ? 
`${m}m ${s}s` : `${s}s`; +} + +function formatDate(iso: string): string { + return new Date(iso).toLocaleString(undefined, { + month: "short", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + }); +} + +function truncate(text: string, max: number): string { + return text.length > max ? text.slice(0, max) + "…" : text; +} + +export default function GenerationCard({ job, onDelete }: GenerationCardProps) { + const [peaks, setPeaks] = useState(null); + const [isPlaying, setIsPlaying] = useState(false); + const [isDeleting, setIsDeleting] = useState(false); + const audioRef = useRef(null); + + useEffect(() => { + if (job.status !== "complete" || !job.waveform_path) return; + fetch(`/api/generations/${job.id}/waveform`) + .then((r) => r.json()) + .then((data: WaveformPeaks) => setPeaks(data)) + .catch(() => {}); + }, [job.id, job.status, job.waveform_path]); + + function handlePlayPause() { + if (!audioRef.current) { + audioRef.current = new Audio(`/api/generations/${job.id}/audio`); + audioRef.current.onended = () => setIsPlaying(false); + } + if (isPlaying) { + audioRef.current.pause(); + setIsPlaying(false); + } else { + audioRef.current.play().catch(() => setIsPlaying(false)); + setIsPlaying(true); + } + } + + async function handleDelete() { + if (!confirm("Delete this generation?")) return; + setIsDeleting(true); + try { + await fetch(`/api/generations/${job.id}`, { method: "DELETE" }); + onDelete(job.id); + } catch { + setIsDeleting(false); + } + } + + const isComplete = job.status === "complete"; + + const statusColors: Record = { + complete: "var(--success)", + generating: "var(--status-loading)", + error: "var(--error)", + cancelled: "var(--muted)", + }; + + return ( +
+ {/* Waveform or placeholder */} +
+ {peaks ? ( + + ) : ( +
+ + {job.status === "generating" ? "Generating…" : "No waveform"} + +
+ )} +
+ + {/* Script preview */} +

+ {truncate(job.script, 120)} +

+ + {/* Metadata row */} +
+ + {job.speaker} + + + {formatDuration(job.duration_secs)} + + + CFG {job.cfg_scale} + + + {job.status} + +
+ + {/* Date */} +

+ {formatDate(job.created_at)} +

+ + {/* Actions */} +
+ {isComplete && ( + <> + + + ↓ Download + + + )} + +
+
+ ); +} diff --git a/web/components/Header.tsx b/web/components/Header.tsx index 484d351..e55e167 100644 --- a/web/components/Header.tsx +++ b/web/components/Header.tsx @@ -1,6 +1,8 @@ "use client"; +import Link from "next/link"; import { useEffect, useRef, useState } from "react"; +import { usePathname } from "next/navigation"; type ServerStatus = "checking" | "downloading" | "loading" | "online" | "error" | "offline"; type Device = "cpu" | "cuda" | null; @@ -9,7 +11,13 @@ type Device = "cpu" | "cuda" | null; const FAST_INTERVAL_MS = 3000; // while checking / loading const SLOW_INTERVAL_MS = 30000; // once online +const NAV_LINKS = [ + { href: "/", label: "Generate" }, + { href: "/library", label: "Library" }, +]; + export default function Header() { + const pathname = usePathname(); const [status, setStatus] = useState("checking"); const [device, setDevice] = useState(null); const [message, setMessage] = useState(); @@ -123,6 +131,7 @@ export default function Header() { >
+
+ + {/* Nav links */} +
diff --git a/web/components/WaveformPreview.tsx b/web/components/WaveformPreview.tsx new file mode 100644 index 0000000..9e47c9d --- /dev/null +++ b/web/components/WaveformPreview.tsx @@ -0,0 +1,57 @@ +"use client"; + +import { useEffect, useRef } from "react"; +import type { WaveformPeaks } from "@/lib/types/generation"; + +interface WaveformPreviewProps { + peaks: WaveformPeaks; + color?: string; + height?: number; + className?: string; +} + +export default function WaveformPreview({ + peaks, + color = "#2dd4bf", + height = 48, + className = "", +}: WaveformPreviewProps) { + const canvasRef = useRef(null); + + useEffect(() => { + const canvas = canvasRef.current; + if (!canvas) return; + + const ctx = canvas.getContext("2d"); + if (!ctx) return; + + const { width } = canvas; + const midY = height / 2; + const { min, max } = peaks.data; + const len = peaks.length; + + ctx.clearRect(0, 0, width, height); + ctx.strokeStyle = color; + ctx.lineWidth = 1; + + for (let x = 0; x < width; x++) { + const peakIndex = Math.floor((x / width) * len); + const minY = midY - min[peakIndex] * midY; + const maxY = midY - max[peakIndex] * midY; + ctx.beginPath(); + ctx.moveTo(x + 0.5, Math.min(minY, maxY)); + ctx.lineTo(x + 0.5, Math.max(minY, maxY)); + ctx.stroke(); + } + }, [peaks, color, height]); + + return ( + + ); +} diff --git a/web/lib/types/generation.ts b/web/lib/types/generation.ts new file mode 100644 index 0000000..ca3483d --- /dev/null +++ b/web/lib/types/generation.ts @@ -0,0 +1,34 @@ +export type GenerationStatus = "generating" | "complete" | "error" | "cancelled"; + +export interface GenerationJob { + id: string; + created_at: string; + status: GenerationStatus; + script: string; + speaker: string; + cfg_scale: number; + inference_steps: number | null; + duration_secs: number | null; + sample_rate: number | null; + audio_path: string | null; + waveform_path: string | null; + error_message: string | null; +} + +export interface WaveformPeaks { + sampleRate: number; + 
durationSecs: number; + channels: number; + samplesPerPixel: number; + length: number; + data: { + min: number[]; + max: number[]; + }; +} + +export interface GenerationsListResponse { + items: GenerationJob[]; + limit: number; + offset: number; +}