feat: add studio roadmap and streaming cleanup

This commit is contained in:
2026-04-28 00:09:15 +01:00
parent 11ffc7df7c
commit 34ec879cdb
45 changed files with 5899 additions and 2659 deletions
+48
View File
@@ -0,0 +1,48 @@
import { NextRequest, NextResponse } from "next/server";
/**
 * Proxies a speech-generation request to the VibeVoice Python server and
 * streams the upstream response body back to the caller as Server-Sent Events.
 *
 * The request body must be a JSON object with a non-empty string `text`.
 * Optional `speaker`, `cfg_scale` and `inference_steps` fall back to
 * "carter", 1.5 and 10 when omitted.
 *
 * Responses:
 * - 400 when the body is not valid JSON, or `text` is missing, not a string,
 *   or blank after trimming.
 * - The upstream status code (with the upstream body as the error text) when
 *   the Python server answers with a non-OK status.
 * - 502 when the request to the Python server fails.
 * - 200 with a `text/event-stream` body otherwise.
 */
export async function POST(request: NextRequest) {
  const pythonServerUrl = process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";

  // Parse outside the upstream try/catch so a malformed client payload is
  // reported as a client error (400) instead of a gateway failure (502).
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return NextResponse.json({ error: "Request body must be valid JSON" }, { status: 400 });
  }

  // JSON such as `null`, `42` or `"x"` parses fine but carries no fields;
  // treat it as an empty object so the `text` check below rejects it.
  const body = (typeof payload === "object" && payload !== null ? payload : {}) as {
    text?: unknown;
    speaker?: string;
    cfg_scale?: number;
    inference_steps?: number;
  };

  // Only strings can be trimmed; any other type counts as a missing field.
  const text = typeof body.text === "string" ? body.text.trim() : "";
  if (!text) {
    return NextResponse.json({ error: "Missing or empty text field" }, { status: 400 });
  }

  try {
    const upstream = await fetch(`${pythonServerUrl}/generate`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        text,
        speaker: body.speaker ?? "carter",
        cfg_scale: body.cfg_scale ?? 1.5,
        inference_steps: body.inference_steps ?? 10,
      }),
    });

    if (!upstream.ok) {
      // Forward the upstream failure verbatim, keeping its status code.
      const detail = await upstream.text().catch(() => "Unknown error");
      return NextResponse.json({ error: detail }, { status: upstream.status });
    }

    // Proxy the SSE stream through to the browser
    return new NextResponse(upstream.body, {
      status: 200,
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
      },
    });
  } catch (err) {
    // Reaching this point means the call to the Python server itself failed.
    const message = err instanceof Error ? err.message : "Failed to connect to VibeVoice server";
    return NextResponse.json({ error: message }, { status: 502 });
  }
}
+40
View File
@@ -0,0 +1,40 @@
import { NextResponse } from "next/server";
/**
 * Health endpoint: asks the VibeVoice Python server for its `/health` status
 * and relays it to the browser.
 *
 * When the upstream answers OK, its `status` (defaulting to "online"),
 * `message`, `progress` (defaulting to null) and `voices` (defaulting to an
 * empty list) are passed through. A non-OK answer, a failed request, or a
 * request exceeding the 4000 ms timeout yields `{ status: "offline" }`.
 * Every response carries `Cache-Control: no-store`.
 */
export async function GET() {
  const baseUrl = process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";

  // Shared response options: health results must never be cached.
  const noStore = { headers: { "Cache-Control": "no-store" } };
  const offline = () => NextResponse.json({ status: "offline" }, noStore);

  try {
    const upstream = await fetch(`${baseUrl}/health`, {
      method: "GET",
      signal: AbortSignal.timeout(4000),
      // Don't cache health checks
      cache: "no-store",
    });

    if (!upstream.ok) {
      return offline();
    }

    // An unparsable body is treated as an empty report, which the defaults
    // below turn into a plain "online" answer.
    const data = await upstream.json().catch(() => ({}));

    // Pass through the exact status the Python server reports:
    // "online" | "loading" | "error"
    const status: string = data.status ?? "online";

    return NextResponse.json(
      {
        status,
        message: data.message,
        progress: data.progress ?? null,
        voices: data.voices ?? [],
      },
      noStore
    );
  } catch {
    return offline();
  }
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

+87
View File
@@ -0,0 +1,87 @@
@import "tailwindcss";
/* Design tokens: the app's colors and font stacks as CSS custom properties. */
:root {
/* Surface colors */
--background: #0d1117;
--foreground: #e2e8f0;
--card-bg: #161b22;
--border: #21262d;
/* Accent colors, each with a darker "dim" variant */
--accent-teal: #2dd4bf;
--accent-violet: #a78bfa;
--accent-teal-dim: #0d9488;
--accent-violet-dim: #7c3aed;
/* Secondary text and status colors */
--muted: #64748b;
--success: #22c55e;
--error: #ef4444;
/* Font stacks built from system fonts only */
--font-sans: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
--font-mono: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
}
/*
 * Tailwind theme bridge: declares Tailwind theme variables in terms of the
 * custom properties defined in :root above.
 * NOTE(review): --font-sans and --font-mono are defined here as var() of a
 * variable with the same name. Confirm this resolves to the :root values as
 * intended rather than forming a self-reference.
 */
@theme inline {
--color-background: var(--background);
--color-foreground: var(--foreground);
--font-sans: var(--font-sans);
--font-mono: var(--font-mono);
}
/* Page defaults: theme colors, the sans-serif stack, and a body at least one viewport tall. */
body {
background: var(--background);
color: var(--foreground);
font-family: var(--font-sans);
min-height: 100vh;
}
/* Custom scrollbar (styled through -webkit- prefixed pseudo-elements only) */
::-webkit-scrollbar {
width: 6px;
height: 6px;
}
::-webkit-scrollbar-track {
background: var(--card-bg);
}
::-webkit-scrollbar-thumb {
background: var(--border);
border-radius: 3px;
}
/* The thumb switches from the border color to the muted color on hover. */
::-webkit-scrollbar-thumb:hover {
background: var(--muted);
}
/*
 * Range input styling: the native appearance is removed, then the track and
 * thumb are styled separately through the -webkit- and -moz- pseudo-elements.
 */
input[type="range"] {
-webkit-appearance: none;
appearance: none;
background: transparent;
cursor: pointer;
}
/* WebKit track */
input[type="range"]::-webkit-slider-runnable-track {
background: var(--border);
height: 4px;
border-radius: 2px;
}
/* WebKit thumb */
input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none;
appearance: none;
width: 16px;
height: 16px;
border-radius: 50%;
background: var(--accent-teal);
/* (4px track - 16px thumb) / 2 = -6px, which centers the thumb on the track */
margin-top: -6px;
/* rgba(45, 212, 191) is the --accent-teal color (#2dd4bf) with alpha applied */
box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
transition: box-shadow 0.15s ease;
}
/* A larger, more opaque glow while hovering (WebKit thumb only) */
input[type="range"]:hover::-webkit-slider-thumb {
box-shadow: 0 0 10px rgba(45, 212, 191, 0.7);
}
/* -moz- track: same dimensions and color as the WebKit track */
input[type="range"]::-moz-range-track {
background: var(--border);
height: 4px;
border-radius: 2px;
}
/* -moz- thumb: same size, color and resting glow as the WebKit thumb, with the border removed */
input[type="range"]::-moz-range-thumb {
width: 16px;
height: 16px;
border-radius: 50%;
background: var(--accent-teal);
border: none;
box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
}
+21
View File
@@ -0,0 +1,21 @@
import type { Metadata } from "next";
import "./globals.css";
/** Document metadata exported by the root layout: the page title and description. */
export const metadata: Metadata = {
  title: "VibePod — TTS Podcast Generator",
  description: "Generate podcast audio using Microsoft VibeVoice 0.5B",
};
/**
 * Root layout: renders the `<html lang="en">` / `<body>` shell around every
 * page and applies the theme background and foreground colors inline on the
 * body.
 *
 * @param children - The page content to render inside `<body>`.
 */
export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  // Theme colors come from the CSS custom properties of the same names.
  const themeStyle = { background: "var(--background)", color: "var(--foreground)" };

  return (
    <html lang="en">
      <body style={themeStyle}>{children}</body>
    </html>
  );
}
+221
View File
@@ -0,0 +1,221 @@
"use client";
import { useReducer, useCallback, useEffect } from "react";
import Header from "@/components/Header";
import TextInputPanel from "@/components/TextInputPanel";
import GenerationControls from "@/components/GenerationControls";
import AudioPlayer from "@/components/AudioPlayer";
import StatusLog from "@/components/StatusLog";
import { useStreamingGeneration } from "@/hooks/useStreamingGeneration";
/** Backend status values tracked by the page and passed to `GenerationControls`. */
export type ServerStatus = "offline" | "downloading" | "loading" | "online" | "error";
/**
 * Progress counters read from the `/api/health` response alongside the status.
 * NOTE(review): the unit of `done` / `total` is not visible in this file
 * (presumably files or bytes downloaded); confirm against the backend.
 */
export interface DownloadProgress {
done: number;
total: number;
}
/** Complete UI state of the page, owned by `useReducer(reducer, initialState)`. */
interface AppState {
/** Script text shown in the text input panel and sent for generation. */
script: string;
/** Selected voice, sent as `speaker` when generating. */
speaker: string;
/** Generation setting sent as `cfgScale`. */
cfgScale: number;
/** Generation setting sent as `inferenceSteps`. */
inferenceSteps: number;
/** True from START_GENERATION until the run succeeds, fails, or is cancelled. */
isGenerating: boolean;
/** Latest elapsed value from GEN_PROGRESS; reset to 0 when a run starts or ends. NOTE(review): unit presumably seconds; confirm in the hook. */
genElapsed: number;
/** Latest progress value from GEN_PROGRESS, or null when none is known. NOTE(review): presumably a percentage; confirm in the hook. */
genPct: number | null;
/** URL handed to `AudioPlayer`; set on GENERATION_SUCCESS and cleared when a new run starts. */
audioUrl: string | null;
/** Messages shown in `StatusLog`; emptied when a new run starts. */
logs: string[];
/** Most recent status from health polling. */
serverStatus: ServerStatus;
/** Most recent progress from health polling, or null when none was reported. */
downloadProgress: DownloadProgress | null;
/** Last non-empty voice list from health polling. */
availableVoices: string[];
}
/** Every action `reducer` understands, discriminated by `type`. */
type AppAction =
// Form-field edits
| { type: "SET_SCRIPT"; payload: string }
| { type: "SET_SPEAKER"; payload: string }
| { type: "SET_CFG_SCALE"; payload: number }
| { type: "SET_INFERENCE_STEPS"; payload: number }
// Generation lifecycle; GENERATION_SUCCESS carries the resulting audio URL
| { type: "START_GENERATION" }
| { type: "GEN_PROGRESS"; elapsed: number; pct: number | null }
| { type: "GENERATION_SUCCESS"; payload: string }
| { type: "GENERATION_CANCELLED" }
| { type: "GENERATION_ERROR" }
// Appends one message to the status log
| { type: "ADD_LOG"; payload: string }
// Result of one health poll; an absent or empty `voices` keeps the current list
| {
type: "SET_SERVER_STATUS";
payload: { status: ServerStatus; progress?: DownloadProgress | null; voices?: string[] };
};
/**
 * Pure state transition for the page.
 *
 * @param state - The current state; never mutated.
 * @param action - The action to apply.
 * @returns A new state object for every recognised action, or the same
 *   `state` reference when the action type is not recognised.
 */
function reducer(state: AppState, action: AppAction): AppState {
  // Fields shared by every transition that ends a generation run.
  const idle = { isGenerating: false, genElapsed: 0, genPct: null };

  // Form-field edits
  if (action.type === "SET_SCRIPT") {
    return { ...state, script: action.payload };
  }
  if (action.type === "SET_SPEAKER") {
    return { ...state, speaker: action.payload };
  }
  if (action.type === "SET_CFG_SCALE") {
    return { ...state, cfgScale: action.payload };
  }
  if (action.type === "SET_INFERENCE_STEPS") {
    return { ...state, inferenceSteps: action.payload };
  }

  // Generation lifecycle
  if (action.type === "START_GENERATION") {
    // A new run discards the previous audio and log.
    return { ...state, ...idle, isGenerating: true, audioUrl: null, logs: [] };
  }
  if (action.type === "GEN_PROGRESS") {
    return { ...state, genElapsed: action.elapsed, genPct: action.pct };
  }
  if (action.type === "GENERATION_SUCCESS") {
    return { ...state, ...idle, audioUrl: action.payload };
  }
  if (action.type === "GENERATION_CANCELLED" || action.type === "GENERATION_ERROR") {
    // Any previously produced audio stays in place.
    return { ...state, ...idle };
  }

  if (action.type === "ADD_LOG") {
    return { ...state, logs: state.logs.concat([action.payload]) };
  }

  if (action.type === "SET_SERVER_STATUS") {
    const { status, progress, voices } = action.payload;
    // An absent or empty voice list keeps the voices already known.
    const availableVoices = voices?.length ? voices : state.availableVoices;
    return {
      ...state,
      serverStatus: status,
      downloadProgress: progress ?? null,
      availableVoices,
    };
  }

  return state;
}
/** State the page starts with, before any user input or health poll. */
const initialState: AppState = {
  // Form fields
  script: "",
  speaker: "carter",
  cfgScale: 1.5,
  inferenceSteps: 10,

  // Generation run
  isGenerating: false,
  genElapsed: 0,
  genPct: null,
  audioUrl: null,
  logs: [],

  // Backend status; "offline" until the first health poll says otherwise
  serverStatus: "offline",
  downloadProgress: null,
  availableVoices: [],
};
/**
 * Top-level page component. Owns the reducer state, connects it to the
 * streaming-generation hook, polls `/api/health` for backend status, and
 * renders the script editor, audio player, generation controls and status log.
 */
export default function HomePage() {
  const [state, dispatch] = useReducer(reducer, initialState);

  // Whitespace-delimited word count; a blank script counts as zero words.
  const trimmedScript = state.script.trim();
  const wordCount = trimmedScript === "" ? 0 : trimmedScript.split(/\s+/).length;

  // Stable callbacks for the streaming hook; each one only dispatches an action.
  const addLog = useCallback((msg: string) => {
    dispatch({ type: "ADD_LOG", payload: msg });
  }, []);
  const handleGenerationStart = useCallback(() => {
    dispatch({ type: "START_GENERATION" });
  }, []);
  const handleGenerationProgress = useCallback((elapsed: number, pct: number | null) => {
    dispatch({ type: "GEN_PROGRESS", elapsed, pct });
  }, []);
  const handleGenerationSuccess = useCallback((audioUrl: string) => {
    dispatch({ type: "GENERATION_SUCCESS", payload: audioUrl });
  }, []);
  const handleGenerationCancel = useCallback(() => {
    dispatch({ type: "GENERATION_CANCELLED" });
  }, []);
  const handleGenerationError = useCallback(() => {
    dispatch({ type: "GENERATION_ERROR" });
  }, []);

  const streaming = useStreamingGeneration({
    onLog: addLog,
    onStart: handleGenerationStart,
    onProgress: handleGenerationProgress,
    onSuccess: handleGenerationSuccess,
    onCancel: handleGenerationCancel,
    onError: handleGenerationError,
  });
  const { generate, pauseStream, resumeStream, stop, isStreamPaused } = streaming;

  // Server health polling — fast while not ready, slow when online
  useEffect(() => {
    let disposed = false;
    let timer: ReturnType<typeof setTimeout>;

    const poll = async () => {
      if (disposed) return;

      // Start from an offline result; it is only replaced by a response that
      // was both fetched and parsed successfully.
      let snapshot: {
        status: ServerStatus;
        progress: DownloadProgress | null;
        voices: string[];
      } = { status: "offline", progress: null, voices: [] };

      try {
        const response = await fetch("/api/health", { cache: "no-store" });
        const data = await response.json() as {
          status: ServerStatus;
          progress?: DownloadProgress | null;
          voices?: string[];
        };
        snapshot = {
          status: data.status ?? "offline",
          progress: data.progress ?? null,
          voices: data.voices ?? [],
        };
      } catch {
        // Request or parsing failed: report the offline snapshot.
      }

      // The component may have unmounted while the request was in flight.
      if (disposed) return;

      dispatch({ type: "SET_SERVER_STATUS", payload: snapshot });
      timer = setTimeout(poll, snapshot.status === "online" ? 15_000 : 2_000);
    };

    poll();

    return () => {
      disposed = true;
      clearTimeout(timer);
    };
  }, []);

  // Starts a run with the current form values; ignored when the script is
  // blank or a run is already in progress.
  const handleGenerate = useCallback(async () => {
    const hasScript = state.script.trim() !== "";
    if (!hasScript || state.isGenerating) return;

    addLog(`${wordCount} words queued`);
    await generate({
      text: state.script,
      speaker: state.speaker,
      cfgScale: state.cfgScale,
      inferenceSteps: state.inferenceSteps,
    });
  }, [
    addLog,
    generate,
    state.cfgScale,
    state.inferenceSteps,
    state.isGenerating,
    state.script,
    state.speaker,
    wordCount,
  ]);

  return (
    <div className="min-h-screen flex flex-col" style={{ background: "var(--background)" }}>
      <Header />
      <main className="flex-1 container mx-auto px-4 py-6 max-w-6xl">
        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
          {/* Left: script + audio player */}
          <div className="lg:col-span-2 flex flex-col gap-6">
            <TextInputPanel
              value={state.script}
              onChange={(text) => dispatch({ type: "SET_SCRIPT", payload: text })}
            />
            {state.audioUrl && <AudioPlayer audioUrl={state.audioUrl} />}
          </div>

          {/* Right: controls + log */}
          <div className="flex flex-col gap-6">
            <GenerationControls
              speaker={state.speaker}
              availableVoices={state.availableVoices}
              onSpeakerChange={(v) => dispatch({ type: "SET_SPEAKER", payload: v })}
              cfgScale={state.cfgScale}
              onCfgScaleChange={(v) => dispatch({ type: "SET_CFG_SCALE", payload: v })}
              inferenceSteps={state.inferenceSteps}
              onInferenceStepsChange={(v) => dispatch({ type: "SET_INFERENCE_STEPS", payload: v })}
              onGenerate={handleGenerate}
              onStop={stop}
              onPauseStream={pauseStream}
              onResumeStream={resumeStream}
              isStreamPaused={isStreamPaused}
              isGenerating={state.isGenerating}
              genElapsed={state.genElapsed}
              genPct={state.genPct}
              wordCount={wordCount}
              serverStatus={state.serverStatus}
              downloadProgress={state.downloadProgress}
            />
            <StatusLog messages={state.logs} />
          </div>
        </div>
      </main>
    </div>
  );
}