Files
vibepod/web/components/GenerationControls.tsx
T
LyAhn a39ec536fd Improve CPU Inference Stability: Adaptive Buffering & Chunk Accumulation (#11)
* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

This change addresses audio stuttering issues when running on CPU-only hardware by:
- Implementing server-side audio chunk accumulation to reduce SSE overhead.
- Introducing device-aware default configurations for buffering and inference steps.
- Exposing key performance parameters as environment variables.
- Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

Changes:
- Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
- Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
- Updated `web/app/page.tsx` to fetch and apply server-side configuration.

Verified on CPU mode in the development environment.

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

This change addresses audio stuttering issues when running on CPU-only hardware by:
- Implementing server-side audio chunk accumulation to reduce SSE overhead.
- Introducing device-aware default configurations for buffering and inference steps.
- Exposing key performance parameters as environment variables.
- Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

Changes:
- Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
- Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
- Updated `web/app/page.tsx` to fetch and apply server-side configuration.

Verified on CPU mode in the development environment.

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Adaptive Buffering UI & Logic

This change enhances the initial CPU stability fix by:
- Exposing adaptive buffering settings (Pre-buffer, Re-buffer Threshold, Resume Threshold) in a new "Advanced Buffering" UI section.
- Managing buffering settings in the application state to allow for manual overrides.
- Implementing robust re-initialization of buffering and inference defaults whenever the server's device (CPU/CUDA) changes.
- Including the active device in the server's config object for reliable client-side detection.

Verified with frontend screenshots and a full build. Responds to PR feedback about putting the adaptive logic into action.

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Refine adaptive buffering: env helpers, threshold validation, a11y fixes

- Extract _env_int/_env_float helpers in server to validate env-var config
  with graceful fallback instead of bare int/float casts
- Fix inference_steps falsy-check (0 is valid) to use explicit None guard
- Enforce rebufferThresholdSecs < resumeThresholdSecs in both the hook
  (with console.warn + clamp) and the GenerationControls UI (sliders block
  invalid states by auto-bumping or ignoring the drag)
- Add type="button", aria-expanded, aria-controls, htmlFor, and input id
  attributes to GenerationControls for accessibility
- Add .vscode/settings.json to .gitignore; sort package.json scripts

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
2026-04-30 16:03:35 +01:00

433 lines
15 KiB
TypeScript

"use client";
import { useState } from "react";
import type { ServerStatus, DownloadProgress } from "@/app/page";
/** Voice names offered in the selector when `availableVoices` is empty. */
const FALLBACK_VOICES = [
  "carter",
  "davis",
  "emma",
  "frank",
  "grace",
  "mike",
];
/**
 * Props for the GenerationControls panel.
 *
 * The component is fully controlled: every setting is passed in as a value
 * together with a change callback, and all actions are reported through
 * callbacks rather than handled internally.
 */
interface GenerationControlsProps {
  // ── Voice ────────────────────────────────────────────────────────────
  /** Currently selected voice; bound to the voice `<select>`. */
  speaker: string;
  /** Voices to list in the selector; when empty, FALLBACK_VOICES is listed instead. */
  availableVoices: string[];
  /** Called with the chosen option value when the selection changes. */
  onSpeakerChange: (v: string) => void;

  // ── Generation quality ───────────────────────────────────────────────
  /** CFG scale shown on the "Voice Expressiveness" slider (range 0.5–4.0, step 0.1). */
  cfgScale: number;
  /** Called with the parsed float value of the CFG slider. */
  onCfgScaleChange: (v: number) => void;
  /** Diffusion step count shown on the "Quality vs Speed" slider (range 5–20). */
  inferenceSteps: number;
  /** Called with the parsed integer value of the steps slider. */
  onInferenceStepsChange: (v: number) => void;

  // ── Advanced buffering (all values rendered with an "s" suffix) ──────
  /** Value of the "Initial Pre-buffer" slider (range 0.5–10.0, step 0.5). */
  prebufferSecs: number;
  /** Called with the parsed float value of the pre-buffer slider. */
  onPrebufferSecsChange: (v: number) => void;
  /** Value of the "Re-buffer Threshold" slider (range 0.1–3.0, step 0.1). */
  rebufferThresholdSecs: number;
  /** Called with the parsed float value of the re-buffer slider. */
  onRebufferThresholdChange: (v: number) => void;
  /** Value of the "Resume Threshold" slider (range 0.5–5.0, step 0.1). */
  resumeThresholdSecs: number;
  /**
   * Called when the resume slider moves to a value above the re-buffer
   * threshold, and also when the re-buffer slider pushes the resume value up.
   */
  onResumeThresholdChange: (v: number) => void;

  // ── Actions ──────────────────────────────────────────────────────────
  /** Click handler for the main "Generate Audio" button. */
  onGenerate: () => void;
  /** Click handler for the "Stop" button (shown only while generating). */
  onStop: () => void;
  /** Click handler for the pause/resume button while the stream is not paused. */
  onPauseStream: () => void;
  /** Click handler for the pause/resume button while the stream is paused. */
  onResumeStream: () => void;

  // ── Generation state ─────────────────────────────────────────────────
  /** Selects between the "Resume" and "Pause" presentation of the pause button. */
  isStreamPaused: boolean;
  /** Shows the progress bar and Pause/Stop buttons, and disables Generate. */
  isGenerating: boolean;
  /** Rendered as "{genElapsed}s elapsed" above the progress bar. */
  genElapsed: number;
  /** Progress percentage; `null` renders "starting..." with an empty bar. */
  genPct: number | null;
  /** Generate is disabled while this is 0. */
  wordCount: number;

  // ── Server state ─────────────────────────────────────────────────────
  /** Any value other than "online" shows the status banner and disables Generate. */
  serverStatus: ServerStatus;
  /** File counts used for the download label and bar; may be `null`. */
  downloadProgress: DownloadProgress | null;
}
/**
 * Banner presentation for every server status except "online".
 *
 * Each entry supplies the colour used for the status dot and text, plus a
 * function producing the banner message. Only the "downloading" entry reads
 * the progress argument.
 */
const STATUS_CONFIG: Record<
  Exclude<ServerStatus, "online">,
  { color: string; label: (p: DownloadProgress | null) => string }
> = {
  offline: {
    color: "var(--error)",
    label: () => "Server offline — waiting for connection...",
  },
  downloading: {
    color: "#60a5fa",
    label: (progress) => {
      // File counts are only shown once a positive total is known.
      if (progress && progress.total > 0) {
        return `Downloading model... (${progress.done} / ${progress.total} files)`;
      }
      return "Downloading model (~1 GB)...";
    },
  },
  loading: {
    color: "#fbbf24",
    label: () => "Loading model into memory...",
  },
  error: {
    color: "var(--error)",
    label: () => "Server error — check the terminal for details.",
  },
};
/**
 * Small spinning indicator rendered beside button text.
 *
 * The icon is decorative (the neighbouring text describes the state), so it
 * is marked `aria-hidden` to keep it out of the accessibility tree.
 */
function SpinnerIcon() {
  return (
    <svg
      className="animate-spin w-4 h-4"
      viewBox="0 0 24 24"
      fill="none"
      aria-hidden="true"
    >
      <circle
        className="opacity-25"
        cx="12"
        cy="12"
        r="10"
        stroke="currentColor"
        strokeWidth="4"
      />
      <path
        className="opacity-75"
        fill="currentColor"
        d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
      />
    </svg>
  );
}
/**
 * Controlled settings panel for audio generation.
 *
 * Renders, top to bottom:
 * - a voice selector (falls back to FALLBACK_VOICES when `availableVoices` is empty),
 * - the CFG scale and inference-step sliders,
 * - a collapsible "Advanced Buffering" section with pre-buffer, re-buffer and
 *   resume threshold sliders,
 * - a server status banner whenever `serverStatus` is not "online",
 * - a progress bar while `isGenerating` is true,
 * - the Generate button, plus Pause/Resume and Stop buttons while generating.
 *
 * Threshold invariant: the sliders keep `rebufferThresholdSecs` strictly below
 * `resumeThresholdSecs`. Raising the re-buffer slider to or past the resume
 * value bumps resume to `rebuffer + 0.5`; dragging the resume slider to or
 * below the re-buffer value is ignored.
 *
 * All state except the expanded/collapsed flag of the advanced section lives
 * in the parent and arrives through props.
 */
export default function GenerationControls({
  speaker,
  availableVoices,
  onSpeakerChange,
  cfgScale,
  onCfgScaleChange,
  inferenceSteps,
  onInferenceStepsChange,
  prebufferSecs,
  onPrebufferSecsChange,
  rebufferThresholdSecs,
  onRebufferThresholdChange,
  resumeThresholdSecs,
  onResumeThresholdChange,
  onGenerate,
  onStop,
  onPauseStream,
  onResumeStream,
  isStreamPaused,
  isGenerating,
  genElapsed,
  genPct,
  wordCount,
  serverStatus,
  downloadProgress,
}: GenerationControlsProps) {
  const [showAdvanced, setShowAdvanced] = useState(false);

  const voices = availableVoices.length > 0 ? availableVoices : FALLBACK_VOICES;
  const serverReady = serverStatus === "online";
  const buttonDisabled = isGenerating || wordCount === 0 || !serverReady;

  // Clamp so a `done` count above `total` can never yield a width over 100%.
  const downloadPct =
    downloadProgress && downloadProgress.total > 0
      ? Math.min(
          100,
          Math.max(0, Math.round((downloadProgress.done / downloadProgress.total) * 100)),
        )
      : 0;

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-5"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <h2
        className="text-sm font-semibold uppercase tracking-wider"
        style={{ color: "var(--accent-teal)" }}
      >
        Generation Settings
      </h2>

      {/* Voice selector */}
      <div className="flex flex-col gap-2">
        <label
          htmlFor="voice-select"
          className="text-sm font-medium"
          style={{ color: "var(--foreground)" }}
        >
          Voice
        </label>
        <select
          id="voice-select"
          value={speaker}
          onChange={(e) => onSpeakerChange(e.target.value)}
          disabled={!serverReady}
          className="w-full px-3 py-2 rounded-lg text-sm font-medium appearance-none cursor-pointer disabled:cursor-not-allowed"
          style={{
            background: "var(--background)",
            border: "1px solid var(--border)",
            color: serverReady ? "var(--foreground)" : "var(--muted)",
          }}
        >
          {voices.map((v) => (
            <option key={v} value={v}>
              {v.charAt(0).toUpperCase() + v.slice(1)}
            </option>
          ))}
        </select>
      </div>

      {/* CFG Scale slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label
            htmlFor="cfg-scale"
            className="text-sm font-medium"
            style={{ color: "var(--foreground)" }}
          >
            Voice Expressiveness
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{ background: "var(--background)", color: "var(--accent-teal)" }}
          >
            {cfgScale.toFixed(1)}
          </span>
        </div>
        <input
          id="cfg-scale"
          type="range"
          min={0.5}
          max={4.0}
          step={0.1}
          value={cfgScale}
          onChange={(e) => onCfgScaleChange(parseFloat(e.target.value))}
          className="w-full"
        />
        <div className="flex items-center justify-between text-xs" style={{ color: "var(--muted)" }}>
          <span>Flat (0.5)</span>
          <span>CFG Scale</span>
          <span>Expressive (4.0)</span>
        </div>
      </div>

      {/* Inference Steps slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label
            htmlFor="inference-steps"
            className="text-sm font-medium"
            style={{ color: "var(--foreground)" }}
          >
            Quality vs Speed
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{ background: "var(--background)", color: "var(--accent-violet)" }}
          >
            {inferenceSteps}
          </span>
        </div>
        <input
          id="inference-steps"
          type="range"
          min={5}
          max={20}
          step={1}
          value={inferenceSteps}
          onChange={(e) => onInferenceStepsChange(parseInt(e.target.value, 10))}
          className="w-full"
          style={{ "--thumb-color": "var(--accent-violet)" } as React.CSSProperties}
        />
        <div className="flex items-center justify-between text-xs" style={{ color: "var(--muted)" }}>
          <span>Faster (5)</span>
          <span>Diffusion Steps</span>
          <span>Better (20)</span>
        </div>
      </div>

      {/* Advanced Buffering toggle */}
      <div className="pt-2">
        <button
          type="button"
          onClick={() => setShowAdvanced((prev) => !prev)}
          aria-expanded={showAdvanced}
          aria-controls="advanced-buffering-panel"
          className="flex items-center gap-2 text-xs font-semibold uppercase tracking-wider cursor-pointer transition-colors"
          style={{ color: showAdvanced ? "var(--accent-teal)" : "var(--muted)" }}
        >
          <svg
            className={`w-3 h-3 transition-transform ${showAdvanced ? "rotate-90" : ""}`}
            viewBox="0 0 24 24"
            fill="none"
            stroke="currentColor"
            strokeWidth="3"
            aria-hidden="true"
          >
            <polyline points="9 18 15 12 9 6" />
          </svg>
          Advanced Buffering
        </button>
      </div>

      {showAdvanced && (
        <div
          id="advanced-buffering-panel"
          className="flex flex-col gap-4 pl-2 border-l"
          style={{ borderColor: "var(--border)" }}
        >
          {/* Pre-buffer */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <label
                htmlFor="prebuffer-secs"
                className="text-xs font-medium"
                style={{ color: "var(--foreground)" }}
              >
                Initial Pre-buffer
              </label>
              <span className="text-xs font-mono" style={{ color: "var(--accent-teal)" }}>
                {prebufferSecs.toFixed(1)}s
              </span>
            </div>
            <input
              id="prebuffer-secs"
              type="range"
              min={0.5}
              max={10.0}
              step={0.5}
              value={prebufferSecs}
              onChange={(e) => onPrebufferSecsChange(parseFloat(e.target.value))}
              className="w-full h-1"
            />
          </div>

          {/* Re-buffer threshold */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <label
                htmlFor="rebuffer-threshold"
                className="text-xs font-medium"
                style={{ color: "var(--foreground)" }}
              >
                Re-buffer Threshold
              </label>
              <span className="text-xs font-mono" style={{ color: "var(--accent-teal)" }}>
                {rebufferThresholdSecs.toFixed(1)}s
              </span>
            </div>
            <input
              id="rebuffer-threshold"
              type="range"
              min={0.1}
              max={3.0}
              step={0.1}
              value={rebufferThresholdSecs}
              onChange={(e) => {
                const next = parseFloat(e.target.value);
                onRebufferThresholdChange(next);
                // Keep resume strictly above re-buffer; round to one decimal
                // so float addition does not leak digits like 1.2000000002.
                if (resumeThresholdSecs <= next) {
                  onResumeThresholdChange(parseFloat((next + 0.5).toFixed(1)));
                }
              }}
              className="w-full h-1"
            />
          </div>

          {/* Resume threshold */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <label
                htmlFor="resume-threshold"
                className="text-xs font-medium"
                style={{ color: "var(--foreground)" }}
              >
                Resume Threshold
              </label>
              <span className="text-xs font-mono" style={{ color: "var(--accent-teal)" }}>
                {resumeThresholdSecs.toFixed(1)}s
              </span>
            </div>
            <input
              id="resume-threshold"
              type="range"
              min={0.5}
              max={5.0}
              step={0.1}
              value={resumeThresholdSecs}
              onChange={(e) => {
                const next = parseFloat(e.target.value);
                // Ignore drags that would put resume at or below re-buffer.
                if (next <= rebufferThresholdSecs) return;
                onResumeThresholdChange(next);
              }}
              className="w-full h-1"
            />
          </div>
        </div>
      )}

      {/* Server status banner */}
      {!serverReady && (
        <div
          className="flex flex-col gap-2 px-3 py-3 rounded-lg text-sm"
          style={{ background: "var(--background)", border: "1px solid var(--border)" }}
        >
          <div className="flex items-center gap-2">
            <span
              className={`w-2 h-2 rounded-full shrink-0 ${serverStatus === "offline" || serverStatus === "error" ? "" : "animate-pulse"}`}
              style={{ background: STATUS_CONFIG[serverStatus].color }}
            />
            <span style={{ color: STATUS_CONFIG[serverStatus].color }}>
              {STATUS_CONFIG[serverStatus].label(downloadProgress)}
            </span>
          </div>
          {serverStatus === "downloading" && (
            <div
              className="w-full rounded-full h-1.5 overflow-hidden"
              style={{ background: "var(--border)" }}
            >
              <div
                className="h-1.5 rounded-full transition-all duration-500"
                style={{
                  width: `${downloadPct}%`,
                  background: "linear-gradient(90deg, #60a5fa, var(--accent-teal))",
                  minWidth: downloadPct > 0 ? "4px" : "0",
                }}
              />
            </div>
          )}
          {serverStatus === "loading" && (
            <div
              className="w-full rounded-full h-1.5 overflow-hidden"
              style={{ background: "var(--border)" }}
            >
              <div
                className="h-1.5 rounded-full animate-pulse"
                style={{
                  width: "60%",
                  background: "linear-gradient(90deg, #fbbf24, var(--accent-teal))",
                }}
              />
            </div>
          )}
        </div>
      )}

      {/* Generation progress bar */}
      {isGenerating && (
        <div className="flex flex-col gap-1.5">
          <div className="flex items-center justify-between text-xs" style={{ color: "var(--muted)" }}>
            <span>{genElapsed}s elapsed</span>
            <span>{genPct !== null ? `${genPct}%` : "starting..."}</span>
          </div>
          <div
            className="w-full rounded-full h-1.5 overflow-hidden"
            style={{ background: "var(--border)" }}
          >
            <div
              className="h-1.5 rounded-full transition-all duration-500"
              style={{
                width: genPct !== null ? `${genPct}%` : "0%",
                background: "linear-gradient(90deg, var(--accent-teal), var(--accent-violet))",
                minWidth: genPct !== null && genPct > 0 ? "4px" : "0",
              }}
            />
          </div>
        </div>
      )}

      {/* Generate / Stop buttons */}
      <div className="flex gap-2">
        {/* type="button" keeps these from submitting an enclosing form. */}
        <button
          type="button"
          onClick={onGenerate}
          disabled={buttonDisabled}
          className="flex-1 py-3 rounded-xl font-semibold text-sm transition-all cursor-pointer disabled:cursor-not-allowed flex items-center justify-center gap-2"
          style={
            buttonDisabled
              ? { background: "var(--border)", color: "var(--muted)" }
              : {
                  background: "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
                  color: "#fff",
                  boxShadow: "0 4px 15px rgba(45, 212, 191, 0.2)",
                }
          }
        >
          {isGenerating ? (
            <>
              <SpinnerIcon />
              Generating...
            </>
          ) : !serverReady ? (
            <>
              <SpinnerIcon />
              {serverStatus === "downloading" ? "Downloading model..." : "Waiting for server..."}
            </>
          ) : (
            <>
              <svg
                className="w-4 h-4"
                viewBox="0 0 24 24"
                fill="none"
                stroke="currentColor"
                strokeWidth="2"
                aria-hidden="true"
              >
                <polygon points="5 3 19 12 5 21 5 3" />
              </svg>
              Generate Audio
            </>
          )}
        </button>
        {isGenerating && (
          <>
            <button
              type="button"
              onClick={isStreamPaused ? onResumeStream : onPauseStream}
              className="px-4 py-3 rounded-xl font-semibold text-sm transition-all cursor-pointer flex items-center justify-center gap-1.5"
              style={{
                background: "var(--background)",
                border: `1px solid ${isStreamPaused ? "var(--accent-teal)" : "#fbbf24"}`,
                color: isStreamPaused ? "var(--accent-teal)" : "#fbbf24",
              }}
            >
              {isStreamPaused ? (
                <>
                  <svg className="w-4 h-4" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
                    <polygon points="5 3 19 12 5 21 5 3" />
                  </svg>
                  Resume
                </>
              ) : (
                <>
                  <svg className="w-4 h-4" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
                    <rect x="6" y="4" width="4" height="16" />
                    <rect x="14" y="4" width="4" height="16" />
                  </svg>
                  Pause
                </>
              )}
            </button>
            <button
              type="button"
              onClick={onStop}
              className="px-4 py-3 rounded-xl font-semibold text-sm transition-all cursor-pointer flex items-center justify-center gap-1.5"
              style={{
                background: "var(--background)",
                border: "1px solid var(--error)",
                color: "var(--error)",
              }}
            >
              <svg className="w-4 h-4" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
                <rect x="4" y="4" width="16" height="16" rx="2" />
              </svg>
              Stop
            </button>
          </>
        )}
      </div>
    </div>
  );
}