mirror of
https://github.com/JezzWTF/vibepod.git
synced 2026-06-13 03:58:07 +00:00
Improve CPU Inference Stability: Adaptive Buffering & Chunk Accumulation (#11)
* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

  This change addresses audio stuttering issues when running on CPU-only hardware by:
  - Implementing server-side audio chunk accumulation to reduce SSE overhead.
  - Introducing device-aware default configurations for buffering and inference steps.
  - Exposing key performance parameters as environment variables.
  - Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

  Changes:
  - Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
  - Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
  - Updated `web/app/page.tsx` to fetch and apply server-side configuration.

  Verified on CPU mode in the development environment.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

  This change addresses audio stuttering issues when running on CPU-only hardware by:
  - Implementing server-side audio chunk accumulation to reduce SSE overhead.
  - Introducing device-aware default configurations for buffering and inference steps.
  - Exposing key performance parameters as environment variables.
  - Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

  Changes:
  - Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
  - Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
  - Updated `web/app/page.tsx` to fetch and apply server-side configuration.

  Verified on CPU mode in the development environment.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Adaptive Buffering UI & Logic

  This change enhances the initial CPU stability fix by:
  - Exposing adaptive buffering settings (Pre-buffer, Re-buffer Threshold, Resume Threshold) in a new "Advanced Buffering" UI section.
  - Managing buffering settings in the application state to allow for manual overrides.
  - Implementing robust re-initialization of buffering and inference defaults whenever the server's device (CPU/CUDA) changes.
  - Including the active device in the server's config object for reliable client-side detection.

  Verified with frontend screenshots and full build. Responds to PR feedback regarding actioning the adaptive logic.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Refine adaptive buffering: env helpers, threshold validation, a11y fixes

  - Extract _env_int/_env_float helpers in server to validate env-var config with graceful fallback instead of bare int/float casts
  - Fix inference_steps falsy-check (0 is valid) to use explicit None guard
  - Enforce rebufferThresholdSecs < resumeThresholdSecs in both the hook (with console.warn + clamp) and the GenerationControls UI (sliders block invalid states by auto-bumping or ignoring the drag)
  - Add type="button", aria-expanded, aria-controls, htmlFor, and input id attributes to GenerationControls for accessibility
  - Add .vscode/settings.json to .gitignore; sort package.json scripts

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
This commit is contained in:
@@ -3,9 +3,9 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
const SAMPLE_RATE = 24_000;
|
||||
const PREBUFFER_SECS = 2.0;
|
||||
const REBUFFER_THRESHOLD_SECS = 0.4;
|
||||
const RESUME_THRESHOLD_SECS = 1.5;
|
||||
const DEFAULT_PREBUFFER_SECS = 2.0;
|
||||
const DEFAULT_REBUFFER_THRESHOLD_SECS = 0.4;
|
||||
const DEFAULT_RESUME_THRESHOLD_SECS = 1.5;
|
||||
|
||||
interface GenerateOptions {
|
||||
text: string;
|
||||
@@ -21,6 +21,12 @@ interface UseStreamingGenerationOptions {
|
||||
onSuccess: (audioUrl: string) => void;
|
||||
onCancel: () => void;
|
||||
onError: () => void;
|
||||
/** Seconds of audio to buffer before playback starts. */
|
||||
prebufferSecs?: number;
|
||||
/** Buffer lookahead (seconds) below which playback suspends to refill. */
|
||||
rebufferThresholdSecs?: number;
|
||||
/** Buffer lookahead (seconds) at or above which suspended playback resumes. Must be > rebufferThresholdSecs. */
|
||||
resumeThresholdSecs?: number;
|
||||
}
|
||||
|
||||
function mergeFloat32Arrays(chunks: Float32Array<ArrayBuffer>[]): Float32Array<ArrayBuffer> {
|
||||
@@ -77,7 +83,18 @@ export function useStreamingGeneration({
|
||||
onSuccess,
|
||||
onCancel,
|
||||
onError,
|
||||
prebufferSecs = DEFAULT_PREBUFFER_SECS,
|
||||
rebufferThresholdSecs: rawRebufferThresholdSecs = DEFAULT_REBUFFER_THRESHOLD_SECS,
|
||||
resumeThresholdSecs: rawResumeThresholdSecs = DEFAULT_RESUME_THRESHOLD_SECS,
|
||||
}: UseStreamingGenerationOptions) {
|
||||
let rebufferThresholdSecs = rawRebufferThresholdSecs;
|
||||
let resumeThresholdSecs = rawResumeThresholdSecs;
|
||||
if (resumeThresholdSecs <= rebufferThresholdSecs) {
|
||||
console.warn(
|
||||
`[useStreamingGeneration] resumeThresholdSecs (${resumeThresholdSecs}) must be greater than rebufferThresholdSecs (${rebufferThresholdSecs}). Clamping resumeThresholdSecs to ${rebufferThresholdSecs + 0.5}.`,
|
||||
);
|
||||
resumeThresholdSecs = rebufferThresholdSecs + 0.5;
|
||||
}
|
||||
const [isStreamPaused, setIsStreamPaused] = useState(false);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const audioCtxRef = useRef<AudioContext | null>(null);
|
||||
@@ -144,7 +161,7 @@ export function useStreamingGeneration({
|
||||
|
||||
if (!hasStartedPlaybackRef.current) {
|
||||
const bufferedSecs = chunksRef.current.reduce((sum, c) => sum + c.length, 0) / SAMPLE_RATE;
|
||||
if (bufferedSecs >= PREBUFFER_SECS) {
|
||||
if (bufferedSecs >= prebufferSecs) {
|
||||
flushBufferedAudio();
|
||||
}
|
||||
return;
|
||||
@@ -154,18 +171,18 @@ export function useStreamingGeneration({
|
||||
if (isUserPausedRef.current) return;
|
||||
|
||||
const ahead = nextStartTimeRef.current - ctx.currentTime;
|
||||
if (ctx.state === "running" && ahead < REBUFFER_THRESHOLD_SECS) {
|
||||
if (ctx.state === "running" && ahead < rebufferThresholdSecs) {
|
||||
ctx.suspend().catch(() => {});
|
||||
isAutoBufferingRef.current = true;
|
||||
} else if (
|
||||
ctx.state === "suspended" &&
|
||||
isAutoBufferingRef.current &&
|
||||
ahead >= RESUME_THRESHOLD_SECS
|
||||
ahead >= resumeThresholdSecs
|
||||
) {
|
||||
ctx.resume().catch(() => {});
|
||||
isAutoBufferingRef.current = false;
|
||||
}
|
||||
}, [enqueue, flushBufferedAudio]);
|
||||
}, [enqueue, flushBufferedAudio, prebufferSecs, rebufferThresholdSecs, resumeThresholdSecs]);
|
||||
|
||||
const generate = useCallback(async (options: GenerateOptions) => {
|
||||
if (!options.text.trim()) return;
|
||||
|
||||
Reference in New Issue
Block a user