Improve CPU Inference Stability: Adaptive Buffering & Chunk Accumulation (#11)

* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

  This change addresses audio stuttering issues when running on CPU-only hardware by:
  - Implementing server-side audio chunk accumulation to reduce SSE overhead.
  - Introducing device-aware default configurations for buffering and inference steps.
  - Exposing key performance parameters as environment variables.
  - Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

  Changes:
  - Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
  - Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
  - Updated `web/app/page.tsx` to fetch and apply server-side configuration.

  Verified on CPU mode in the development environment.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Implement Adaptive Buffering and Chunk Accumulation

  This change addresses audio stuttering issues when running on CPU-only hardware by:
  - Implementing server-side audio chunk accumulation to reduce SSE overhead.
  - Introducing device-aware default configurations for buffering and inference steps.
  - Exposing key performance parameters as environment variables.
  - Enabling the frontend to adaptively adjust its buffering thresholds based on the server's configuration.

  Changes:
  - Modified `server/vibevoice_server.py` to support accumulation and provide config via `/health`.
  - Updated `web/hooks/useStreamingGeneration.ts` to accept configurable buffering parameters.
  - Updated `web/app/page.tsx` to fetch and apply server-side configuration.

  Verified on CPU mode in the development environment.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Improve CPU Inference Stability: Adaptive Buffering UI & Logic

  This change enhances the initial CPU stability fix by:
  - Exposing adaptive buffering settings (Pre-buffer, Re-buffer Threshold, Resume Threshold) in a new "Advanced Buffering" UI section.
  - Managing buffering settings in the application state to allow for manual overrides.
  - Implementing robust re-initialization of buffering and inference defaults whenever the server's device (CPU/CUDA) changes.
  - Including the active device in the server's config object for reliable client-side detection.

  Verified with frontend screenshots and full build. Responds to PR feedback regarding actioning the adaptive logic.

  Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>

* Refine adaptive buffering: env helpers, threshold validation, a11y fixes

  - Extract _env_int/_env_float helpers in server to validate env-var config with graceful fallback instead of bare int/float casts
  - Fix inference_steps falsy-check (0 is valid) to use explicit None guard
  - Enforce rebufferThresholdSecs < resumeThresholdSecs in both the hook (with console.warn + clamp) and the GenerationControls UI (sliders block invalid states by auto-bumping or ignoring the drag)
  - Add type="button", aria-expanded, aria-controls, htmlFor, and input id attributes to GenerationControls for accessibility
  - Add .vscode/settings.json to .gitignore; sort package.json scripts

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
2026-07-31 13:07:06 +00:00 · 2026-04-30 16:03:35 +01:00
parent 87185e6289
commit a39ec536fd
7 changed files with 339 additions and 49 deletions
@@ -15,11 +15,23 @@ export interface DownloadProgress {
  total: number;
 }

+export interface ServerConfig {
+  device: string;
+  chunk_accum: number;
+  prebuffer_secs: number;
+  rebuffer_threshold_secs: number;
+  resume_threshold_secs: number;
+  default_inference_steps: number;
+}
+
 interface AppState {
  script: string;
  speaker: string;
  cfgScale: number;
  inferenceSteps: number;
+  prebufferSecs: number;
+  rebufferThresholdSecs: number;
+  resumeThresholdSecs: number;
  isGenerating: boolean;
  genElapsed: number;
  genPct: number | null;
@@ -28,6 +40,7 @@ interface AppState {
  serverStatus: ServerStatus;
  downloadProgress: DownloadProgress | null;
  availableVoices: string[];
+  serverConfig: ServerConfig | null;
 }

 type AppAction =
@@ -35,6 +48,9 @@ type AppAction =
  | { type: "SET_SPEAKER"; payload: string }
  | { type: "SET_CFG_SCALE"; payload: number }
  | { type: "SET_INFERENCE_STEPS"; payload: number }
+  | { type: "SET_PREBUFFER_SECS"; payload: number }
+  | { type: "SET_REBUFFER_THRESHOLD"; payload: number }
+  | { type: "SET_RESUME_THRESHOLD"; payload: number }
  | { type: "START_GENERATION" }
  | { type: "GEN_PROGRESS"; elapsed: number; pct: number | null }
  | { type: "GENERATION_SUCCESS"; payload: string }
@@ -43,7 +59,12 @@ type AppAction =
  | { type: "ADD_LOG"; payload: string }
  | {
      type: "SET_SERVER_STATUS";
-      payload: { status: ServerStatus; progress?: DownloadProgress | null; voices?: string[] };
+      payload: {
+        status: ServerStatus;
+        progress?: DownloadProgress | null;
+        voices?: string[];
+        config?: ServerConfig | null;
+      };
    };

 function reducer(state: AppState, action: AppAction): AppState {
@@ -52,6 +73,9 @@ function reducer(state: AppState, action: AppAction): AppState {
    case "SET_SPEAKER":        return { ...state, speaker: action.payload };
    case "SET_CFG_SCALE":      return { ...state, cfgScale: action.payload };
    case "SET_INFERENCE_STEPS": return { ...state, inferenceSteps: action.payload };
+    case "SET_PREBUFFER_SECS": return { ...state, prebufferSecs: action.payload };
+    case "SET_REBUFFER_THRESHOLD": return { ...state, rebufferThresholdSecs: action.payload };
+    case "SET_RESUME_THRESHOLD": return { ...state, resumeThresholdSecs: action.payload };
    case "START_GENERATION":
      return { ...state, isGenerating: true, audioUrl: null, logs: [], genElapsed: 0, genPct: null };
    case "GEN_PROGRESS":
@@ -63,14 +87,40 @@ function reducer(state: AppState, action: AppAction): AppState {
      return { ...state, isGenerating: false, genElapsed: 0, genPct: null };
    case "ADD_LOG":
      return { ...state, logs: [...state.logs, action.payload] };
-    case "SET_SERVER_STATUS":
+    case "SET_SERVER_STATUS": {
+      const isNewConfig = !state.serverConfig && action.payload.config;
+      const deviceChanged = !!(state.serverConfig && action.payload.config && state.serverConfig.device !== action.payload.config.device);
+
+      const nextSteps = (isNewConfig || deviceChanged)
+          ? action.payload.config!.default_inference_steps
+          : state.inferenceSteps;
+
+      const nextPrebuffer = (isNewConfig || deviceChanged)
+          ? action.payload.config!.prebuffer_secs
+          : state.prebufferSecs;
+
+      const nextRebuffer = (isNewConfig || deviceChanged)
+          ? action.payload.config!.rebuffer_threshold_secs
+          : state.rebufferThresholdSecs;
+
+      const nextResume = (isNewConfig || deviceChanged)
+          ? action.payload.config!.resume_threshold_secs
+          : state.resumeThresholdSecs;
+
      return {
        ...state,
        serverStatus: action.payload.status,
        downloadProgress: action.payload.progress ?? null,
-        availableVoices:
-          action.payload.voices?.length ? action.payload.voices : state.availableVoices,
+        availableVoices: action.payload.voices?.length
+          ? action.payload.voices
+          : state.availableVoices,
+        serverConfig: action.payload.config ?? state.serverConfig,
+        inferenceSteps: nextSteps,
+        prebufferSecs: nextPrebuffer,
+        rebufferThresholdSecs: nextRebuffer,
+        resumeThresholdSecs: nextResume,
      };
+    }
    default: return state;
  }
 }
@@ -80,6 +130,9 @@ const initialState: AppState = {
  speaker: "carter",
  cfgScale: 1.5,
  inferenceSteps: 10,
+  prebufferSecs: 2.0,
+  rebufferThresholdSecs: 0.4,
+  resumeThresholdSecs: 1.5,
  isGenerating: false,
  genElapsed: 0,
  genPct: null,
@@ -88,6 +141,7 @@ const initialState: AppState = {
  serverStatus: "offline",
  downloadProgress: null,
  availableVoices: [],
+  serverConfig: null,
 };

 export default function HomePage() {
@@ -106,19 +160,16 @@ export default function HomePage() {
  const handleGenerationCancel = useCallback(() => dispatch({ type: "GENERATION_CANCELLED" }), []);
  const handleGenerationError = useCallback(() => dispatch({ type: "GENERATION_ERROR" }), []);

-  const {
-    generate,
-    pauseStream,
-    resumeStream,
-    stop,
-    isStreamPaused,
-  } = useStreamingGeneration({
+  const { generate, pauseStream, resumeStream, stop, isStreamPaused } = useStreamingGeneration({
    onLog: addLog,
    onStart: handleGenerationStart,
    onProgress: handleGenerationProgress,
    onSuccess: handleGenerationSuccess,
    onCancel: handleGenerationCancel,
    onError: handleGenerationError,
+    prebufferSecs: state.prebufferSecs,
+    rebufferThresholdSecs: state.rebufferThresholdSecs,
+    resumeThresholdSecs: state.resumeThresholdSecs,
  });

  // Server health polling — fast while not ready, slow when online
@@ -131,21 +182,32 @@ export default function HomePage() {
      let nextStatus: ServerStatus = "offline";
      let nextProgress: DownloadProgress | null = null;
      let nextVoices: string[] = [];
+      let nextConfig: ServerConfig | null = null;
      try {
        const res = await fetch("/api/health", { cache: "no-store" });
-        const data = await res.json() as {
+        const data = (await res.json()) as {
          status: ServerStatus;
          progress?: DownloadProgress | null;
          voices?: string[];
+          config?: ServerConfig;
        };
        nextStatus = data.status ?? "offline";
        nextProgress = data.progress ?? null;
        nextVoices = data.voices ?? [];
+        nextConfig = data.config ?? null;
      } catch {
        nextStatus = "offline";
      }
      if (!cancelled) {
-        dispatch({ type: "SET_SERVER_STATUS", payload: { status: nextStatus, progress: nextProgress, voices: nextVoices } });
+        dispatch({
+          type: "SET_SERVER_STATUS",
+          payload: {
+            status: nextStatus,
+            progress: nextProgress,
+            voices: nextVoices,
+            config: nextConfig,
+          },
+        });
        timeoutId = setTimeout(poll, nextStatus === "online" ? 15_000 : 2_000);
      }
    }
@@ -199,6 +261,12 @@ export default function HomePage() {
              onCfgScaleChange={(v) => dispatch({ type: "SET_CFG_SCALE", payload: v })}
              inferenceSteps={state.inferenceSteps}
              onInferenceStepsChange={(v) => dispatch({ type: "SET_INFERENCE_STEPS", payload: v })}
+            prebufferSecs={state.prebufferSecs}
+            onPrebufferSecsChange={(v) => dispatch({ type: "SET_PREBUFFER_SECS", payload: v })}
+            rebufferThresholdSecs={state.rebufferThresholdSecs}
+            onRebufferThresholdChange={(v) => dispatch({ type: "SET_REBUFFER_THRESHOLD", payload: v })}
+            resumeThresholdSecs={state.resumeThresholdSecs}
+            onResumeThresholdChange={(v) => dispatch({ type: "SET_RESUME_THRESHOLD", payload: v })}
              onGenerate={handleGenerate}
              onStop={stop}
              onPauseStream={pauseStream}