perf: improve streaming generation pipeline

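Add CUDA inference hot-path optimizations, safer attention fallback handling, and generation profiling hooks. Improve SSE streaming (the proxy route now forwards the client's abort signal upstream and adds `no-transform` to `Cache-Control`), browser buffering telemetry (first-chunk, underrun, and recovery log messages plus realtime-factor and chunk statistics on completion), and playback recovery (after each buffer underrun the resume threshold grows adaptively, capped at 18s). Generation quality defaults are preserved (`cfg_scale` 1.5, `inference_steps` 10); note that the default playback buffering thresholds do change (prebuffer 2.0s -> 5.0s, rebuffer 0.4s -> 1.0s, resume 1.5s -> 3.0s) and the `build` script no longer passes `--turbopack`.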
This commit is contained in:
2026-04-30 18:54:14 +01:00
parent a39ec536fd
commit 75b84b211b
9 changed files with 459 additions and 48 deletions
+6 -1
View File
@@ -1,5 +1,8 @@
import { NextRequest, NextResponse } from "next/server";
export const dynamic = "force-dynamic";
export const runtime = "nodejs";
export async function POST(request: NextRequest) {
const pythonServerUrl = process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";
@@ -24,6 +27,7 @@ export async function POST(request: NextRequest) {
cfg_scale: body.cfg_scale ?? 1.5,
inference_steps: body.inference_steps ?? 10,
}),
signal: request.signal,
});
if (!upstream.ok) {
@@ -36,8 +40,9 @@ export async function POST(request: NextRequest) {
status: 200,
headers: {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Cache-Control": "no-cache, no-transform",
"Connection": "keep-alive",
"X-Content-Type-Options": "nosniff",
"X-Accel-Buffering": "no",
},
});
+1
View File
@@ -27,6 +27,7 @@ export async function GET() {
message: data.message,
progress: data.progress ?? null,
voices: data.voices ?? [],
config: data.config ?? null,
},
COMMON_OPTIONS
);
+3 -3
View File
@@ -130,9 +130,9 @@ const initialState: AppState = {
speaker: "carter",
cfgScale: 1.5,
inferenceSteps: 10,
prebufferSecs: 2.0,
rebufferThresholdSecs: 0.4,
resumeThresholdSecs: 1.5,
prebufferSecs: 5.0,
rebufferThresholdSecs: 1.0,
resumeThresholdSecs: 3.0,
isGenerating: false,
genElapsed: 0,
genPct: null,
+61 -14
View File
@@ -3,9 +3,10 @@
import { useCallback, useEffect, useRef, useState } from "react";
const SAMPLE_RATE = 24_000;
const DEFAULT_PREBUFFER_SECS = 2.0;
const DEFAULT_REBUFFER_THRESHOLD_SECS = 0.4;
const DEFAULT_RESUME_THRESHOLD_SECS = 1.5;
const DEFAULT_PREBUFFER_SECS = 5.0;
const DEFAULT_REBUFFER_THRESHOLD_SECS = 1.0;
const DEFAULT_RESUME_THRESHOLD_SECS = 3.0;
const MAX_ADAPTIVE_RESUME_SECS = 18.0;
interface GenerateOptions {
text: string;
@@ -104,6 +105,10 @@ export function useStreamingGeneration({
const isAutoBufferingRef = useRef(false);
const isUserPausedRef = useRef(false);
const audioUrlRef = useRef<string | null>(null);
const firstChunkSeenRef = useRef(false);
const underrunCountRef = useRef(0);
const totalAudioSamplesRef = useRef(0);
const adaptiveResumeSecsRef = useRef(DEFAULT_RESUME_THRESHOLD_SECS);
const revokeCurrentUrl = useCallback(() => {
if (audioUrlRef.current) {
@@ -122,8 +127,12 @@ export function useStreamingGeneration({
hasStartedPlaybackRef.current = false;
isAutoBufferingRef.current = false;
isUserPausedRef.current = false;
firstChunkSeenRef.current = false;
underrunCountRef.current = 0;
totalAudioSamplesRef.current = 0;
adaptiveResumeSecsRef.current = resumeThresholdSecs;
setIsStreamPaused(false);
}, []);
}, [resumeThresholdSecs]);
useEffect(() => {
return () => {
@@ -158,10 +167,17 @@ export function useStreamingGeneration({
if (!ctx) return;
chunksRef.current.push(chunk);
totalAudioSamplesRef.current += chunk.length;
if (!firstChunkSeenRef.current) {
firstChunkSeenRef.current = true;
onLog("First audio chunk received");
}
if (!hasStartedPlaybackRef.current) {
const bufferedSecs = chunksRef.current.reduce((sum, c) => sum + c.length, 0) / SAMPLE_RATE;
if (bufferedSecs >= prebufferSecs) {
onLog(`Playback started after ${bufferedSecs.toFixed(1)}s buffered`);
flushBufferedAudio();
}
return;
@@ -171,18 +187,30 @@ export function useStreamingGeneration({
if (isUserPausedRef.current) return;
const ahead = nextStartTimeRef.current - ctx.currentTime;
if (ctx.state === "running" && ahead < rebufferThresholdSecs) {
ctx.suspend().catch(() => {});
isAutoBufferingRef.current = true;
} else if (
ctx.state === "suspended" &&
isAutoBufferingRef.current &&
ahead >= resumeThresholdSecs
if (
ctx.state === "running" &&
!isAutoBufferingRef.current &&
ahead < rebufferThresholdSecs
) {
isAutoBufferingRef.current = true;
underrunCountRef.current += 1;
adaptiveResumeSecsRef.current = Math.min(
MAX_ADAPTIVE_RESUME_SECS,
Math.max(resumeThresholdSecs, prebufferSecs + underrunCountRef.current * 2),
);
ctx.suspend().catch(() => {});
onLog(
`Buffer underrun ${underrunCountRef.current}; refilling to ${adaptiveResumeSecsRef.current.toFixed(1)}s`,
);
} else if (
isAutoBufferingRef.current &&
ahead >= adaptiveResumeSecsRef.current
) {
ctx.resume().catch(() => {});
isAutoBufferingRef.current = false;
ctx.resume().catch(() => {});
onLog(`Buffer recovered with ${ahead.toFixed(1)}s queued`);
}
}, [enqueue, flushBufferedAudio, prebufferSecs, rebufferThresholdSecs, resumeThresholdSecs]);
}, [enqueue, flushBufferedAudio, onLog, prebufferSecs, rebufferThresholdSecs, resumeThresholdSecs]);
const generate = useCallback(async (options: GenerateOptions) => {
if (!options.text.trim()) return;
@@ -239,6 +267,11 @@ export function useStreamingGeneration({
type: "audio_chunk" | "complete" | "error" | "cancelled";
data?: string;
elapsed?: number;
audio_secs?: number;
realtime_factor?: number | null;
chunks?: number;
first_chunk_secs?: number | null;
max_chunk_gap_secs?: number;
message?: string;
};
@@ -247,12 +280,26 @@ export function useStreamingGeneration({
} else if (event.type === "complete") {
if (!hasStartedPlaybackRef.current) {
flushBufferedAudio();
} else if (isAutoBufferingRef.current) {
isAutoBufferingRef.current = false;
audioCtxRef.current?.resume().catch(() => {});
}
const wavBlob = buildWav(mergeFloat32Arrays(chunksRef.current), SAMPLE_RATE);
const audioUrl = URL.createObjectURL(wavBlob);
audioUrlRef.current = audioUrl;
const kb = (wavBlob.size / 1024).toFixed(0);
onLog(`Done in ${event.elapsed}s - ${kb} KB`);
const audioSecs = event.audio_secs ?? totalAudioSamplesRef.current / SAMPLE_RATE;
const realtimeFactor =
event.realtime_factor ??
(event.elapsed && event.elapsed > 0 ? audioSecs / event.elapsed : null);
const speedText =
realtimeFactor === null ? "" : ` - ${realtimeFactor.toFixed(2)}x realtime`;
onLog(`Done in ${event.elapsed}s - ${audioSecs.toFixed(1)}s audio${speedText} - ${kb} KB`);
if (event.chunks && event.first_chunk_secs !== undefined) {
onLog(
`Stream: first chunk ${event.first_chunk_secs}s, ${event.chunks} chunks, max gap ${event.max_chunk_gap_secs}s`,
);
}
onSuccess(audioUrl);
} else if (event.type === "cancelled") {
throw new DOMException("Generation cancelled", "AbortError");
+1 -1
View File
@@ -4,7 +4,7 @@
"private": true,
"scripts": {
"dev": "next dev --turbopack",
"build": "next build --turbopack",
"build": "next build",
"start": "next start"
},
"dependencies": {