Mirror of https://github.com/JezzWTF/vibepod.git (synced 2026-06-01 15:22:14 +00:00)
perf: improve streaming generation pipeline
Add CUDA inference hot-path optimizations, safer attention fallback handling, and generation profiling hooks. Improve SSE streaming, browser buffering telemetry, and playback recovery while preserving default audio quality settings.
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
export const dynamic = "force-dynamic";
|
||||
export const runtime = "nodejs";
|
||||
|
||||
export async function POST(request: NextRequest) {
|
||||
const pythonServerUrl = process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";
|
||||
|
||||
@@ -24,6 +27,7 @@ export async function POST(request: NextRequest) {
|
||||
cfg_scale: body.cfg_scale ?? 1.5,
|
||||
inference_steps: body.inference_steps ?? 10,
|
||||
}),
|
||||
signal: request.signal,
|
||||
});
|
||||
|
||||
if (!upstream.ok) {
|
||||
@@ -36,8 +40,9 @@ export async function POST(request: NextRequest) {
|
||||
status: 200,
|
||||
headers: {
|
||||
"Content-Type": "text/event-stream",
|
||||
"Cache-Control": "no-cache",
|
||||
"Cache-Control": "no-cache, no-transform",
|
||||
"Connection": "keep-alive",
|
||||
"X-Content-Type-Options": "nosniff",
|
||||
"X-Accel-Buffering": "no",
|
||||
},
|
||||
});
|
||||
|
||||
@@ -27,6 +27,7 @@ export async function GET() {
|
||||
message: data.message,
|
||||
progress: data.progress ?? null,
|
||||
voices: data.voices ?? [],
|
||||
config: data.config ?? null,
|
||||
},
|
||||
COMMON_OPTIONS
|
||||
);
|
||||
|
||||
+3
-3
@@ -130,9 +130,9 @@ const initialState: AppState = {
|
||||
speaker: "carter",
|
||||
cfgScale: 1.5,
|
||||
inferenceSteps: 10,
|
||||
prebufferSecs: 2.0,
|
||||
rebufferThresholdSecs: 0.4,
|
||||
resumeThresholdSecs: 1.5,
|
||||
prebufferSecs: 5.0,
|
||||
rebufferThresholdSecs: 1.0,
|
||||
resumeThresholdSecs: 3.0,
|
||||
isGenerating: false,
|
||||
genElapsed: 0,
|
||||
genPct: null,
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
const SAMPLE_RATE = 24_000;
|
||||
const DEFAULT_PREBUFFER_SECS = 2.0;
|
||||
const DEFAULT_REBUFFER_THRESHOLD_SECS = 0.4;
|
||||
const DEFAULT_RESUME_THRESHOLD_SECS = 1.5;
|
||||
const DEFAULT_PREBUFFER_SECS = 5.0;
|
||||
const DEFAULT_REBUFFER_THRESHOLD_SECS = 1.0;
|
||||
const DEFAULT_RESUME_THRESHOLD_SECS = 3.0;
|
||||
const MAX_ADAPTIVE_RESUME_SECS = 18.0;
|
||||
|
||||
interface GenerateOptions {
|
||||
text: string;
|
||||
@@ -104,6 +105,10 @@ export function useStreamingGeneration({
|
||||
const isAutoBufferingRef = useRef(false);
|
||||
const isUserPausedRef = useRef(false);
|
||||
const audioUrlRef = useRef<string | null>(null);
|
||||
const firstChunkSeenRef = useRef(false);
|
||||
const underrunCountRef = useRef(0);
|
||||
const totalAudioSamplesRef = useRef(0);
|
||||
const adaptiveResumeSecsRef = useRef(DEFAULT_RESUME_THRESHOLD_SECS);
|
||||
|
||||
const revokeCurrentUrl = useCallback(() => {
|
||||
if (audioUrlRef.current) {
|
||||
@@ -122,8 +127,12 @@ export function useStreamingGeneration({
|
||||
hasStartedPlaybackRef.current = false;
|
||||
isAutoBufferingRef.current = false;
|
||||
isUserPausedRef.current = false;
|
||||
firstChunkSeenRef.current = false;
|
||||
underrunCountRef.current = 0;
|
||||
totalAudioSamplesRef.current = 0;
|
||||
adaptiveResumeSecsRef.current = resumeThresholdSecs;
|
||||
setIsStreamPaused(false);
|
||||
}, []);
|
||||
}, [resumeThresholdSecs]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
@@ -158,10 +167,17 @@ export function useStreamingGeneration({
|
||||
if (!ctx) return;
|
||||
|
||||
chunksRef.current.push(chunk);
|
||||
totalAudioSamplesRef.current += chunk.length;
|
||||
|
||||
if (!firstChunkSeenRef.current) {
|
||||
firstChunkSeenRef.current = true;
|
||||
onLog("First audio chunk received");
|
||||
}
|
||||
|
||||
if (!hasStartedPlaybackRef.current) {
|
||||
const bufferedSecs = chunksRef.current.reduce((sum, c) => sum + c.length, 0) / SAMPLE_RATE;
|
||||
if (bufferedSecs >= prebufferSecs) {
|
||||
onLog(`Playback started after ${bufferedSecs.toFixed(1)}s buffered`);
|
||||
flushBufferedAudio();
|
||||
}
|
||||
return;
|
||||
@@ -171,18 +187,30 @@ export function useStreamingGeneration({
|
||||
if (isUserPausedRef.current) return;
|
||||
|
||||
const ahead = nextStartTimeRef.current - ctx.currentTime;
|
||||
if (ctx.state === "running" && ahead < rebufferThresholdSecs) {
|
||||
ctx.suspend().catch(() => {});
|
||||
isAutoBufferingRef.current = true;
|
||||
} else if (
|
||||
ctx.state === "suspended" &&
|
||||
isAutoBufferingRef.current &&
|
||||
ahead >= resumeThresholdSecs
|
||||
if (
|
||||
ctx.state === "running" &&
|
||||
!isAutoBufferingRef.current &&
|
||||
ahead < rebufferThresholdSecs
|
||||
) {
|
||||
isAutoBufferingRef.current = true;
|
||||
underrunCountRef.current += 1;
|
||||
adaptiveResumeSecsRef.current = Math.min(
|
||||
MAX_ADAPTIVE_RESUME_SECS,
|
||||
Math.max(resumeThresholdSecs, prebufferSecs + underrunCountRef.current * 2),
|
||||
);
|
||||
ctx.suspend().catch(() => {});
|
||||
onLog(
|
||||
`Buffer underrun ${underrunCountRef.current}; refilling to ${adaptiveResumeSecsRef.current.toFixed(1)}s`,
|
||||
);
|
||||
} else if (
|
||||
isAutoBufferingRef.current &&
|
||||
ahead >= adaptiveResumeSecsRef.current
|
||||
) {
|
||||
ctx.resume().catch(() => {});
|
||||
isAutoBufferingRef.current = false;
|
||||
ctx.resume().catch(() => {});
|
||||
onLog(`Buffer recovered with ${ahead.toFixed(1)}s queued`);
|
||||
}
|
||||
}, [enqueue, flushBufferedAudio, prebufferSecs, rebufferThresholdSecs, resumeThresholdSecs]);
|
||||
}, [enqueue, flushBufferedAudio, onLog, prebufferSecs, rebufferThresholdSecs, resumeThresholdSecs]);
|
||||
|
||||
const generate = useCallback(async (options: GenerateOptions) => {
|
||||
if (!options.text.trim()) return;
|
||||
@@ -239,6 +267,11 @@ export function useStreamingGeneration({
|
||||
type: "audio_chunk" | "complete" | "error" | "cancelled";
|
||||
data?: string;
|
||||
elapsed?: number;
|
||||
audio_secs?: number;
|
||||
realtime_factor?: number | null;
|
||||
chunks?: number;
|
||||
first_chunk_secs?: number | null;
|
||||
max_chunk_gap_secs?: number;
|
||||
message?: string;
|
||||
};
|
||||
|
||||
@@ -247,12 +280,26 @@ export function useStreamingGeneration({
|
||||
} else if (event.type === "complete") {
|
||||
if (!hasStartedPlaybackRef.current) {
|
||||
flushBufferedAudio();
|
||||
} else if (isAutoBufferingRef.current) {
|
||||
isAutoBufferingRef.current = false;
|
||||
audioCtxRef.current?.resume().catch(() => {});
|
||||
}
|
||||
const wavBlob = buildWav(mergeFloat32Arrays(chunksRef.current), SAMPLE_RATE);
|
||||
const audioUrl = URL.createObjectURL(wavBlob);
|
||||
audioUrlRef.current = audioUrl;
|
||||
const kb = (wavBlob.size / 1024).toFixed(0);
|
||||
onLog(`Done in ${event.elapsed}s - ${kb} KB`);
|
||||
const audioSecs = event.audio_secs ?? totalAudioSamplesRef.current / SAMPLE_RATE;
|
||||
const realtimeFactor =
|
||||
event.realtime_factor ??
|
||||
(event.elapsed && event.elapsed > 0 ? audioSecs / event.elapsed : null);
|
||||
const speedText =
|
||||
realtimeFactor === null ? "" : ` - ${realtimeFactor.toFixed(2)}x realtime`;
|
||||
onLog(`Done in ${event.elapsed}s - ${audioSecs.toFixed(1)}s audio${speedText} - ${kb} KB`);
|
||||
if (event.chunks && event.first_chunk_secs !== undefined) {
|
||||
onLog(
|
||||
`Stream: first chunk ${event.first_chunk_secs}s, ${event.chunks} chunks, max gap ${event.max_chunk_gap_secs}s`,
|
||||
);
|
||||
}
|
||||
onSuccess(audioUrl);
|
||||
} else if (event.type === "cancelled") {
|
||||
throw new DOMException("Generation cancelled", "AbortError");
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev --turbopack",
|
||||
"build": "next build --turbopack",
|
||||
"build": "next build",
|
||||
"start": "next start"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
Reference in New Issue
Block a user