perf: improve streaming generation pipeline

Add CUDA inference hot-path optimizations, safer attention fallback handling, and generation profiling hooks. Improve SSE streaming, browser buffering telemetry, and playback recovery while preserving default audio quality settings.
This commit is contained in:
2026-04-30 18:54:14 +01:00
parent a39ec536fd
commit 75b84b211b
9 changed files with 459 additions and 48 deletions
+3 -3
View File
@@ -130,9 +130,9 @@ const initialState: AppState = {
speaker: "carter",
cfgScale: 1.5,
inferenceSteps: 10,
prebufferSecs: 2.0,
rebufferThresholdSecs: 0.4,
resumeThresholdSecs: 1.5,
prebufferSecs: 5.0,
rebufferThresholdSecs: 1.0,
resumeThresholdSecs: 3.0,
isGenerating: false,
genElapsed: 0,
genPct: null,