mirror of
https://github.com/JezzWTF/vibepod.git
synced 2026-06-01 15:22:14 +00:00
Improve code documentation and maintainer notes
- Add a top-level doc comment to useStreamingGeneration.ts and document the streaming lifecycle.
- Add docstrings to helper functions in useStreamingGeneration.ts.
- Add section comments to web/app/page.tsx around reducer state, server health polling, and generation handling.
- Add file-level comments to API proxy routes explaining the security architecture.
- Add a file map / maintainer guide comment to server/vibevoice_server.py.
- Add docstrings for key internal helpers in server/vibevoice_server.py.
- Document environment variables used by the server in server/vibevoice_server.py.
- Add comments identifying VibePod-specific patches around VibeVoice internals.
- Format server/vibevoice_server.py with black.

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
This commit is contained in:
@@ -1,3 +1,14 @@
|
||||
/**
|
||||
* API Proxy Route: POST /api/generate
|
||||
*
|
||||
* This route proxies requests from the frontend to the FastAPI backend's /generate endpoint.
|
||||
*
|
||||
* Security Architecture:
|
||||
* The FastAPI backend is configured to bind only to localhost (127.0.0.1). This prevents
|
||||
* unauthenticated public access to the model inference engine. Next.js acts as a secure
|
||||
* proxy, allowing the frontend to interact with the backend while maintaining a
|
||||
* single public-facing origin.
|
||||
*/
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
/**
|
||||
* API Proxy Route: GET /api/health
|
||||
*
|
||||
* This route proxies health check requests from the frontend to the FastAPI backend's /health endpoint.
|
||||
*
|
||||
* Security Architecture:
|
||||
* The FastAPI backend is configured to bind only to localhost (127.0.0.1). This prevents
|
||||
* unauthenticated public access to the server status and configuration. Next.js acts as a secure
|
||||
* proxy, allowing the frontend to poll for server readiness and adaptive configuration
|
||||
* while maintaining a single public-facing origin.
|
||||
*/
|
||||
import { NextResponse } from "next/server";
|
||||
|
||||
const OFFLINE_RESPONSE = { status: "offline" };
|
||||
|
||||
@@ -24,6 +24,8 @@ export interface ServerConfig {
|
||||
default_inference_steps: number;
|
||||
}
|
||||
|
||||
// --- State Management ---
|
||||
|
||||
interface AppState {
|
||||
script: string;
|
||||
speaker: string;
|
||||
@@ -199,6 +201,8 @@ export default function HomePage() {
|
||||
resumeThresholdSecs: state.resumeThresholdSecs,
|
||||
});
|
||||
|
||||
// --- Server Health & Status Polling ---
|
||||
|
||||
// Server health polling — fast while not ready, slow when online
|
||||
useEffect(() => {
|
||||
let timeoutId: ReturnType<typeof setTimeout>;
|
||||
@@ -246,6 +250,8 @@ export default function HomePage() {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// --- Generation Handling ---
|
||||
|
||||
const handleGenerate = useCallback(async () => {
|
||||
if (!state.script.trim() || state.isGenerating) return;
|
||||
addLog(`${wordCount} words queued`);
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
"use client";
|
||||
|
||||
/**
|
||||
* Hook for managing real-time streaming audio generation from the VibeVoice server.
|
||||
*
|
||||
* Streaming Lifecycle:
|
||||
* 1. fetch /api/generate: Initiates a POST request to the generation endpoint.
|
||||
* 2. parse SSE chunks: Listens for Server-Sent Events (SSE) containing audio data or status updates.
|
||||
* 3. decode base64 float32 PCM: Converts incoming base64-encoded strings into raw Float32 audio samples.
|
||||
* 4. schedule Web Audio playback: Enqueues audio chunks into an AudioContext for low-latency playback.
|
||||
* 5. handle adaptive buffering: Monitors playback progress and pauses to refill the buffer if an underrun is detected.
|
||||
* 6. assemble final WAV Blob: Combines all received chunks into a single WAV file once generation is complete.
|
||||
*/
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
const SAMPLE_RATE = 24_000;
|
||||
@@ -30,6 +41,9 @@ interface UseStreamingGenerationOptions {
|
||||
resumeThresholdSecs?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Concatenates multiple Float32Array chunks into a single Float32Array.
|
||||
*/
|
||||
function mergeFloat32Arrays(chunks: Float32Array<ArrayBuffer>[]): Float32Array<ArrayBuffer> {
|
||||
const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
|
||||
const out = new Float32Array(total);
|
||||
@@ -41,6 +55,9 @@ function mergeFloat32Arrays(chunks: Float32Array<ArrayBuffer>[]): Float32Array<A
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps Float32 PCM samples into a WAV file Blob with a standard header.
|
||||
*/
|
||||
function buildWav(samples: Float32Array<ArrayBuffer>, sampleRate: number): Blob {
|
||||
const dataSize = samples.length * 4;
|
||||
const buffer = new ArrayBuffer(44 + dataSize);
|
||||
@@ -68,6 +85,9 @@ function buildWav(samples: Float32Array<ArrayBuffer>, sampleRate: number): Blob
|
||||
return new Blob([buffer], { type: "audio/wav" });
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a base64-encoded string into a Float32Array of PCM samples.
|
||||
*/
|
||||
function decodeFloat32Chunk(data: string): Float32Array<ArrayBuffer> {
|
||||
const raw = atob(data);
|
||||
const bytes = new Uint8Array(raw.length);
|
||||
@@ -141,6 +161,9 @@ export function useStreamingGeneration({
|
||||
};
|
||||
}, [resetPlayback, revokeCurrentUrl]);
|
||||
|
||||
/**
|
||||
* Creates an AudioBuffer from a chunk and schedules it for playback in the AudioContext.
|
||||
*/
|
||||
const enqueue = useCallback((ctx: AudioContext, chunk: Float32Array<ArrayBuffer>) => {
|
||||
const audioBuffer = ctx.createBuffer(1, chunk.length, SAMPLE_RATE);
|
||||
audioBuffer.copyToChannel(chunk, 0);
|
||||
@@ -152,6 +175,9 @@ export function useStreamingGeneration({
|
||||
nextStartTimeRef.current = startAt + audioBuffer.duration;
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Resets the playback timing and enqueues all currently buffered chunks for immediate playback.
|
||||
*/
|
||||
const flushBufferedAudio = useCallback(() => {
|
||||
const ctx = audioCtxRef.current;
|
||||
if (!ctx || chunksRef.current.length === 0) return;
|
||||
@@ -162,6 +188,10 @@ export function useStreamingGeneration({
|
||||
hasStartedPlaybackRef.current = true;
|
||||
}, [enqueue]);
|
||||
|
||||
/**
|
||||
* Processes a new audio chunk, either buffering it for initial playback or enqueuing it for
|
||||
* immediate playback with adaptive buffering logic.
|
||||
*/
|
||||
const handleAudioChunk = useCallback(
|
||||
(chunk: Float32Array<ArrayBuffer>) => {
|
||||
const ctx = audioCtxRef.current;
|
||||
|
||||
Reference in New Issue
Block a user