Improve code documentation and maintainer notes

- Add a top-level doc comment to useStreamingGeneration.ts and document the streaming lifecycle.
- Add docstrings to helper functions in useStreamingGeneration.ts.
- Add section comments to web/app/page.tsx around reducer state, server health polling, and generation handling.
- Add file-level comments to API proxy routes explaining the security architecture.
- Add a file map / maintainer guide comment to server/vibevoice_server.py.
- Add docstrings for key internal helpers in server/vibevoice_server.py.
- Document environment variables used by the server in server/vibevoice_server.py.
- Add comments identifying VibePod-specific patches around VibeVoice internals.
- Format server/vibevoice_server.py with black.

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
2026-07-31 13:07:06 +00:00 · 2026-05-02 16:44:38 +00:00
parent 0236807928
commit e64048e500
5 changed files with 219 additions and 50 deletions
@@ -1,3 +1,14 @@
+/**
+ * API Proxy Route: POST /api/generate
+ *
+ * This route proxies requests from the frontend to the FastAPI backend's /generate endpoint.
+ *
+ * Security Architecture:
+ * The FastAPI backend is configured to bind only to localhost (127.0.0.1), which prevents
+ * direct public access to the model inference engine. Next.js acts as the proxy in front of
+ * it, so the frontend can reach the backend while the deployment keeps a single
+ * public-facing origin.
+ */
 import { NextRequest, NextResponse } from "next/server";

 export const dynamic = "force-dynamic";
@@ -1,3 +1,14 @@
+/**
+ * API Proxy Route: GET /api/health
+ *
+ * This route proxies health check requests from the frontend to the FastAPI backend's /health endpoint.
+ *
+ * Security Architecture:
+ * The FastAPI backend is configured to bind only to localhost (127.0.0.1), which prevents
+ * direct public access to the server status and configuration. Next.js acts as the proxy in
+ * front of it, so the frontend can poll for server readiness and adaptive configuration
+ * while the deployment keeps a single public-facing origin.
+ */
 import { NextResponse } from "next/server";

 const OFFLINE_RESPONSE = { status: "offline" };
@@ -24,6 +24,8 @@ export interface ServerConfig {
  default_inference_steps: number;
 }

+// --- State Management ---
+
 interface AppState {
  script: string;
  speaker: string;
@@ -199,6 +201,8 @@ export default function HomePage() {
    resumeThresholdSecs: state.resumeThresholdSecs,
  });

+  // --- Server Health & Status Polling ---
+
  // Server health polling — fast while not ready, slow when online
  useEffect(() => {
    let timeoutId: ReturnType<typeof setTimeout>;
@@ -246,6 +250,8 @@ export default function HomePage() {
    };
  }, []);

+  // --- Generation Handling ---
+
  const handleGenerate = useCallback(async () => {
    if (!state.script.trim() || state.isGenerating) return;
    addLog(`${wordCount} words queued`);
@@ -1,5 +1,16 @@
 "use client";

+/**
+ * Hook for managing real-time streaming audio generation from the VibeVoice server.
+ *
+ * Streaming Lifecycle:
+ * 1. fetch /api/generate: Initiates a POST request to the generation endpoint.
+ * 2. parse SSE chunks: Listens for Server-Sent Events (SSE) containing audio data or status updates.
+ * 3. decode base64 float32 PCM: Converts incoming base64-encoded strings into raw Float32 audio samples.
+ * 4. schedule Web Audio playback: Enqueues audio chunks into an AudioContext for low-latency playback.
+ * 5. handle adaptive buffering: Monitors playback progress and pauses to refill the buffer if an underrun is detected.
+ * 6. assemble final WAV Blob: Combines all received chunks into a single WAV file once generation is complete.
+ */
 import { useCallback, useEffect, useRef, useState } from "react";

 const SAMPLE_RATE = 24_000;
@@ -30,6 +41,9 @@ interface UseStreamingGenerationOptions {
  resumeThresholdSecs?: number;
 }

+/**
+ * Concatenates multiple Float32Array chunks into a single Float32Array.
+ */
 function mergeFloat32Arrays(chunks: Float32Array<ArrayBuffer>[]): Float32Array<ArrayBuffer> {
  const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const out = new Float32Array(total);
@@ -41,6 +55,9 @@ function mergeFloat32Arrays(chunks: Float32Array<ArrayBuffer>[]): Float32Array<A
  return out;
 }

+/**
+ * Wraps Float32 PCM samples into a WAV file Blob: a standard 44-byte header followed by the
+ * sample data at 4 bytes per sample.
+ */
 function buildWav(samples: Float32Array<ArrayBuffer>, sampleRate: number): Blob {
  const dataSize = samples.length * 4;
  const buffer = new ArrayBuffer(44 + dataSize);
@@ -68,6 +85,9 @@ function buildWav(samples: Float32Array<ArrayBuffer>, sampleRate: number): Blob
  return new Blob([buffer], { type: "audio/wav" });
 }

+/**
+ * Decodes a base64-encoded string into a Float32Array of PCM samples.
+ */
 function decodeFloat32Chunk(data: string): Float32Array<ArrayBuffer> {
  const raw = atob(data);
  const bytes = new Uint8Array(raw.length);
@@ -141,6 +161,9 @@ export function useStreamingGeneration({
    };
  }, [resetPlayback, revokeCurrentUrl]);

+  /**
+   * Creates a single-channel AudioBuffer from a chunk, schedules it for playback in the
+   * AudioContext, and advances the next start time by the buffer's duration.
+   */
  const enqueue = useCallback((ctx: AudioContext, chunk: Float32Array<ArrayBuffer>) => {
    const audioBuffer = ctx.createBuffer(1, chunk.length, SAMPLE_RATE);
    audioBuffer.copyToChannel(chunk, 0);
@@ -152,6 +175,9 @@ export function useStreamingGeneration({
    nextStartTimeRef.current = startAt + audioBuffer.duration;
  }, []);

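+  /**
+   * Resets the playback timing, enqueues all currently buffered chunks for immediate playback,
+   * and marks playback as started. Does nothing when there is no AudioContext or no chunks
+   * have been buffered yet.
+   */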
  const flushBufferedAudio = useCallback(() => {
    const ctx = audioCtxRef.current;
    if (!ctx || chunksRef.current.length === 0) return;
@@ -162,6 +188,10 @@ export function useStreamingGeneration({
    hasStartedPlaybackRef.current = true;
  }, [enqueue]);

+  /**
+   * Processes a new audio chunk, either buffering it for initial playback or enqueuing it for
+   * immediate playback with adaptive buffering logic.
+   */
  const handleAudioChunk = useCallback(
    (chunk: Float32Array<ArrayBuffer>) => {
      const ctx = audioCtxRef.current;