Improve dev startup: model download script, loading state in health check, faster polling

Agent-Logs-Url: https://github.com/JezzWTF/vibepod/sessions/3c05c740-b0a3-497d-88f1-dfa63121424d Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
2026-07-31 13:07:06 +00:00 · 2026-04-27 16:00:53 +00:00
parent 3974a4cf69
commit 11ffc7df7c
8 changed files with 546 additions and 46 deletions
@@ -8,13 +8,28 @@ export async function GET() {
    const res = await fetch(`${pythonServerUrl}/health`, {
      method: "GET",
      signal: AbortSignal.timeout(4000),
      // Don't cache health checks
      cache: "no-store",
    });
    if (res.ok) {
-      return NextResponse.json({ status: "online" });
+      const data = await res.json().catch(() => ({}));
      // Pass through the exact status the Python server reports:
      // "online" | "loading" | "error"
      const status: string = data.status ?? "online";
      return NextResponse.json(
        { status, message: data.message },
        { headers: { "Cache-Control": "no-store" } }
      );
    }
-    return NextResponse.json({ status: "offline" });
+    return NextResponse.json(
      { status: "offline" },
      { headers: { "Cache-Control": "no-store" } }
    );
  } catch {
-    return NextResponse.json({ status: "offline" });
+    return NextResponse.json(
      { status: "offline" },
      { headers: { "Cache-Control": "no-store" } }
    );
  }
 }
@@ -1,46 +1,84 @@
 "use client";
-import { useEffect, useState } from "react";
+import { useEffect, useRef, useState } from "react";
-type ServerStatus = "checking" | "online" | "offline";
+type ServerStatus = "checking" | "loading" | "online" | "error" | "offline";
 // Polling intervals: poll quickly until the server is online, then slow down.
 const FAST_INTERVAL_MS = 3000;   // while checking / loading
 const SLOW_INTERVAL_MS = 30000;  // once online
 export default function Header() {
  const [status, setStatus] = useState<ServerStatus>("checking");
  const [message, setMessage] = useState<string | undefined>();
  const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
  useEffect(() => {
    const checkHealth = async () => {
      try {
-        const res = await fetch("/api/health");
+        const res = await fetch("/api/health", { cache: "no-store" });
        const data = await res.json();
-        setStatus(data.status === "online" ? "online" : "offline");
+        const newStatus: ServerStatus = (data.status as ServerStatus) ?? "offline";
        setStatus(newStatus);
        setMessage(data.message);
        // Switch to slow polling once we know the server is online
        if (newStatus === "online" && intervalRef.current) {
          clearInterval(intervalRef.current);
          intervalRef.current = setInterval(checkHealth, SLOW_INTERVAL_MS);
        }
        // Switch to fast polling if we detect the server went offline/loading
        if ((newStatus === "offline" || newStatus === "loading") && intervalRef.current) {
          clearInterval(intervalRef.current);
          intervalRef.current = setInterval(checkHealth, FAST_INTERVAL_MS);
        }
      } catch {
        setStatus("offline");
        setMessage(undefined);
      }
    };
    // Start with a fast poll — the server may still be loading the model
    checkHealth();
-    const interval = setInterval(checkHealth, 30000);
+    intervalRef.current = setInterval(checkHealth, FAST_INTERVAL_MS);
-    return () => clearInterval(interval);
+    return () => {
      if (intervalRef.current) clearInterval(intervalRef.current);
    };
  }, []);
-  const statusConfig = {
+  const statusConfig: Record<
    ServerStatus,
    { color: string; label: string; pulse: boolean; ring: string }
  > = {
    checking: {
      color: "bg-yellow-500",
-      label: "Checking...",
+      label: "Checking…",
      textColor: "text-yellow-400",
      pulse: true,
      ring: "border-yellow-500/30",
    },
    loading: {
      color: "bg-blue-400",
      label: "Loading model…",
      pulse: true,
      ring: "border-blue-400/30",
    },
    online: {
      color: "bg-green-500",
      label: "Server Online",
      textColor: "text-green-400",
      pulse: false,
      ring: "border-green-500/30",
    },
    error: {
      color: "bg-orange-500",
      label: "Model Error",
      pulse: false,
      ring: "border-orange-500/30",
    },
    offline: {
      color: "bg-red-500",
      label: "Server Offline",
      textColor: "text-red-400",
      pulse: false,
      ring: "border-red-500/30",
    },
  };
@@ -85,16 +123,19 @@ export default function Header() {
      </div>
      <div
-        className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs font-medium border"
+        className={`flex items-center gap-2 px-3 py-1.5 rounded-full text-xs font-medium border ${cfg.ring}`}
        style={{
          background: "var(--background)",
          borderColor: "var(--border)",
        }}
        title={message}
      >
        <span className="relative flex h-2 w-2">
-          <span
+          {cfg.pulse && (
-            className={`${cfg.pulse ? "animate-ping absolute inline-flex h-full w-full rounded-full opacity-75 " + cfg.color : "hidden"}`}
+            <span
-          />
+              className={`animate-ping absolute inline-flex h-full w-full rounded-full opacity-75 ${cfg.color}`}
            />
          )}
          <span
            className={`relative inline-flex rounded-full h-2 w-2 ${cfg.color}`}
          />
@@ -104,3 +145,4 @@ export default function Header() {
    </header>
  );
 }
@@ -17,6 +17,7 @@
        "@types/node": "^20",
        "@types/react": "^19",
        "@types/react-dom": "^19",
        "concurrently": "^9.2.1",
        "tailwindcss": "^4",
        "typescript": "^5"
      }
@@ -1004,6 +1005,32 @@
        "@types/react": "^19.2.0"
      }
    },
    "node_modules/ansi-regex": {
      "version": "5.0.1",
      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/ansi-styles": {
      "version": "4.3.0",
      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
      "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "color-convert": "^2.0.1"
      },
      "engines": {
        "node": ">=8"
      },
      "funding": {
        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
      }
    },
    "node_modules/caniuse-lite": {
      "version": "1.0.30001791",
      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001791.tgz",
@@ -1024,12 +1051,102 @@
      ],
      "license": "CC-BY-4.0"
    },
    "node_modules/chalk": {
      "version": "4.1.2",
      "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
      "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "ansi-styles": "^4.1.0",
        "supports-color": "^7.1.0"
      },
      "engines": {
        "node": ">=10"
      },
      "funding": {
        "url": "https://github.com/chalk/chalk?sponsor=1"
      }
    },
    "node_modules/chalk/node_modules/supports-color": {
      "version": "7.2.0",
      "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
      "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "has-flag": "^4.0.0"
      },
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/client-only": {
      "version": "0.0.1",
      "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
      "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==",
      "license": "MIT"
    },
    "node_modules/cliui": {
      "version": "8.0.1",
      "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
      "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
      "dev": true,
      "license": "ISC",
      "dependencies": {
        "string-width": "^4.2.0",
        "strip-ansi": "^6.0.1",
        "wrap-ansi": "^7.0.0"
      },
      "engines": {
        "node": ">=12"
      }
    },
    "node_modules/color-convert": {
      "version": "2.0.1",
      "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
      "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "color-name": "~1.1.4"
      },
      "engines": {
        "node": ">=7.0.0"
      }
    },
    "node_modules/color-name": {
      "version": "1.1.4",
      "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
      "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/concurrently": {
      "version": "9.2.1",
      "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz",
      "integrity": "sha512-fsfrO0MxV64Znoy8/l1vVIjjHa29SZyyqPgQBwhiDcaW8wJc2W3XWVOGx4M3oJBnv/zdUZIIp1gDeS98GzP8Ng==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "chalk": "4.1.2",
        "rxjs": "7.8.2",
        "shell-quote": "1.8.3",
        "supports-color": "8.1.1",
        "tree-kill": "1.2.2",
        "yargs": "17.7.2"
      },
      "bin": {
        "conc": "dist/bin/concurrently.js",
        "concurrently": "dist/bin/concurrently.js"
      },
      "engines": {
        "node": ">=18"
      },
      "funding": {
        "url": "https://github.com/open-cli-tools/concurrently?sponsor=1"
      }
    },
    "node_modules/csstype": {
      "version": "3.2.3",
      "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
@@ -1047,6 +1164,13 @@
        "node": ">=8"
      }
    },
    "node_modules/emoji-regex": {
      "version": "8.0.0",
      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/enhanced-resolve": {
      "version": "5.21.0",
      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.0.tgz",
@@ -1061,6 +1185,26 @@
        "node": ">=10.13.0"
      }
    },
    "node_modules/escalade": {
      "version": "3.2.0",
      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
      "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=6"
      }
    },
    "node_modules/get-caller-file": {
      "version": "2.0.5",
      "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
      "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
      "dev": true,
      "license": "ISC",
      "engines": {
        "node": "6.* || 8.* || >= 10.*"
      }
    },
    "node_modules/graceful-fs": {
      "version": "4.2.11",
      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -1068,6 +1212,26 @@
      "dev": true,
      "license": "ISC"
    },
    "node_modules/has-flag": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
      "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/is-fullwidth-code-point": {
      "version": "3.0.0",
      "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
      "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/jiti": {
      "version": "2.6.1",
      "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
@@ -1503,6 +1667,26 @@
        "react": "^19.1.0"
      }
    },
    "node_modules/require-directory": {
      "version": "2.1.1",
      "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
      "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=0.10.0"
      }
    },
    "node_modules/rxjs": {
      "version": "7.8.2",
      "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz",
      "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==",
      "dev": true,
      "license": "Apache-2.0",
      "dependencies": {
        "tslib": "^2.1.0"
      }
    },
    "node_modules/scheduler": {
      "version": "0.26.0",
      "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz",
@@ -1567,6 +1751,19 @@
        "@img/sharp-win32-x64": "0.34.5"
      }
    },
    "node_modules/shell-quote": {
      "version": "1.8.3",
      "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz",
      "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==",
      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">= 0.4"
      },
      "funding": {
        "url": "https://github.com/sponsors/ljharb"
      }
    },
    "node_modules/source-map-js": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -1576,6 +1773,34 @@
        "node": ">=0.10.0"
      }
    },
    "node_modules/string-width": {
      "version": "4.2.3",
      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "emoji-regex": "^8.0.0",
        "is-fullwidth-code-point": "^3.0.0",
        "strip-ansi": "^6.0.1"
      },
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/strip-ansi": {
      "version": "6.0.1",
      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "ansi-regex": "^5.0.1"
      },
      "engines": {
        "node": ">=8"
      }
    },
    "node_modules/styled-jsx": {
      "version": "5.1.6",
      "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz",
@@ -1599,6 +1824,22 @@
        }
      }
    },
    "node_modules/supports-color": {
      "version": "8.1.1",
      "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz",
      "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "has-flag": "^4.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "funding": {
        "url": "https://github.com/chalk/supports-color?sponsor=1"
      }
    },
    "node_modules/tailwindcss": {
      "version": "4.2.4",
      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.4.tgz",
@@ -1620,6 +1861,16 @@
        "url": "https://opencollective.com/webpack"
      }
    },
    "node_modules/tree-kill": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz",
      "integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==",
      "dev": true,
      "license": "MIT",
      "bin": {
        "tree-kill": "cli.js"
      }
    },
    "node_modules/tslib": {
      "version": "2.8.1",
      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
@@ -1646,6 +1897,63 @@
      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/wrap-ansi": {
      "version": "7.0.0",
      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
      "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "ansi-styles": "^4.0.0",
        "string-width": "^4.1.0",
        "strip-ansi": "^6.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "funding": {
        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
      }
    },
    "node_modules/y18n": {
      "version": "5.0.8",
      "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
      "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
      "dev": true,
      "license": "ISC",
      "engines": {
        "node": ">=10"
      }
    },
    "node_modules/yargs": {
      "version": "17.7.2",
      "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
      "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "cliui": "^8.0.1",
        "escalade": "^3.1.1",
        "get-caller-file": "^2.0.5",
        "require-directory": "^2.1.1",
        "string-width": "^4.2.3",
        "y18n": "^5.0.5",
        "yargs-parser": "^21.1.1"
      },
      "engines": {
        "node": ">=12"
      }
    },
    "node_modules/yargs-parser": {
      "version": "21.1.1",
      "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
      "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
      "dev": true,
      "license": "ISC",
      "engines": {
        "node": ">=12"
      }
    }
  }
 }
@@ -4,20 +4,23 @@
  "private": true,
  "scripts": {
    "dev": "next dev --turbopack",
    "dev:all": "concurrently --names \"TTS,NEXT\" --prefix-colors \"cyan,magenta\" \"cd server && bash start.sh\" \"next dev --turbopack\"",
    "build": "next build --turbopack",
-    "start": "next start"
+    "start": "next start",
    "server": "cd server && bash start.sh"
  },
  "dependencies": {
    "next": "15.5.15",
    "react": "19.1.0",
-    "react-dom": "19.1.0",
+    "react-dom": "19.1.0"
    "next": "15.5.15"
  },
  "devDependencies": {
-    "typescript": "^5",
+    "@tailwindcss/postcss": "^4",
    "@types/node": "^20",
    "@types/react": "^19",
    "@types/react-dom": "^19",
-    "@tailwindcss/postcss": "^4",
+    "concurrently": "^9.2.1",
-    "tailwindcss": "^4"
+    "tailwindcss": "^4",
    "typescript": "^5"
  }
 }
@@ -0,0 +1,57 @@
 #!/usr/bin/env python3
 """
 Download microsoft/VibeVoice-Realtime-0.5B to the local HuggingFace cache.
 Run once before starting the server:
    python download_model.py
 Set HF_HOME or HUGGINGFACE_HUB_CACHE to control where the model is stored.
 Set HF_TOKEN (or HUGGINGFACE_TOKEN) if you need an access token.
 """
 import os
 import sys
 import time
 MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B"
 # Patterns that are not needed for PyTorch inference
 _IGNORE = [
    "*.msgpack",
    "flax_model*",
    "tf_model*",
    "rust_model*",
    "*.ot",
 ]
 def download() -> str:
    """
    Download MODEL_ID with huggingface_hub.snapshot_download and return its path.

    The access token is read from HF_TOKEN, falling back to HUGGINGFACE_TOKEN;
    an unset or empty value is passed on as None. Files matching the patterns
    in _IGNORE are excluded from the download.

    Returns:
        The path returned by snapshot_download.

    Exits the process with status 1 (message on stderr) when huggingface_hub
    cannot be imported.
    """
    # Imported lazily so a missing package produces an install hint, not a traceback.
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        print(
            "ERROR: huggingface_hub is not installed.\n"
            "Run: pip install huggingface_hub",
            file=sys.stderr,
        )
        sys.exit(1)
    # Empty strings count as "not set", so the chain falls through to None.
    token: str | None = (
        os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or None
    )
    print(f"Checking / downloading model: {MODEL_ID}")
    print("(This may take several minutes on first run — the model is ~1 GB)")
    # Monotonic clock: the reported duration is unaffected by system clock adjustments.
    started = time.monotonic()
    cache_path = snapshot_download(
        repo_id=MODEL_ID,
        ignore_patterns=_IGNORE,
        token=token,
    )
    elapsed = time.monotonic() - started
    print(f"Model ready in {elapsed:.1f}s → {cache_path}")
    return cache_path
 if __name__ == "__main__":
    download()
@@ -9,3 +9,4 @@ soundfile>=0.12.1
 scipy>=1.13.0
 numpy>=1.26.0
 pydantic>=2.7.0
 huggingface_hub>=0.23.0
@@ -0,0 +1,35 @@
 #!/usr/bin/env bash
 # VibePod TTS server startup script
 # Usage: ./start.sh [uvicorn options]
 #
 # Installs missing Python dependencies, downloads the model on first run,
 # then starts the FastAPI server. Extra arguments are forwarded to uvicorn.
 # Set HF_TOKEN env var if a HuggingFace access token is required.
 set -euo pipefail
 # Work from the script's own directory so the relative paths used below
 # (requirements.txt, download_model.py) resolve regardless of the caller's cwd.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"
 echo "================================================"
 echo "  VibePod TTS Server"
 echo "================================================"
 # 1. Ensure Python deps are available
 # Probe huggingface_hub as well as fastapi: download_model.py exits with an
 # error when it is missing, which would abort this script under "set -e".
 if ! python -c "import fastapi, huggingface_hub" &>/dev/null; then
  echo "Installing Python dependencies..."
  # "python -m pip" installs into the same interpreter used by the steps below.
  python -m pip install -r requirements.txt
 fi
 # 2. Download model if not already cached
 echo ""
 echo "--> Checking model cache..."
 python download_model.py
 # 3. Start the server
 echo ""
 echo "--> Starting uvicorn on http://0.0.0.0:8000"
 # exec replaces this shell with the server process; "$@" forwards caller options.
 # Run uvicorn as a module so it uses the interpreter the deps were installed into.
 exec python -m uvicorn vibevoice_server:app \
  --host 0.0.0.0 \
  --port 8000 \
  --log-level info \
  "$@"
@@ -6,13 +6,16 @@ exposes a POST /generate endpoint that accepts { text, cfg_scale, inference_step
 and returns a WAV audio blob.
 Start with:
    ./start.sh
  or directly:
    uvicorn vibevoice_server:app --host 0.0.0.0 --port 8000
 """
 import io
 import logging
 import threading
 from contextlib import asynccontextmanager
-from typing import AsyncGenerator, Optional
+from typing import AsyncGenerator, Literal, Optional
 import numpy as np
 import soundfile as sf
@@ -26,36 +29,54 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
 logger = logging.getLogger(__name__)
 MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B"
 DEFAULT_SAMPLE_RATE = 24_000  # fallback sample rate when not specified by model config
 # ─── Global model state ────────────────────────────────────────────────────────
 ModelStatus = Literal["loading", "online", "error"]
 _processor: Optional[object] = None
 _model: Optional[object] = None
 _device: str = "cpu"
 _model_status: ModelStatus = "loading"
 _model_error: Optional[str] = None
 _load_lock = threading.Lock()
-def _load_model() -> None:
+def _load_model_sync() -> None:
-    global _processor, _model, _device
+    """Load the model synchronously.  Called from a background thread at startup."""
    global _processor, _model, _device, _model_status, _model_error
-    if _model is not None:
+    with _load_lock:
-        return
+        if _model is not None:
            return
-    _device = "cuda" if torch.cuda.is_available() else "cpu"
+        _device = "cuda" if torch.cuda.is_available() else "cpu"
-    logger.info("Loading %s on %s …", MODEL_ID, _device)
+        logger.info("Loading %s on %s …", MODEL_ID, _device)
-    _processor = AutoProcessor.from_pretrained(MODEL_ID)
+        try:
-    _model = AutoModel.from_pretrained(
+            _processor = AutoProcessor.from_pretrained(MODEL_ID)
-        MODEL_ID,
+            _model = AutoModel.from_pretrained(
-        torch_dtype=torch.float16 if _device == "cuda" else torch.float32,
+                MODEL_ID,
-    )
+                torch_dtype=torch.float16 if _device == "cuda" else torch.float32,
-    _model = _model.to(_device)
+            )
-    _model.eval()
+            _model = _model.to(_device)
            _model.eval()
-    logger.info("Model loaded successfully.")
+            _model_status = "online"
            logger.info("Model loaded successfully on %s.", _device)
        except Exception as exc:
            _model_status = "error"
            _model_error = str(exc)
            logger.exception("Failed to load model: %s", exc)
@asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
-    _load_model()
+    # Start model loading in a background thread so the server answers
    # health-check requests immediately (status="loading") rather than
    # blocking startup for the full model download/load time.
    thread = threading.Thread(target=_load_model_sync, daemon=True, name="model-loader")
    thread.start()
    yield
@@ -81,8 +102,16 @@ class GenerateRequest(BaseModel):
@app.get("/health")
 async def health() -> dict:
-    """Liveness probe used by the Next.js /api/health route."""
+    """
-    return {"status": "online", "model": MODEL_ID}
+    Liveness / readiness probe used by the Next.js /api/health route.
    Returns:
        { status: "loading" | "online" | "error", model: str, message?: str }
    """
    body: dict = {"status": _model_status, "model": MODEL_ID}
    if _model_error:
        body["message"] = _model_error
    return body
@app.post("/generate")
@@ -90,8 +119,16 @@ async def generate(req: GenerateRequest) -> StreamingResponse:
    """
    Generate speech from text and return a WAV audio stream.
    """
-    if _model is None or _processor is None:
+    if _model_status == "loading":
-        raise HTTPException(status_code=503, detail="Model not loaded yet — please retry in a moment.")
+        raise HTTPException(
            status_code=503,
            detail="Model is still loading — please retry in a moment.",
        )
    if _model_status == "error" or _model is None or _processor is None:
        raise HTTPException(
            status_code=503,
            detail=f"Model failed to load: {_model_error or 'unknown error'}",
        )
    logger.info(
        "Generating audio for %d chars (cfg=%.1f, steps=%d)",
@@ -113,7 +150,8 @@ async def generate(req: GenerateRequest) -> StreamingResponse:
        # output is typically a tensor of shape (1, num_samples) or (num_samples,)
        audio_array = output.squeeze().cpu().numpy()
-        # Normalise to [-1, 1] float32 for WAV
+        # Normalise to [-1, 1] float32 for WAV.
        # astype() may copy the array, but we need float32 for soundfile — this is intentional.
        if audio_array.dtype != np.float32:
            audio_array = audio_array.astype(np.float32)
        peak = np.abs(audio_array).max()
@@ -124,7 +162,7 @@ async def generate(req: GenerateRequest) -> StreamingResponse:
        sample_rate: int = (
            getattr(_model.config, "sampling_rate", None)
            or getattr(_model.config, "sample_rate", None)
-            or 24_000
+            or DEFAULT_SAMPLE_RATE
        )
        buf = io.BytesIO()
@@ -148,3 +186,4 @@ async def generate(req: GenerateRequest) -> StreamingResponse:
        logger.exception("Generation failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc