diff --git a/podcast-forge/app/api/health/route.ts b/podcast-forge/app/api/health/route.ts index 60d4347..8b6a0b7 100644 --- a/podcast-forge/app/api/health/route.ts +++ b/podcast-forge/app/api/health/route.ts @@ -8,13 +8,28 @@ export async function GET() { const res = await fetch(`${pythonServerUrl}/health`, { method: "GET", signal: AbortSignal.timeout(4000), + // Don't cache health checks + cache: "no-store", }); if (res.ok) { - return NextResponse.json({ status: "online" }); + const data = await res.json().catch(() => ({})); + // Pass through the exact status the Python server reports: + // "online" | "loading" | "error" + const status: string = data.status ?? "online"; + return NextResponse.json( + { status, message: data.message }, + { headers: { "Cache-Control": "no-store" } } + ); } - return NextResponse.json({ status: "offline" }); + return NextResponse.json( + { status: "offline" }, + { headers: { "Cache-Control": "no-store" } } + ); } catch { - return NextResponse.json({ status: "offline" }); + return NextResponse.json( + { status: "offline" }, + { headers: { "Cache-Control": "no-store" } } + ); } } diff --git a/podcast-forge/components/Header.tsx b/podcast-forge/components/Header.tsx index b60d278..28106ca 100644 --- a/podcast-forge/components/Header.tsx +++ b/podcast-forge/components/Header.tsx @@ -1,46 +1,84 @@ "use client"; -import { useEffect, useState } from "react"; +import { useEffect, useRef, useState } from "react"; -type ServerStatus = "checking" | "online" | "offline"; +type ServerStatus = "checking" | "loading" | "online" | "error" | "offline"; + +// Polling intervals: poll quickly until the server is online, then slow down. +const FAST_INTERVAL_MS = 3000; // while checking / loading +const SLOW_INTERVAL_MS = 30000; // once online export default function Header() { const [status, setStatus] = useState("checking"); + const [message, setMessage] = useState(); + const intervalRef = useRef | null>(null); useEffect(() => { const checkHealth = async () => { try { - const res = await fetch("/api/health"); + const res = await fetch("/api/health", { cache: "no-store" }); const data = await res.json(); - setStatus(data.status === "online" ? "online" : "offline"); + const newStatus: ServerStatus = (data.status as ServerStatus) ?? "offline"; + setStatus(newStatus); + setMessage(data.message); + + // Switch to slow polling once we know the server is online + if (newStatus === "online" && intervalRef.current) { + clearInterval(intervalRef.current); + intervalRef.current = setInterval(checkHealth, SLOW_INTERVAL_MS); + } + // Switch to fast polling if we detect the server went offline/loading + if ((newStatus === "offline" || newStatus === "loading") && intervalRef.current) { + clearInterval(intervalRef.current); + intervalRef.current = setInterval(checkHealth, FAST_INTERVAL_MS); + } } catch { setStatus("offline"); + setMessage(undefined); } }; + // Start with a fast poll — the server may still be loading the model checkHealth(); - const interval = setInterval(checkHealth, 30000); - return () => clearInterval(interval); + intervalRef.current = setInterval(checkHealth, FAST_INTERVAL_MS); + return () => { + if (intervalRef.current) clearInterval(intervalRef.current); + }; }, []); - const statusConfig = { + const statusConfig: Record< + ServerStatus, + { color: string; label: string; pulse: boolean; ring: string } + > = { checking: { color: "bg-yellow-500", - label: "Checking...", - textColor: "text-yellow-400", + label: "Checking…", pulse: true, + ring: "border-yellow-500/30", + }, + loading: { + color: "bg-blue-400", + label: "Loading model…", + pulse: true, + ring: "border-blue-400/30", }, online: { color: "bg-green-500", label: "Server Online", - textColor: "text-green-400", pulse: false, + ring: "border-green-500/30", + }, + error: { + color: "bg-orange-500", + label: "Model Error", + pulse: false, + ring: "border-orange-500/30", }, offline: { color: "bg-red-500", label: "Server Offline", - textColor: "text-red-400", pulse: false, + ring: "border-red-500/30", }, }; @@ -85,16 +123,19 @@ export default function Header() {
- + {cfg.pulse && ( + + )} @@ -104,3 +145,4 @@ export default function Header() { ); } + diff --git a/podcast-forge/package-lock.json b/podcast-forge/package-lock.json index 781ba29..3dcd02a 100644 --- a/podcast-forge/package-lock.json +++ b/podcast-forge/package-lock.json @@ -17,6 +17,7 @@ "@types/node": "^20", "@types/react": "^19", "@types/react-dom": "^19", + "concurrently": "^9.2.1", "tailwindcss": "^4", "typescript": "^5" } @@ -1004,6 +1005,32 @@ "@types/react": "^19.2.0" } }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, "node_modules/caniuse-lite": { "version": "1.0.30001791", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001791.tgz", @@ -1024,12 +1051,102 @@ ], "license": "CC-BY-4.0" }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/chalk/node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/client-only": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==", "license": "MIT" }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/concurrently": { + "version": "9.2.1", + "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz", + "integrity": "sha512-fsfrO0MxV64Znoy8/l1vVIjjHa29SZyyqPgQBwhiDcaW8wJc2W3XWVOGx4M3oJBnv/zdUZIIp1gDeS98GzP8Ng==", + "dev": true, + "license": "MIT", + "dependencies": { + "chalk": "4.1.2", + "rxjs": "7.8.2", + "shell-quote": "1.8.3", + "supports-color": "8.1.1", + "tree-kill": "1.2.2", + "yargs": "17.7.2" + }, + "bin": { + "conc": "dist/bin/concurrently.js", + "concurrently": "dist/bin/concurrently.js" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/open-cli-tools/concurrently?sponsor=1" + } + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -1047,6 +1164,13 @@ "node": ">=8" } }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, "node_modules/enhanced-resolve": { "version": "5.21.0", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.0.tgz", @@ -1061,6 +1185,26 @@ "node": ">=10.13.0" } }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -1068,6 +1212,26 @@ "dev": true, "license": "ISC" }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/jiti": { "version": "2.6.1", "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", @@ -1503,6 +1667,26 @@ "react": "^19.1.0" } }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/rxjs": { + "version": "7.8.2", + "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz", + "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.1.0" + } + }, "node_modules/scheduler": { "version": "0.26.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", @@ -1567,6 +1751,19 @@ "@img/sharp-win32-x64": "0.34.5" } }, + "node_modules/shell-quote": { + "version": "1.8.3", + "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", + "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -1576,6 +1773,34 @@ "node": ">=0.10.0" } }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/styled-jsx": { "version": "5.1.6", "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz", @@ -1599,6 +1824,22 @@ } } }, + "node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, "node_modules/tailwindcss": { "version": "4.2.4", "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.4.tgz", @@ -1620,6 +1861,16 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/tree-kill": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz", + "integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==", + "dev": true, + "license": "MIT", + "bin": { + "tree-kill": "cli.js" + } + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -1646,6 +1897,63 @@ "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", "dev": true, "license": "MIT" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=12" + } } } } diff --git a/podcast-forge/package.json b/podcast-forge/package.json index efcc214..d974614 100644 --- a/podcast-forge/package.json +++ b/podcast-forge/package.json @@ -4,20 +4,23 @@ "private": true, "scripts": { "dev": "next dev --turbopack", + "dev:all": "concurrently --names \"TTS,NEXT\" --prefix-colors \"cyan,magenta\" \"cd server && bash start.sh\" \"next dev --turbopack\"", "build": "next build --turbopack", - "start": "next start" + "start": "next start", + "server": "cd server && bash start.sh" }, "dependencies": { + "next": "15.5.15", "react": "19.1.0", - "react-dom": "19.1.0", - "next": "15.5.15" + "react-dom": "19.1.0" }, "devDependencies": { - "typescript": "^5", + "@tailwindcss/postcss": "^4", "@types/node": "^20", "@types/react": "^19", "@types/react-dom": "^19", - "@tailwindcss/postcss": "^4", - "tailwindcss": "^4" + "concurrently": "^9.2.1", + "tailwindcss": "^4", + "typescript": "^5" } } diff --git a/podcast-forge/server/download_model.py b/podcast-forge/server/download_model.py new file mode 100644 index 0000000..0e09e88 --- /dev/null +++ b/podcast-forge/server/download_model.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Download microsoft/VibeVoice-Realtime-0.5B to the local HuggingFace cache. + +Run once before starting the server: + python download_model.py + +Set HF_HOME or HUGGINGFACE_HUB_CACHE to control where the model is stored. +Set HF_TOKEN (or HUGGINGFACE_TOKEN) if you need an access token. +""" + +import os +import sys +import time + +MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B" + +# Patterns that are not needed for PyTorch inference +_IGNORE = [ + "*.msgpack", + "flax_model*", + "tf_model*", + "rust_model*", + "*.ot", +] + + +def download() -> str: + try: + from huggingface_hub import snapshot_download + except ImportError: + print( + "ERROR: huggingface_hub is not installed.\n" + "Run: pip install huggingface_hub", + file=sys.stderr, + ) + sys.exit(1) + + token: str | None = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") + + print(f"Checking / downloading model: {MODEL_ID}") + print("(This may take several minutes on first run — the model is ~1 GB)") + start = time.time() + + cache_path = snapshot_download( + repo_id=MODEL_ID, + ignore_patterns=_IGNORE, + token=token or None, + ) + + elapsed = time.time() - start + print(f"Model ready in {elapsed:.1f}s → {cache_path}") + return cache_path + + +if __name__ == "__main__": + download() diff --git a/podcast-forge/server/requirements.txt b/podcast-forge/server/requirements.txt index 71a50fe..ebee5aa 100644 --- a/podcast-forge/server/requirements.txt +++ b/podcast-forge/server/requirements.txt @@ -9,3 +9,4 @@ soundfile>=0.12.1 scipy>=1.13.0 numpy>=1.26.0 pydantic>=2.7.0 +huggingface_hub>=0.23.0 diff --git a/podcast-forge/server/start.sh b/podcast-forge/server/start.sh new file mode 100755 index 0000000..66d1a5a --- /dev/null +++ b/podcast-forge/server/start.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# VibePod TTS server startup script +# Usage: ./start.sh [uvicorn options] +# +# Downloads the model on first run, then starts the FastAPI server. +# Set HF_TOKEN env var if a HuggingFace access token is required. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "================================================" +echo " VibePod TTS Server" +echo "================================================" + +# 1. Ensure Python deps are available +if ! python -c "import fastapi" &>/dev/null; then + echo "Installing Python dependencies..." + pip install -r requirements.txt +fi + +# 2. Download model if not already cached +echo "" +echo "--> Checking model cache..." +python download_model.py + +# 3. Start the server +echo "" +echo "--> Starting uvicorn on http://0.0.0.0:8000" +exec uvicorn vibevoice_server:app \ + --host 0.0.0.0 \ + --port 8000 \ + --log-level info \ + "$@" diff --git a/podcast-forge/server/vibevoice_server.py b/podcast-forge/server/vibevoice_server.py index 5d70984..9658a1b 100644 --- a/podcast-forge/server/vibevoice_server.py +++ b/podcast-forge/server/vibevoice_server.py @@ -6,13 +6,16 @@ exposes a POST /generate endpoint that accepts { text, cfg_scale, inference_step and returns a WAV audio blob. Start with: + ./start.sh + or directly: uvicorn vibevoice_server:app --host 0.0.0.0 --port 8000 """ import io import logging +import threading from contextlib import asynccontextmanager -from typing import AsyncGenerator, Optional +from typing import AsyncGenerator, Literal, Optional import numpy as np import soundfile as sf @@ -26,36 +29,54 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess logger = logging.getLogger(__name__) MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B" +DEFAULT_SAMPLE_RATE = 24_000 # fallback sample rate when not specified by model config # ─── Global model state ──────────────────────────────────────────────────────── +ModelStatus = Literal["loading", "online", "error"] + _processor: Optional[object] = None _model: Optional[object] = None _device: str = "cpu" +_model_status: ModelStatus = "loading" +_model_error: Optional[str] = None +_load_lock = threading.Lock() -def _load_model() -> None: - global _processor, _model, _device +def _load_model_sync() -> None: + """Load the model synchronously. Called from a background thread at startup.""" + global _processor, _model, _device, _model_status, _model_error - if _model is not None: - return + with _load_lock: + if _model is not None: + return - _device = "cuda" if torch.cuda.is_available() else "cpu" - logger.info("Loading %s on %s …", MODEL_ID, _device) + _device = "cuda" if torch.cuda.is_available() else "cpu" + logger.info("Loading %s on %s …", MODEL_ID, _device) - _processor = AutoProcessor.from_pretrained(MODEL_ID) - _model = AutoModel.from_pretrained( - MODEL_ID, - torch_dtype=torch.float16 if _device == "cuda" else torch.float32, - ) - _model = _model.to(_device) - _model.eval() + try: + _processor = AutoProcessor.from_pretrained(MODEL_ID) + _model = AutoModel.from_pretrained( + MODEL_ID, + torch_dtype=torch.float16 if _device == "cuda" else torch.float32, + ) + _model = _model.to(_device) + _model.eval() - logger.info("Model loaded successfully.") + _model_status = "online" + logger.info("Model loaded successfully on %s.", _device) + except Exception as exc: + _model_status = "error" + _model_error = str(exc) + logger.exception("Failed to load model: %s", exc) @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: - _load_model() + # Start model loading in a background thread so the server answers + # health-check requests immediately (status="loading") rather than + # blocking startup for the full model download/load time. + thread = threading.Thread(target=_load_model_sync, daemon=True, name="model-loader") + thread.start() yield @@ -81,8 +102,16 @@ class GenerateRequest(BaseModel): @app.get("/health") async def health() -> dict: - """Liveness probe used by the Next.js /api/health route.""" - return {"status": "online", "model": MODEL_ID} + """ + Liveness / readiness probe used by the Next.js /api/health route. + + Returns: + { status: "loading" | "online" | "error", model: str, message?: str } + """ + body: dict = {"status": _model_status, "model": MODEL_ID} + if _model_error: + body["message"] = _model_error + return body @app.post("/generate") @@ -90,8 +119,16 @@ async def generate(req: GenerateRequest) -> StreamingResponse: """ Generate speech from text and return a WAV audio stream. """ - if _model is None or _processor is None: - raise HTTPException(status_code=503, detail="Model not loaded yet — please retry in a moment.") + if _model_status == "loading": + raise HTTPException( + status_code=503, + detail="Model is still loading — please retry in a moment.", + ) + if _model_status == "error" or _model is None or _processor is None: + raise HTTPException( + status_code=503, + detail=f"Model failed to load: {_model_error or 'unknown error'}", + ) logger.info( "Generating audio for %d chars (cfg=%.1f, steps=%d)", @@ -113,7 +150,8 @@ async def generate(req: GenerateRequest) -> StreamingResponse: # output is typically a tensor of shape (1, num_samples) or (num_samples,) audio_array = output.squeeze().cpu().numpy() - # Normalise to [-1, 1] float32 for WAV + # Normalise to [-1, 1] float32 for WAV. + # astype() may copy the array, but we need float32 for soundfile — this is intentional. if audio_array.dtype != np.float32: audio_array = audio_array.astype(np.float32) peak = np.abs(audio_array).max() @@ -124,7 +162,7 @@ async def generate(req: GenerateRequest) -> StreamingResponse: sample_rate: int = ( getattr(_model.config, "sampling_rate", None) or getattr(_model.config, "sample_rate", None) - or 24_000 + or DEFAULT_SAMPLE_RATE ) buf = io.BytesIO() @@ -148,3 +186,4 @@ async def generate(req: GenerateRequest) -> StreamingResponse: logger.exception("Generation failed: %s", exc) raise HTTPException(status_code=500, detail=str(exc)) from exc +