Improve dev startup: model download script, loading state in health check, faster polling

Agent-Logs-Url: https://github.com/JezzWTF/vibepod/sessions/3c05c740-b0a3-497d-88f1-dfa63121424d
Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
2026-06-13 03:58:07 +00:00 · 2026-04-27 16:00:53 +00:00
parent 3974a4cf69
commit 11ffc7df7c
8 changed files with 546 additions and 46 deletions
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""
+Download microsoft/VibeVoice-Realtime-0.5B to the local HuggingFace cache.
+
+Run once before starting the server:
+    python download_model.py
+
+Set HF_HOME or HUGGINGFACE_HUB_CACHE to control where the model is stored.
+Set HF_TOKEN (or HUGGINGFACE_TOKEN) if you need an access token.
+"""
+
+import os
+import sys
+import time
+
+MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B"  # repo id handed to snapshot_download()
+
+# Filename patterns excluded from the download (not needed for PyTorch inference)
+_IGNORE = [
+    "*.msgpack",
+    "flax_model*",
+    "tf_model*",
+    "rust_model*",
+    "*.ot",
+]
+
+
+def download() -> str:
+    """Fetch MODEL_ID into the local HuggingFace cache; return snapshot_download's path.
+
+    The access token is read from HF_TOKEN, falling back to HUGGINGFACE_TOKEN.
+    Exits the process with status 1 if huggingface_hub is not installed.
+    """
+    try:
+        from huggingface_hub import snapshot_download
+    except ImportError:
+        print(
+            "ERROR: huggingface_hub is not installed.\n"
+            "Run: pip install huggingface_hub",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    # An empty token variable is normalized to None so no explicit token is passed.
+    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or None
+
+    print(f"Checking / downloading model: {MODEL_ID}")
+    print("(This may take several minutes on first run — the model is ~1 GB)")
+    # Monotonic clock: the reported duration is immune to wall-clock adjustments.
+    start = time.monotonic()
+    cache_path = snapshot_download(repo_id=MODEL_ID, ignore_patterns=_IGNORE, token=token)
+    print(f"Model ready in {time.monotonic() - start:.1f}s → {cache_path}")
+    return cache_path
+
+
+if __name__ == "__main__":  # run the download only when executed as a script
+    download()