# vibepod/server/download_model.py

#!/usr/bin/env python3
"""
Download microsoft/VibeVoice-Realtime-0.5B to the local HuggingFace cache.

Run once before starting the server:
    python download_model.py

Set HF_HOME or HUGGINGFACE_HUB_CACHE to control where the model is stored.
Set HF_TOKEN (or HUGGINGFACE_TOKEN) if you need an access token.
"""

import os
import sys
import time

# Hugging Face Hub repository ID handed to ``snapshot_download`` as ``repo_id``.
MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B"

# Glob patterns handed to ``snapshot_download`` as ``ignore_patterns``.
# They cover files that are not needed for PyTorch inference.
_IGNORE = [
    "*.msgpack",
    "flax_model*",
    "tf_model*",
    "rust_model*",
    "*.ot",
]


def download() -> str:
    """Fetch the ``MODEL_ID`` snapshot via ``huggingface_hub`` and return its path.

    The access token is read from ``HF_TOKEN``, falling back to
    ``HUGGINGFACE_TOKEN``; an unset or empty value means no token is passed.
    Files matching the ``_IGNORE`` patterns are excluded from the request.
    Progress and the elapsed time are printed to stdout.

    Returns:
        The value returned by ``snapshot_download`` (the local snapshot path).

    Raises:
        SystemExit: With exit code 1 when ``huggingface_hub`` cannot be
            imported. Exceptions raised by ``snapshot_download`` itself
            propagate unchanged.
    """
    # Imported lazily so a missing dependency yields a readable install hint
    # rather than a traceback when the module is loaded.
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        print(
            "ERROR: huggingface_hub is not installed.\nRun: pip install huggingface_hub",
            file=sys.stderr,
        )
        sys.exit(1)

    # The trailing ``or None`` collapses an empty-string variable to "no token".
    token: str | None = (
        os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or None
    )

    print(f"Checking / downloading model: {MODEL_ID}")
    print("(This may take several minutes on first run — the model is ~1 GB)")

    # perf_counter() is unaffected by system clock adjustments, so the reported
    # duration cannot jump or go negative during a long download.
    start = time.perf_counter()

    cache_path = snapshot_download(
        repo_id=MODEL_ID,
        ignore_patterns=_IGNORE,
        token=token,
    )

    elapsed = time.perf_counter() - start
    print(f"Model ready in {elapsed:.1f}s -> {cache_path}")
    return cache_path


# Script entry point: run the download only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    download()