feat: add --cpu flag to start.sh — separate venv via UV_PROJECT_ENVIRONMENT

2026-07-31 13:07:06 +00:00 · 2026-04-28 14:15:11 +01:00
parent e2f52473ea
commit 5b8b3a011d
1 changed files with 55 additions and 6 deletions
@@ -2,17 +2,45 @@
 # VibePod TTS server — start script
 # Syncs the uv environment, downloads the model on first run, then launches uvicorn.
 # Prerequisite: uv must be installed (https://docs.astral.sh/uv/getting-started/installation/)
+#
+# Usage:
+#   ./start.sh          — CUDA mode (default, uses PyTorch CUDA 12.4 wheel, venv: .venv)
+#   ./start.sh --cpu    — CPU-only mode (uses PyPI CPU torch wheel, venv: .venv-cpu)
+#
+# The two modes maintain completely separate virtual environments so their torch
+# installations never conflict. UV_PROJECT_ENVIRONMENT tells uv which venv to use;
+# --no-sources skips [tool.uv.sources] so the CPU run pulls the default PyPI torch wheel.

 set -euo pipefail

 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"

+# ---------------------------------------------------------------------------
+# Parse flags
+# ---------------------------------------------------------------------------
+CPU_MODE=false
+PASSTHROUGH_ARGS=()
+
+for arg in "$@"; do
+    case "$arg" in
+        --cpu) CPU_MODE=true ;;
+        *)     PASSTHROUGH_ARGS+=("$arg") ;;
+    esac
+done
+
 echo "================================================"
 echo "  VibePod TTS Server"
+if $CPU_MODE; then
+    echo "  Mode : CPU-only"
+else
+    echo "  Mode : CUDA (default)"
+fi
 echo "================================================"

+# ---------------------------------------------------------------------------
 # 1. Check uv is available
+# ---------------------------------------------------------------------------
 if ! command -v uv &>/dev/null; then
    echo ""
    echo "ERROR: uv is not installed."
@@ -23,18 +51,39 @@ if ! command -v uv &>/dev/null; then
    exit 1
 fi

-# 2. Sync Python environment (creates .venv on first run, no-op afterwards)
+# ---------------------------------------------------------------------------
+# 2. Sync Python environment
+#    CPU mode:  use .venv-cpu and skip [tool.uv.sources] so uv pulls the default PyPI
+#               torch wheel (CPU-only on Windows/macOS; the Linux x86_64 wheel bundles CUDA).
+#    CUDA mode: standard uv sync — uses .venv and respects [tool.uv.sources].
+# ---------------------------------------------------------------------------
 echo ""
-echo "--> Syncing Python environment..."
+if $CPU_MODE; then
+    echo "--> Syncing CPU Python environment (.venv-cpu)..."
+    export UV_PROJECT_ENVIRONMENT=".venv-cpu"
+    uv sync --no-sources
+else
+    echo "--> Syncing CUDA Python environment (.venv)..."
    uv sync
+fi

-# 3. Start the server — model download + load happens inside the server process
-#    so the /health endpoint is reachable immediately and can report progress.
+# ---------------------------------------------------------------------------
+# 3. Launch uvicorn
+#    Pass VIBEPOD_DEVICE env var so the server can select the correct torch device.
+# ---------------------------------------------------------------------------
 echo ""
 echo "--> Starting uvicorn on http://0.0.0.0:8000"
 export PYTHONUTF8=1
+
+if $CPU_MODE; then
+    export VIBEPOD_DEVICE="cpu"
+    export UV_PROJECT_ENVIRONMENT=".venv-cpu" UV_NO_SYNC=1  # uv run must not re-sync with CUDA sources
+else
+    export VIBEPOD_DEVICE="cuda"
+fi
+
 exec uv run uvicorn vibevoice_server:app \
    --host 0.0.0.0 \
    --port 8000 \
    --log-level info \
-    "$@"
+    "${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"}"