diff --git a/podcast-forge/.gitignore b/podcast-forge/.gitignore new file mode 100644 index 0000000..5ef6a52 --- /dev/null +++ b/podcast-forge/.gitignore @@ -0,0 +1,41 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/versions + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# env files (can opt-in for committing if needed) +.env* + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/podcast-forge/README.md b/podcast-forge/README.md new file mode 100644 index 0000000..e215bc4 --- /dev/null +++ b/podcast-forge/README.md @@ -0,0 +1,36 @@ +This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). + +## Getting Started + +First, run the development server: + +```bash +npm run dev +# or +yarn dev +# or +pnpm dev +# or +bun dev +``` + +Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. + +You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. + +This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. + +## Learn More + +To learn more about Next.js, take a look at the following resources: + +- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. +- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. + +You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 
+
+## Deploy on Vercel
+
+The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+
+Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
diff --git a/podcast-forge/app/api/generate/route.ts b/podcast-forge/app/api/generate/route.ts
new file mode 100644
index 0000000..3643196
--- /dev/null
+++ b/podcast-forge/app/api/generate/route.ts
@@ -0,0 +1,91 @@
+import { NextRequest, NextResponse } from "next/server";
+
+const DEFAULT_CFG_SCALE = 2.5;
+const DEFAULT_INFERENCE_STEPS = 20;
+
+/** Builds a 400 response carrying a JSON error message. */
+function badRequest(message: string) {
+  return NextResponse.json({ error: message }, { status: 400 });
+}
+
+/**
+ * Proxies a synthesis request to the VibeVoice server and returns the audio.
+ *
+ * Expects a JSON body `{ text, cfg_scale?, inference_steps? }`. Responds with
+ * 400 when the body is malformed, with the upstream status when the VibeVoice
+ * server rejects the request, and with 502 when it cannot be reached.
+ */
+export async function POST(request: NextRequest) {
+  // Parse separately so a malformed body is reported as a client error (400)
+  // instead of falling into the 502 "upstream unreachable" handler below.
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return badRequest("Request body must be valid JSON");
+  }
+
+  if (typeof body !== "object" || body === null) {
+    return badRequest("Request body must be a JSON object");
+  }
+
+  const { text, cfg_scale, inference_steps } = body as Record<string, unknown>;
+
+  if (typeof text !== "string" || text.trim().length === 0) {
+    return badRequest("Missing or empty text field");
+  }
+
+  // Omitted or null settings fall back to the defaults, as before.
+  const cfgScale = cfg_scale ?? DEFAULT_CFG_SCALE;
+  if (typeof cfgScale !== "number" || !Number.isFinite(cfgScale)) {
+    return badRequest("cfg_scale must be a finite number");
+  }
+
+  const inferenceSteps = inference_steps ?? DEFAULT_INFERENCE_STEPS;
+  if (
+    typeof inferenceSteps !== "number" ||
+    !Number.isInteger(inferenceSteps) ||
+    inferenceSteps < 1
+  ) {
+    return badRequest("inference_steps must be a positive integer");
+  }
+
+  const pythonServerUrl =
+    process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";
+
+  try {
+    const upstream = await fetch(`${pythonServerUrl}/generate`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        text: text.trim(),
+        cfg_scale: cfgScale,
+        inference_steps: inferenceSteps,
+      }),
+    });
+
+    if (!upstream.ok) {
+      const errorText = await upstream.text().catch(() => "Unknown error");
+      return NextResponse.json(
+        { error: `VibeVoice server error: ${errorText}` },
+        { status: upstream.status }
+      );
+    }
+
+    const audioBuffer = await upstream.arrayBuffer();
+
+    return new NextResponse(audioBuffer, {
+      status: 200,
+      headers: {
+        "Content-Type": "audio/wav",
+        "Content-Disposition": 'attachment; filename="vibepod-output.wav"',
+        "Cache-Control": "no-store",
+      },
+    });
+  } catch (err) {
+    // Only failures while talking to the upstream server reach this handler.
+    const message =
+      err instanceof Error ? err.message : "Failed to connect to VibeVoice server";
+    return NextResponse.json({ error: message }, { status: 502 });
+  }
+}
diff --git a/podcast-forge/app/api/health/route.ts b/podcast-forge/app/api/health/route.ts
new file mode 100644
index 0000000..60d4347
--- /dev/null
+++ b/podcast-forge/app/api/health/route.ts
@@ -0,0 +1,25 @@
+import { NextResponse } from "next/server";
+
+// Always probe at request time; a cached "online"/"offline" would be stale.
+export const dynamic = "force-dynamic";
+
+/** Reports whether the VibeVoice server answers its /health endpoint. */
+export async function GET() {
+  const pythonServerUrl =
+    process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";
+
+  try {
+    const res = await fetch(`${pythonServerUrl}/health`, {
+      method: "GET",
+      cache: "no-store",
+      signal: AbortSignal.timeout(4000),
+    });
+
+    if (res.ok) {
+      return NextResponse.json({ status: "online" });
+    }
+    return NextResponse.json({ status: "offline" });
+  } catch {
+    return NextResponse.json({ status: "offline" });
+  }
+}
diff --git a/podcast-forge/app/favicon.ico b/podcast-forge/app/favicon.ico
new file mode 100644
index 0000000..718d6fe
Binary files /dev/null and b/podcast-forge/app/favicon.ico differ
diff --git a/podcast-forge/app/globals.css b/podcast-forge/app/globals.css
new file mode 100644
index 0000000..9388e7f
--- /dev/null
+++ b/podcast-forge/app/globals.css
@@ -0,0 +1,87 @@
+@import "tailwindcss";
+
+:root {
+  --background: #0d1117;
+  --foreground: #e2e8f0;
+  --card-bg: #161b22;
+  --border: #21262d;
+  --accent-teal: #2dd4bf;
+  --accent-violet: #a78bfa;
+  --accent-teal-dim: #0d9488;
+  --accent-violet-dim: #7c3aed;
+  --muted: #64748b;
+  --success: #22c55e;
+  --error: #ef4444;
+  --font-sans: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+  --font-mono: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
+}
+
+@theme inline {
+  --color-background: var(--background);
+  --color-foreground: var(--foreground);
+  --font-sans: var(--font-sans);
+  --font-mono: var(--font-mono);
+}
+
+body {
+  background: var(--background);
+  color: var(--foreground);
+  font-family: var(--font-sans);
+ 
min-height: 100vh;
+}
+
+/* Custom scrollbar */
+::-webkit-scrollbar {
+  width: 6px;
+  height: 6px;
+}
+::-webkit-scrollbar-track {
+  background: var(--card-bg);
+}
+::-webkit-scrollbar-thumb {
+  background: var(--border);
+  border-radius: 3px;
+}
+::-webkit-scrollbar-thumb:hover {
+  background: var(--muted);
+}
+
+/* Range input styling */
+input[type="range"] {
+  -webkit-appearance: none;
+  appearance: none;
+  background: transparent;
+  cursor: pointer;
+}
+input[type="range"]::-webkit-slider-runnable-track {
+  background: var(--border);
+  height: 4px;
+  border-radius: 2px;
+}
+input[type="range"]::-webkit-slider-thumb {
+  -webkit-appearance: none;
+  appearance: none;
+  width: 16px;
+  height: 16px;
+  border-radius: 50%;
+  background: var(--thumb-color, var(--accent-teal)); /* honors a per-slider --thumb-color; teal otherwise */
+  margin-top: -6px;
+  box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
+  transition: box-shadow 0.15s ease;
+}
+input[type="range"]:hover::-webkit-slider-thumb {
+  box-shadow: 0 0 10px rgba(45, 212, 191, 0.7);
+}
+input[type="range"]::-moz-range-track {
+  background: var(--border);
+  height: 4px;
+  border-radius: 2px;
+}
+input[type="range"]::-moz-range-thumb {
+  width: 16px;
+  height: 16px;
+  border-radius: 50%;
+  background: var(--thumb-color, var(--accent-teal)); /* honors a per-slider --thumb-color; teal otherwise */
+  border: none;
+  box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
+}
diff --git a/podcast-forge/app/layout.tsx b/podcast-forge/app/layout.tsx
new file mode 100644
index 0000000..226e942
--- /dev/null
+++ b/podcast-forge/app/layout.tsx
@@ -0,0 +1,21 @@
+import type { Metadata } from "next";
+import "./globals.css";
+
+export const metadata: Metadata = {
+  title: "VibePod — TTS Podcast Generator",
+  description: "Generate podcast audio using Microsoft VibeVoice 0.5B",
+};
+
+export default function RootLayout({
+  children,
+}: Readonly<{
+  children: React.ReactNode;
+}>) {
+  return (
+
+
+        {children}
+
+
+  );
+}
diff --git a/podcast-forge/app/page.tsx b/podcast-forge/app/page.tsx
new file mode 100644
index 0000000..81b2142
--- /dev/null
+++ b/podcast-forge/app/page.tsx
@@ -0,0 
+1,186 @@
+"use client";
+
+import { useCallback, useEffect, useReducer } from "react";
+import Header from "@/components/Header";
+import TextInputPanel from "@/components/TextInputPanel";
+import GenerationControls from "@/components/GenerationControls";
+import AudioPlayer from "@/components/AudioPlayer";
+import StatusLog from "@/components/StatusLog";
+
+interface AppState {
+  script: string;
+  cfgScale: number;
+  inferenceSteps: number;
+  isGenerating: boolean;
+  audioUrl: string | null;
+  logs: string[];
+}
+
+type AppAction =
+  | { type: "SET_SCRIPT"; payload: string }
+  | { type: "SET_CFG_SCALE"; payload: number }
+  | { type: "SET_INFERENCE_STEPS"; payload: number }
+  | { type: "START_GENERATION" }
+  | { type: "GENERATION_SUCCESS"; payload: string }
+  | { type: "GENERATION_ERROR"; payload: string }
+  | { type: "ADD_LOG"; payload: string };
+
+/**
+ * Pure reducer for the page state. START_GENERATION also drops the previous
+ * audio URL and clears the log so each run starts from a clean slate.
+ */
+function appReducer(state: AppState, action: AppAction): AppState {
+  switch (action.type) {
+    case "SET_SCRIPT":
+      return { ...state, script: action.payload };
+    case "SET_CFG_SCALE":
+      return { ...state, cfgScale: action.payload };
+    case "SET_INFERENCE_STEPS":
+      return { ...state, inferenceSteps: action.payload };
+    case "START_GENERATION":
+      return {
+        ...state,
+        isGenerating: true,
+        audioUrl: null,
+        logs: [],
+      };
+    case "GENERATION_SUCCESS":
+      return {
+        ...state,
+        isGenerating: false,
+        audioUrl: action.payload,
+      };
+    case "GENERATION_ERROR":
+      return {
+        ...state,
+        isGenerating: false,
+      };
+    case "ADD_LOG":
+      return { ...state, logs: [...state.logs, action.payload] };
+    default:
+      return state;
+  }
+}
+
+const initialState: AppState = {
+  script: "",
+  cfgScale: 2.5,
+  inferenceSteps: 20,
+  isGenerating: false,
+  audioUrl: null,
+  logs: [],
+};
+
+/**
+ * Page component: owns the reducer state and the generate-request flow that
+ * the script input, generation controls, audio player and status log share.
+ */
+export default function HomePage() {
+  const [state, dispatch] = useReducer(appReducer, initialState);
+
+  // Blob URLs pin the generated WAV in memory until explicitly revoked, so
+  // release each one once it is replaced, cleared, or the page unmounts.
+  useEffect(() => {
+    const url = state.audioUrl;
+    if (!url) return undefined;
+    return () => URL.revokeObjectURL(url);
+  }, [state.audioUrl]);
+
+  const wordCount =
+    state.script.trim() === ""
+      ? 0
+      : state.script.trim().split(/\s+/).length;
+
+  const addLog = useCallback((msg: string) => {
+    dispatch({ type: "ADD_LOG", payload: msg });
+  }, []);
+
+  // Sends the script to /api/generate and, on success, exposes the returned
+  // audio to the player as a blob URL; every step is mirrored to the log.
+  const handleGenerate = useCallback(async () => {
+    if (!state.script.trim() || state.isGenerating) return;
+
+    dispatch({ type: "START_GENERATION" });
+    addLog("Connecting to VibeVoice server...");
+
+    try {
+      addLog(`Sending script (${wordCount} words) for synthesis...`);
+      addLog(
+        `Settings: CFG=${state.cfgScale.toFixed(1)}, Steps=${state.inferenceSteps}`
+      );
+
+      const res = await fetch("/api/generate", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          text: state.script,
+          cfg_scale: state.cfgScale,
+          inference_steps: state.inferenceSteps,
+        }),
+      });
+
+      if (!res.ok) {
+        const err = await res.json().catch(() => ({ error: res.statusText }));
+        throw new Error(err.error ?? `HTTP ${res.status}`);
+      }
+
+      addLog("Generating audio...");
+
+      const blob = await res.blob();
+      const url = URL.createObjectURL(blob);
+
+      const sizeMB = (blob.size / 1024 / 1024).toFixed(2);
+      addLog(`Audio received — ${sizeMB} MB`);
+      addLog("Done — audio ready for playback.");
+
+      dispatch({ type: "GENERATION_SUCCESS", payload: url });
+    } catch (err) {
+      const message =
+        err instanceof Error ? err.message : "Unknown error occurred";
+      addLog(`Error: ${message}`);
+      dispatch({ type: "GENERATION_ERROR", payload: message });
+    }
+  }, [state.script, state.cfgScale, state.inferenceSteps, state.isGenerating, wordCount, addLog]);
+
+  return (
+ 
+
+ +
+
+ {/* Left column: script input */} +
+ + dispatch({ type: "SET_SCRIPT", payload: text }) + } + /> + {state.audioUrl && } +
+ + {/* Right column: controls + log */} +
+ + dispatch({ type: "SET_CFG_SCALE", payload: v }) + } + inferenceSteps={state.inferenceSteps} + onInferenceStepsChange={(v) => + dispatch({ type: "SET_INFERENCE_STEPS", payload: v }) + } + onGenerate={handleGenerate} + isGenerating={state.isGenerating} + wordCount={wordCount} + /> + +
+
+
+
+ ); +} diff --git a/podcast-forge/components/AudioPlayer.tsx b/podcast-forge/components/AudioPlayer.tsx new file mode 100644 index 0000000..f54f25e --- /dev/null +++ b/podcast-forge/components/AudioPlayer.tsx @@ -0,0 +1,195 @@ +"use client"; + +import { useAudioPlayer } from "@/hooks/useAudioPlayer"; + +interface AudioPlayerProps { + audioUrl: string | null; +} + +function formatTime(seconds: number): string { + if (!isFinite(seconds) || isNaN(seconds)) return "0:00"; + const m = Math.floor(seconds / 60); + const s = Math.floor(seconds % 60); + return `${m}:${s.toString().padStart(2, "0")}`; +} + +export default function AudioPlayer({ audioUrl }: AudioPlayerProps) { + const { + isPlaying, + currentTime, + duration, + volume, + toggle, + seek, + setVolume, + } = useAudioPlayer(audioUrl); + + if (!audioUrl) return null; + + const progress = duration > 0 ? (currentTime / duration) * 100 : 0; + + const handleDownload = () => { + const a = document.createElement("a"); + a.href = audioUrl; + a.download = "vibepod-output.wav"; + a.click(); + }; + + return ( +
+
+

+ Audio Player +

+ +
+ + {/* Waveform / progress bar */} +
+
{ + const rect = e.currentTarget.getBoundingClientRect(); + const ratio = (e.clientX - rect.left) / rect.width; + seek(ratio * duration); + }} + > +
+
+
+ {formatTime(currentTime)} + {formatTime(duration)} +
+
+ + {/* Controls row */} +
+ {/* Play/Pause */} + + + {/* Duration info */} +
+ + {formatTime(currentTime)} + + / + {formatTime(duration)} +
+ + {/* Volume control */} +
+ + {volume === 0 ? ( + <> + + + + + ) : volume < 0.5 ? ( + <> + + + + ) : ( + <> + + + + )} + + setVolume(parseFloat(e.target.value))} + className="w-20" + aria-label="Volume" + /> +
+
+
+ ); +} diff --git a/podcast-forge/components/GenerationControls.tsx b/podcast-forge/components/GenerationControls.tsx new file mode 100644 index 0000000..78e8681 --- /dev/null +++ b/podcast-forge/components/GenerationControls.tsx @@ -0,0 +1,193 @@ +"use client"; + +interface GenerationControlsProps { + cfgScale: number; + onCfgScaleChange: (v: number) => void; + inferenceSteps: number; + onInferenceStepsChange: (v: number) => void; + onGenerate: () => void; + isGenerating: boolean; + wordCount: number; +} + +export default function GenerationControls({ + cfgScale, + onCfgScaleChange, + inferenceSteps, + onInferenceStepsChange, + onGenerate, + isGenerating, + wordCount, +}: GenerationControlsProps) { + const estimatedSeconds = Math.ceil(wordCount / 50); + const estimatedDisplay = + wordCount === 0 + ? "—" + : estimatedSeconds < 60 + ? `~${estimatedSeconds}s` + : `~${Math.floor(estimatedSeconds / 60)}m ${estimatedSeconds % 60}s`; + + return ( +
+

+ Generation Settings +

+ + {/* CFG Scale slider */} +
+
+ + + {cfgScale.toFixed(1)} + +
+ onCfgScaleChange(parseFloat(e.target.value))} + className="w-full" + /> +
+ Flat (1.0) + CFG Scale + Expressive (3.0) +
+
+ + {/* Inference Steps slider */} +
+
+ + + {inferenceSteps} + +
+ onInferenceStepsChange(parseInt(e.target.value, 10))} + className="w-full" + style={ + { + "--thumb-color": "var(--accent-violet)", + } as React.CSSProperties + } + /> +
+ Faster (10) + Inference Steps + Higher quality (30) +
+
+ + {/* Estimated time */} +
+ Estimated generation time + + {estimatedDisplay} + +
+ + {/* Generate button */} + +
+ ); +} diff --git a/podcast-forge/components/Header.tsx b/podcast-forge/components/Header.tsx new file mode 100644 index 0000000..b60d278 --- /dev/null +++ b/podcast-forge/components/Header.tsx @@ -0,0 +1,106 @@ +"use client"; + +import { useEffect, useState } from "react"; + +type ServerStatus = "checking" | "online" | "offline"; + +export default function Header() { + const [status, setStatus] = useState("checking"); + + useEffect(() => { + const checkHealth = async () => { + try { + const res = await fetch("/api/health"); + const data = await res.json(); + setStatus(data.status === "online" ? "online" : "offline"); + } catch { + setStatus("offline"); + } + }; + + checkHealth(); + const interval = setInterval(checkHealth, 30000); + return () => clearInterval(interval); + }, []); + + const statusConfig = { + checking: { + color: "bg-yellow-500", + label: "Checking...", + textColor: "text-yellow-400", + pulse: true, + }, + online: { + color: "bg-green-500", + label: "Server Online", + textColor: "text-green-400", + pulse: false, + }, + offline: { + color: "bg-red-500", + label: "Server Offline", + textColor: "text-red-400", + pulse: false, + }, + }; + + const cfg = statusConfig[status]; + + return ( +
+
+
+
+ 🎙 +
+
+

+ VibePod +

+

+ Powered by VibeVoice 0.5B +

+
+
+
+ +
+ + + + + {cfg.label} +
+
+ ); +} diff --git a/podcast-forge/components/StatusLog.tsx b/podcast-forge/components/StatusLog.tsx new file mode 100644 index 0000000..95155e5 --- /dev/null +++ b/podcast-forge/components/StatusLog.tsx @@ -0,0 +1,76 @@ +"use client"; + +import { useEffect, useRef } from "react"; + +interface StatusLogProps { + messages: string[]; +} + +export default function StatusLog({ messages }: StatusLogProps) { + const bottomRef = useRef(null); + + useEffect(() => { + bottomRef.current?.scrollIntoView({ behavior: "smooth" }); + }, [messages]); + + return ( +
+
+

+ Status Log +

+
+ + + +
+
+ +
+ {messages.length === 0 ? ( +

+ Waiting for input... + +

+ ) : ( + messages.map((msg, i) => { + const isError = + msg.toLowerCase().includes("error") || + msg.toLowerCase().includes("failed"); + const isSuccess = + msg.toLowerCase().includes("done") || + msg.toLowerCase().includes("complete") || + msg.toLowerCase().includes("ready"); + const color = isError + ? "var(--error)" + : isSuccess + ? "var(--success)" + : "var(--foreground)"; + + return ( +
+ + {String(i + 1).padStart(2, "0")} + + {msg} +
+ ); + }) + )} +
+
+
+ ); +} diff --git a/podcast-forge/components/TextInputPanel.tsx b/podcast-forge/components/TextInputPanel.tsx new file mode 100644 index 0000000..347564e --- /dev/null +++ b/podcast-forge/components/TextInputPanel.tsx @@ -0,0 +1,112 @@ +"use client"; + +const SAMPLE_SCRIPT = `Welcome to VibePod, your gateway to the future of audio content creation. Today, we're diving deep into the world of artificial intelligence and how it's transforming the way we produce and consume podcasts. + +Imagine being able to transform any written article, blog post, or essay into a professional-sounding audio experience in just seconds. That's exactly what VibeVoice 0.5B brings to the table — a compact yet powerful text-to-speech model that delivers remarkably natural-sounding voices. + +The technology behind modern TTS systems has evolved dramatically over the past few years. We've moved from robotic, stilted speech synthesis to voices that carry real emotional nuance and natural prosody. VibeVoice represents Microsoft's latest contribution to this rapidly advancing field. + +Whether you're a content creator looking to repurpose written material, an educator who wants to make content more accessible, or a developer building the next generation of audio applications, VibePod provides the tools you need. + +In today's episode, we'll explore the key features that make VibeVoice unique, discuss practical use cases across different industries, and look ahead to what the next generation of voice AI might bring. Let's get started.`; + +interface TextInputPanelProps { + value: string; + onChange: (text: string) => void; +} + +export default function TextInputPanel({ + value, + onChange, +}: TextInputPanelProps) { + const charCount = value.length; + const wordCount = value.trim() === "" ? 0 : value.trim().split(/\s+/).length; + + return ( +
+
+

+ Podcast Script +

+
+ + +
+
+ +