Create VibePod TTS podcast generator application

Agent-Logs-Url: https://github.com/JezzWTF/vibepod/sessions/a78fcf03-e979-4777-a428-18cc8eccc095

Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-27 15:41:46 +00:00
committed by GitHub
parent ee85bece74
commit 3974a4cf69
26 changed files with 3083 additions and 0 deletions
+195
View File
@@ -0,0 +1,195 @@
"use client";
import { useAudioPlayer } from "@/hooks/useAudioPlayer";
/** Props accepted by the `AudioPlayer` component. */
interface AudioPlayerProps {
/**
 * URL of the audio to play. It is handed to `useAudioPlayer` and used as the
 * download link target; when it is `null` the component renders nothing.
 */
audioUrl: string | null;
}
/**
 * Formats a number of seconds as `m:ss` for the player's time readouts.
 *
 * Minutes are not wrapped into hours, so 3600 seconds renders as `60:00`.
 * Fractional seconds are truncated.
 *
 * @param seconds - Position or duration in seconds.
 * @returns The formatted time, or `"0:00"` when `seconds` is `NaN`, infinite,
 *   or not positive (previously negative input produced output like `-1:-5`).
 */
function formatTime(seconds: number): string {
  // Number.isFinite already rejects NaN, so no separate isNaN check is needed.
  if (!Number.isFinite(seconds) || seconds <= 0) return "0:00";

  const wholeSeconds = Math.floor(seconds);
  const minutes = Math.floor(wholeSeconds / 60);
  const remainder = wholeSeconds % 60;
  return `${minutes}:${remainder.toString().padStart(2, "0")}`;
}
/**
 * Audio player card: download button, clickable progress bar, play/pause
 * toggle, elapsed/total time readouts and a volume slider.
 *
 * Playback state and controls come from the `useAudioPlayer` hook. Renders
 * nothing while `audioUrl` is `null`.
 */
export default function AudioPlayer({ audioUrl }: AudioPlayerProps) {
  // The hook is called before the early return so that it runs on every render.
  const {
    isPlaying,
    currentTime,
    duration,
    volume,
    toggle,
    seek,
    setVolume,
  } = useAudioPlayer(audioUrl);

  if (!audioUrl) return null;

  // Clamp so rounding or a stale duration can never produce a width outside 0–100%.
  const progress =
    duration > 0
      ? Math.min(100, Math.max(0, (currentTime / duration) * 100))
      : 0;

  // Triggers a browser download by clicking a temporary anchor element.
  const handleDownload = () => {
    const a = document.createElement("a");
    a.href = audioUrl;
    a.download = "vibepod-output.wav";
    a.click();
  };

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-4"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <div className="flex items-center justify-between">
        <h2
          className="text-sm font-semibold uppercase tracking-wider"
          style={{ color: "var(--accent-teal)" }}
        >
          Audio Player
        </h2>
        <button
          type="button"
          onClick={handleDownload}
          className="flex items-center gap-2 text-xs px-3 py-1.5 rounded-lg border transition-colors cursor-pointer"
          style={{
            borderColor: "var(--accent-teal-dim)",
            color: "var(--accent-teal)",
            background: "rgba(45, 212, 191, 0.05)",
          }}
          onMouseEnter={(e) => {
            e.currentTarget.style.background = "rgba(45, 212, 191, 0.15)";
          }}
          onMouseLeave={(e) => {
            e.currentTarget.style.background = "rgba(45, 212, 191, 0.05)";
          }}
        >
          <svg
            className="w-3.5 h-3.5"
            viewBox="0 0 24 24"
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
          >
            <path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4" />
            <polyline points="7 10 12 15 17 10" />
            <line x1="12" y1="15" x2="12" y2="3" />
          </svg>
          Download WAV
        </button>
      </div>

      {/* Waveform / progress bar */}
      <div className="flex flex-col gap-2">
        <div
          className="relative h-2 rounded-full cursor-pointer overflow-hidden"
          style={{ background: "var(--border)" }}
          onClick={(e) => {
            const rect = e.currentTarget.getBoundingClientRect();
            // Without a usable duration or a laid-out bar the target position
            // would be NaN/Infinity, so ignore the click instead of seeking.
            if (!Number.isFinite(duration) || duration <= 0 || rect.width <= 0) {
              return;
            }
            const ratio = Math.min(
              1,
              Math.max(0, (e.clientX - rect.left) / rect.width),
            );
            seek(ratio * duration);
          }}
        >
          <div
            className="absolute inset-y-0 left-0 rounded-full transition-all"
            style={{
              width: `${progress}%`,
              background:
                "linear-gradient(90deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            }}
          />
        </div>
        <div
          className="flex items-center justify-between text-xs font-mono"
          style={{ color: "var(--muted)" }}
        >
          <span>{formatTime(currentTime)}</span>
          <span>{formatTime(duration)}</span>
        </div>
      </div>

      {/* Controls row */}
      <div className="flex items-center gap-4">
        {/* Play/Pause */}
        <button
          type="button"
          onClick={toggle}
          className="w-10 h-10 rounded-full flex items-center justify-center transition-transform active:scale-95 cursor-pointer"
          style={{
            background:
              "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            boxShadow: "0 4px 12px rgba(45, 212, 191, 0.3)",
          }}
          aria-label={isPlaying ? "Pause" : "Play"}
        >
          {isPlaying ? (
            <svg
              className="w-4 h-4 text-white"
              viewBox="0 0 24 24"
              fill="currentColor"
            >
              <rect x="6" y="4" width="4" height="16" />
              <rect x="14" y="4" width="4" height="16" />
            </svg>
          ) : (
            <svg
              className="w-4 h-4 text-white"
              viewBox="0 0 24 24"
              fill="currentColor"
            >
              <polygon points="5 3 19 12 5 21 5 3" />
            </svg>
          )}
        </button>

        {/* Duration info */}
        <div className="flex-1 flex items-center gap-1 text-sm">
          <span style={{ color: "var(--foreground)" }}>
            {formatTime(currentTime)}
          </span>
          <span style={{ color: "var(--muted)" }}>/</span>
          <span style={{ color: "var(--muted)" }}>{formatTime(duration)}</span>
        </div>

        {/* Volume control */}
        <div className="flex items-center gap-2">
          <svg
            className="w-4 h-4 flex-shrink-0"
            style={{ color: "var(--muted)" }}
            viewBox="0 0 24 24"
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
          >
            {/* Speaker body is shared; only the "waves"/mute cross vary with volume. */}
            <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
            {volume === 0 ? (
              <>
                <line x1="23" y1="9" x2="17" y2="15" />
                <line x1="17" y1="9" x2="23" y2="15" />
              </>
            ) : volume < 0.5 ? (
              <path d="M15.54 8.46a5 5 0 010 7.07" />
            ) : (
              <path d="M19.07 4.93a10 10 0 010 14.14M15.54 8.46a5 5 0 010 7.07" />
            )}
          </svg>
          <input
            type="range"
            min={0}
            max={1}
            step={0.05}
            value={volume}
            onChange={(e) => setVolume(parseFloat(e.target.value))}
            className="w-20"
            aria-label="Volume"
          />
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,193 @@
"use client";
/** Props accepted by the `GenerationControls` component. */
interface GenerationControlsProps {
/** Current value of the "Voice Expressiveness" slider (rendered range 1.0–3.0). */
cfgScale: number;
/** Called with the parsed slider value when the CFG scale slider changes. */
onCfgScaleChange: (v: number) => void;
/** Current value of the "Quality vs Speed" slider (rendered range 10–30). */
inferenceSteps: number;
/** Called with the parsed integer value when the inference steps slider changes. */
onInferenceStepsChange: (v: number) => void;
/** Called when the generate button is clicked. */
onGenerate: () => void;
/** While true the generate button is disabled and shows a spinner. */
isGenerating: boolean;
/** Word count of the script; drives the time estimate and disables the button when 0. */
wordCount: number;
}
/**
 * Settings card with the CFG scale and inference step sliders, a rough
 * generation-time estimate and the "Generate" button.
 *
 * The component is fully controlled: slider values come from props and every
 * change is reported through the corresponding callback.
 */
export default function GenerationControls({
  cfgScale,
  onCfgScaleChange,
  inferenceSteps,
  onInferenceStepsChange,
  onGenerate,
  isGenerating,
  wordCount,
}: GenerationControlsProps) {
  // Ids tie each visible <label> to its range input so clicking the label
  // focuses the slider and assistive technology announces a name for it.
  const cfgScaleInputId = "generation-cfg-scale";
  const inferenceStepsInputId = "generation-inference-steps";

  // Heuristic estimate: one second per 50 words, rounded up.
  const estimatedSeconds = Math.ceil(wordCount / 50);
  let estimatedDisplay = "—";
  if (wordCount !== 0) {
    estimatedDisplay =
      estimatedSeconds < 60
        ? `~${estimatedSeconds}s`
        : `~${Math.floor(estimatedSeconds / 60)}m ${estimatedSeconds % 60}s`;
  }

  // Nothing to generate without text, and no second run while one is in flight.
  const isDisabled = isGenerating || wordCount === 0;

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-5"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <h2
        className="text-sm font-semibold uppercase tracking-wider"
        style={{ color: "var(--accent-teal)" }}
      >
        Generation Settings
      </h2>

      {/* CFG Scale slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label
            htmlFor={cfgScaleInputId}
            className="text-sm font-medium"
            style={{ color: "var(--foreground)" }}
          >
            Voice Expressiveness
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{
              background: "var(--background)",
              color: "var(--accent-teal)",
            }}
          >
            {cfgScale.toFixed(1)}
          </span>
        </div>
        <input
          id={cfgScaleInputId}
          type="range"
          min={1.0}
          max={3.0}
          step={0.1}
          value={cfgScale}
          onChange={(e) => onCfgScaleChange(parseFloat(e.target.value))}
          className="w-full"
        />
        <div
          className="flex items-center justify-between text-xs"
          style={{ color: "var(--muted)" }}
        >
          <span>Flat (1.0)</span>
          <span>CFG Scale</span>
          <span>Expressive (3.0)</span>
        </div>
      </div>

      {/* Inference Steps slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label
            htmlFor={inferenceStepsInputId}
            className="text-sm font-medium"
            style={{ color: "var(--foreground)" }}
          >
            Quality vs Speed
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{
              background: "var(--background)",
              color: "var(--accent-violet)",
            }}
          >
            {inferenceSteps}
          </span>
        </div>
        <input
          id={inferenceStepsInputId}
          type="range"
          min={10}
          max={30}
          step={1}
          value={inferenceSteps}
          onChange={(e) => onInferenceStepsChange(parseInt(e.target.value, 10))}
          className="w-full"
          style={
            {
              "--thumb-color": "var(--accent-violet)",
            } as React.CSSProperties
          }
        />
        <div
          className="flex items-center justify-between text-xs"
          style={{ color: "var(--muted)" }}
        >
          <span>Faster (10)</span>
          <span>Inference Steps</span>
          <span>Higher quality (30)</span>
        </div>
      </div>

      {/* Estimated time */}
      <div
        className="flex items-center justify-between px-3 py-2 rounded-lg text-sm"
        style={{
          background: "var(--background)",
          border: "1px solid var(--border)",
        }}
      >
        <span style={{ color: "var(--muted)" }}>Estimated generation time</span>
        <span
          className="font-mono font-medium"
          style={{ color: "var(--accent-teal)" }}
        >
          {estimatedDisplay}
        </span>
      </div>

      {/* Generate button */}
      <button
        type="button"
        onClick={onGenerate}
        disabled={isDisabled}
        className="w-full py-3 rounded-xl font-semibold text-sm transition-all cursor-pointer disabled:cursor-not-allowed flex items-center justify-center gap-2"
        style={
          isDisabled
            ? {
                background: "var(--border)",
                color: "var(--muted)",
              }
            : {
                background:
                  "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
                color: "#fff",
                boxShadow: "0 4px 15px rgba(45, 212, 191, 0.2)",
              }
        }
      >
        {isGenerating ? (
          <>
            <svg
              className="animate-spin w-4 h-4"
              viewBox="0 0 24 24"
              fill="none"
            >
              <circle
                className="opacity-25"
                cx="12"
                cy="12"
                r="10"
                stroke="currentColor"
                strokeWidth="4"
              />
              <path
                className="opacity-75"
                fill="currentColor"
                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
              />
            </svg>
            Generating audio...
          </>
        ) : (
          <>
            <svg
              className="w-4 h-4"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
              strokeWidth="2"
            >
              <polygon points="5 3 19 12 5 21 5 3" />
            </svg>
            Generate Podcast Audio
          </>
        )}
      </button>
    </div>
  );
}
+106
View File
@@ -0,0 +1,106 @@
"use client";
import { useEffect, useState } from "react";
/**
 * Health states shown in the header badge: "checking" is the initial state,
 * "online"/"offline" are set from the result of the `/api/health` request.
 */
type ServerStatus = "checking" | "online" | "offline";
/** Badge appearance for each server status; static, so built once per module. */
const STATUS_CONFIG: Record<
  ServerStatus,
  { color: string; label: string; textColor: string; pulse: boolean }
> = {
  checking: {
    color: "bg-yellow-500",
    label: "Checking...",
    textColor: "text-yellow-400",
    pulse: true,
  },
  online: {
    color: "bg-green-500",
    label: "Server Online",
    textColor: "text-green-400",
    pulse: false,
  },
  offline: {
    color: "bg-red-500",
    label: "Server Offline",
    textColor: "text-red-400",
    pulse: false,
  },
};

/**
 * Application header with the VibePod branding and a server status badge.
 *
 * On mount it requests `/api/health`, then repeats the request every 30
 * seconds. The badge shows "online" only when the response is successful and
 * its JSON body has `status === "online"`; any failure shows "offline".
 */
export default function Header() {
  const [status, setStatus] = useState<ServerStatus>("checking");

  useEffect(() => {
    // Aborting on cleanup cancels in-flight requests and lets late responses
    // be ignored, so state is never updated after the component unmounts.
    const controller = new AbortController();

    const checkHealth = async (): Promise<void> => {
      try {
        const res = await fetch("/api/health", { signal: controller.signal });
        if (!res.ok) {
          setStatus("offline");
          return;
        }
        // The payload is external input: treat it as unknown and narrow.
        const data: unknown = await res.json();
        if (controller.signal.aborted) return;
        const isOnline =
          typeof data === "object" &&
          data !== null &&
          (data as { status?: unknown }).status === "online";
        setStatus(isOnline ? "online" : "offline");
      } catch {
        // An abort is part of cleanup, not a server failure.
        if (!controller.signal.aborted) setStatus("offline");
      }
    };

    void checkHealth();
    const interval = setInterval(() => {
      void checkHealth();
    }, 30000);

    return () => {
      clearInterval(interval);
      controller.abort();
    };
  }, []);

  const cfg = STATUS_CONFIG[status];

  return (
    <header
      className="border-b px-6 py-4 flex items-center justify-between"
      style={{
        background: "var(--card-bg)",
        borderColor: "var(--border)",
      }}
    >
      <div className="flex items-center gap-4">
        <div className="flex items-center gap-3">
          <div
            className="w-9 h-9 rounded-xl flex items-center justify-center text-lg font-bold"
            style={{
              background:
                "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            }}
          >
            🎙
          </div>
          <div>
            <h1
              className="text-xl font-bold tracking-tight"
              style={{
                background:
                  "linear-gradient(135deg, var(--accent-teal), var(--accent-violet))",
                WebkitBackgroundClip: "text",
                WebkitTextFillColor: "transparent",
              }}
            >
              VibePod
            </h1>
            <p className="text-xs" style={{ color: "var(--muted)" }}>
              Powered by VibeVoice 0.5B
            </p>
          </div>
        </div>
      </div>
      <div
        className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs font-medium border"
        style={{
          background: "var(--background)",
          borderColor: "var(--border)",
        }}
      >
        <span className="relative flex h-2 w-2">
          {/* Pinging halo is only shown while the status is still being checked. */}
          <span
            className={
              cfg.pulse
                ? "animate-ping absolute inline-flex h-full w-full rounded-full opacity-75 " +
                  cfg.color
                : "hidden"
            }
          />
          <span
            className={`relative inline-flex rounded-full h-2 w-2 ${cfg.color}`}
          />
        </span>
        <span style={{ color: "var(--foreground)" }}>{cfg.label}</span>
      </div>
    </header>
  );
}
+76
View File
@@ -0,0 +1,76 @@
"use client";
import { useEffect, useRef } from "react";
/** Props accepted by the `StatusLog` component. */
interface StatusLogProps {
/** Log lines, rendered in array order with a 1-based line number prefix. */
messages: string[];
}
/**
 * Picks the text colour for a log line from keywords in the message.
 *
 * Matching is a case-insensitive substring search: "error"/"failed" mark an
 * error, "done"/"complete"/"ready" mark success, and errors take precedence.
 */
function statusLogColor(msg: string): string {
  // Lower-case once instead of once per keyword.
  const lower = msg.toLowerCase();
  if (lower.includes("error") || lower.includes("failed")) {
    return "var(--error)";
  }
  if (
    lower.includes("done") ||
    lower.includes("complete") ||
    lower.includes("ready")
  ) {
    return "var(--success)";
  }
  return "var(--foreground)";
}

/**
 * Terminal-styled card that lists status messages and keeps the newest line
 * in view whenever `messages` changes.
 */
export default function StatusLog({ messages }: StatusLogProps) {
  const logRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    // Scroll only the log container. scrollIntoView on a sentinel element also
    // scrolls ancestor containers, which made the whole page jump to the log
    // on mount and on every new message.
    const container = logRef.current;
    if (!container) return;
    container.scrollTo({ top: container.scrollHeight, behavior: "smooth" });
  }, [messages]);

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-3"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <div className="flex items-center gap-2">
        <h2
          className="text-sm font-semibold uppercase tracking-wider"
          style={{ color: "var(--accent-teal)" }}
        >
          Status Log
        </h2>
        <div className="flex gap-1 ml-auto">
          <span className="w-2.5 h-2.5 rounded-full bg-red-500 opacity-70" />
          <span className="w-2.5 h-2.5 rounded-full bg-yellow-500 opacity-70" />
          <span className="w-2.5 h-2.5 rounded-full bg-green-500 opacity-70" />
        </div>
      </div>
      <div
        ref={logRef}
        className="rounded-lg p-4 h-40 overflow-y-auto font-mono text-xs leading-relaxed"
        style={{
          background: "var(--background)",
          border: "1px solid var(--border)",
        }}
      >
        {messages.length === 0 ? (
          <p style={{ color: "var(--muted)" }}>
            Waiting for input...
            <span className="animate-pulse"></span>
          </p>
        ) : (
          // Index keys are acceptable here: each line's key is its position.
          messages.map((msg, i) => (
            <div key={i} className="flex items-start gap-2">
              <span style={{ color: "var(--muted)" }} className="select-none">
                {String(i + 1).padStart(2, "0")}
              </span>
              <span style={{ color: statusLogColor(msg) }}>{msg}</span>
            </div>
          ))
        )}
      </div>
    </div>
  );
}
+112
View File
@@ -0,0 +1,112 @@
"use client";
/** Demo script passed to `onChange` by the "Load sample script" button. */
const SAMPLE_SCRIPT = `Welcome to VibePod, your gateway to the future of audio content creation. Today, we're diving deep into the world of artificial intelligence and how it's transforming the way we produce and consume podcasts.
Imagine being able to transform any written article, blog post, or essay into a professional-sounding audio experience in just seconds. That's exactly what VibeVoice 0.5B brings to the table — a compact yet powerful text-to-speech model that delivers remarkably natural-sounding voices.
The technology behind modern TTS systems has evolved dramatically over the past few years. We've moved from robotic, stilted speech synthesis to voices that carry real emotional nuance and natural prosody. VibeVoice represents Microsoft's latest contribution to this rapidly advancing field.
Whether you're a content creator looking to repurpose written material, an educator who wants to make content more accessible, or a developer building the next generation of audio applications, VibePod provides the tools you need.
In today's episode, we'll explore the key features that make VibeVoice unique, discuss practical use cases across different industries, and look ahead to what the next generation of voice AI might bring. Let's get started.`;
/** Props accepted by the `TextInputPanel` component. */
interface TextInputPanelProps {
/** Current script text shown in the textarea (controlled value). */
value: string;
/**
 * Called with the new text when the user edits the textarea, loads the
 * sample script, or clears the input (empty string).
 */
onChange: (text: string) => void;
}
/**
 * Script editor card: a controlled textarea with "Load sample script" and
 * "Clear" buttons plus live word and character counts.
 */
export default function TextInputPanel({
  value,
  onChange,
}: TextInputPanelProps) {
  const charCount = value.length;
  // Trim once; an empty or whitespace-only script counts as zero words.
  const trimmed = value.trim();
  const wordCount = trimmed === "" ? 0 : trimmed.split(/\s+/).length;

  // Applies the hover/rest colours to a toolbar button. Handlers pass
  // e.currentTarget (the button the handler is bound to) rather than e.target,
  // which is whichever node the pointer event originated on.
  const tintButton = (
    button: HTMLButtonElement,
    color: string,
    borderColor: string,
  ) => {
    button.style.color = color;
    button.style.borderColor = borderColor;
  };

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-4"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <div className="flex items-center justify-between">
        <h2
          className="text-sm font-semibold uppercase tracking-wider"
          style={{ color: "var(--accent-teal)" }}
        >
          Podcast Script
        </h2>
        <div className="flex items-center gap-2">
          <button
            type="button"
            onClick={() => onChange(SAMPLE_SCRIPT)}
            className="text-xs px-3 py-1.5 rounded-lg border transition-colors cursor-pointer"
            style={{
              borderColor: "var(--border)",
              color: "var(--muted)",
            }}
            onMouseEnter={(e) =>
              tintButton(
                e.currentTarget,
                "var(--accent-violet)",
                "var(--accent-violet)",
              )
            }
            onMouseLeave={(e) =>
              tintButton(e.currentTarget, "var(--muted)", "var(--border)")
            }
          >
            Load sample script
          </button>
          <button
            type="button"
            onClick={() => onChange("")}
            className="text-xs px-3 py-1.5 rounded-lg border transition-colors cursor-pointer"
            style={{
              borderColor: "var(--border)",
              color: "var(--muted)",
            }}
            onMouseEnter={(e) =>
              tintButton(e.currentTarget, "var(--error)", "var(--error)")
            }
            onMouseLeave={(e) =>
              tintButton(e.currentTarget, "var(--muted)", "var(--border)")
            }
          >
            Clear
          </button>
        </div>
      </div>
      <textarea
        value={value}
        onChange={(e) => onChange(e.target.value)}
        placeholder="Paste or type your podcast script here..."
        aria-label="Podcast script"
        rows={12}
        className="w-full rounded-lg p-4 text-sm resize-y outline-none transition-colors font-sans leading-relaxed"
        style={{
          background: "var(--background)",
          border: "1px solid var(--border)",
          color: "var(--foreground)",
          minHeight: "200px",
        }}
        onFocus={(e) => {
          e.currentTarget.style.borderColor = "var(--accent-teal)";
        }}
        onBlur={(e) => {
          e.currentTarget.style.borderColor = "var(--border)";
        }}
      />
      <div
        className="flex items-center justify-between text-xs"
        style={{ color: "var(--muted)" }}
      >
        <span>
          {wordCount} word{wordCount !== 1 ? "s" : ""}
        </span>
        <span>{charCount.toLocaleString()} characters</span>
      </div>
    </div>
  );
}