mirror of
https://github.com/JezzWTF/vibepod.git
synced 2026-06-01 15:22:14 +00:00
Create VibePod TTS podcast generator application
Agent-Logs-Url: https://github.com/JezzWTF/vibepod/sessions/a78fcf03-e979-4777-a428-18cc8eccc095 Co-authored-by: LyAhn <27559362+LyAhn@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ee85bece74
commit
3974a4cf69
@@ -0,0 +1,41 @@
|
|||||||
|
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||||
|
|
||||||
|
# dependencies
|
||||||
|
/node_modules
|
||||||
|
/.pnp
|
||||||
|
.pnp.*
|
||||||
|
.yarn/*
|
||||||
|
!.yarn/patches
|
||||||
|
!.yarn/plugins
|
||||||
|
!.yarn/releases
|
||||||
|
!.yarn/versions
|
||||||
|
|
||||||
|
# testing
|
||||||
|
/coverage
|
||||||
|
|
||||||
|
# next.js
|
||||||
|
/.next/
|
||||||
|
/out/
|
||||||
|
|
||||||
|
# production
|
||||||
|
/build
|
||||||
|
|
||||||
|
# misc
|
||||||
|
.DS_Store
|
||||||
|
*.pem
|
||||||
|
|
||||||
|
# debug
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
.pnpm-debug.log*
|
||||||
|
|
||||||
|
# env files (can opt-in for committing if needed)
|
||||||
|
.env*
|
||||||
|
|
||||||
|
# vercel
|
||||||
|
.vercel
|
||||||
|
|
||||||
|
# typescript
|
||||||
|
*.tsbuildinfo
|
||||||
|
next-env.d.ts
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
First, run the development server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run dev
|
||||||
|
# or
|
||||||
|
yarn dev
|
||||||
|
# or
|
||||||
|
pnpm dev
|
||||||
|
# or
|
||||||
|
bun dev
|
||||||
|
```
|
||||||
|
|
||||||
|
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||||||
|
|
||||||
|
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
||||||
|
|
||||||
|
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
||||||
|
|
||||||
|
## Learn More
|
||||||
|
|
||||||
|
To learn more about Next.js, take a look at the following resources:
|
||||||
|
|
||||||
|
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
||||||
|
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
||||||
|
|
||||||
|
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
||||||
|
|
||||||
|
## Deploy on Vercel
|
||||||
|
|
||||||
|
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
||||||
|
|
||||||
|
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
|
||||||
|
/**
 * POST handler that proxies a text-to-speech request to the VibeVoice server.
 *
 * Expects a JSON object body with:
 * - `text` (string, required, non-blank)
 * - `cfg_scale` (number, optional, defaults to 2.5)
 * - `inference_steps` (number, optional, defaults to 20)
 *
 * Responses:
 * - 200 with the upstream audio bytes, served as `audio/wav`
 * - 400 when the body is not valid JSON, not an object, or a field is invalid
 * - the upstream status code when the VibeVoice server answers with an error
 * - 502 when the upstream request itself fails (e.g. connection refused)
 */
export async function POST(request: NextRequest) {
  // Parse the body on its own so that malformed JSON is reported as a client
  // error (400) instead of being mistaken for an upstream failure (502).
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: "Request body must be valid JSON" },
      { status: 400 }
    );
  }

  // `null` and primitives are valid JSON but cannot be destructured safely.
  if (typeof body !== "object" || body === null) {
    return NextResponse.json(
      { error: "Request body must be a JSON object" },
      { status: 400 }
    );
  }

  const { text, cfg_scale, inference_steps } = body as {
    text?: unknown;
    cfg_scale?: unknown;
    inference_steps?: unknown;
  };

  if (typeof text !== "string" || text.trim().length === 0) {
    return NextResponse.json(
      { error: "Missing or empty text field" },
      { status: 400 }
    );
  }

  // Absent/null settings fall back to the defaults; anything else must be a
  // finite number so garbage is never forwarded to the synthesis server.
  const cfgScale = cfg_scale ?? 2.5;
  const inferenceSteps = inference_steps ?? 20;
  if (typeof cfgScale !== "number" || !Number.isFinite(cfgScale)) {
    return NextResponse.json(
      { error: "cfg_scale must be a finite number" },
      { status: 400 }
    );
  }
  if (typeof inferenceSteps !== "number" || !Number.isFinite(inferenceSteps)) {
    return NextResponse.json(
      { error: "inference_steps must be a finite number" },
      { status: 400 }
    );
  }

  try {
    const pythonServerUrl =
      process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";

    const upstream = await fetch(`${pythonServerUrl}/generate`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        text: text.trim(),
        cfg_scale: cfgScale,
        inference_steps: inferenceSteps,
      }),
    });

    if (!upstream.ok) {
      // Relay the upstream status together with whatever body it produced.
      const errorText = await upstream.text().catch(() => "Unknown error");
      return NextResponse.json(
        { error: `VibeVoice server error: ${errorText}` },
        { status: upstream.status }
      );
    }

    const audioBuffer = await upstream.arrayBuffer();

    return new NextResponse(audioBuffer, {
      status: 200,
      headers: {
        "Content-Type": "audio/wav",
        "Content-Disposition": 'attachment; filename="vibepod-output.wav"',
        "Cache-Control": "no-store",
      },
    });
  } catch (err) {
    // Only network-level failures while talking to the upstream land here.
    const message =
      err instanceof Error ? err.message : "Failed to connect to VibeVoice server";
    return NextResponse.json({ error: message }, { status: 502 });
  }
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
import { NextResponse } from "next/server";
|
||||||
|
|
||||||
|
/**
 * GET handler reporting whether the VibeVoice server is reachable.
 *
 * Probes `<VIBEVOICE_SERVER_URL>/health` (default `http://localhost:8000`)
 * with a 4 second timeout and always answers with JSON of the form
 * `{ status: "online" }` or `{ status: "offline" }`.
 */
export async function GET() {
  const baseUrl = process.env.VIBEVOICE_SERVER_URL ?? "http://localhost:8000";

  // Assume the worst; only a successful probe flips this to "online".
  let status: "online" | "offline" = "offline";

  try {
    const probe = await fetch(`${baseUrl}/health`, {
      method: "GET",
      signal: AbortSignal.timeout(4000),
    });
    if (probe.ok) {
      status = "online";
    }
  } catch {
    // Connection errors and timeouts are reported as "offline".
  }

  return NextResponse.json({ status });
}
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
@@ -0,0 +1,87 @@
|
|||||||
|
@import "tailwindcss";
|
||||||
|
|
||||||
|
/* Design tokens shared by the whole app. */
:root {
  /* Surfaces */
  --background: #0d1117;
  --card-bg: #161b22;
  --border: #21262d;

  /* Text */
  --foreground: #e2e8f0;
  --muted: #64748b;

  /* Accents (bright variants and their dimmer counterparts) */
  --accent-teal: #2dd4bf;
  --accent-teal-dim: #0d9488;
  --accent-violet: #a78bfa;
  --accent-violet-dim: #7c3aed;

  /* Status colours */
  --success: #22c55e;
  --error: #ef4444;

  /* Font stacks */
  --font-sans: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
  --font-mono: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
}

/* Expose the tokens above to Tailwind's theme. */
@theme inline {
  --color-background: var(--background);
  --color-foreground: var(--foreground);
  --font-sans: var(--font-sans);
  --font-mono: var(--font-mono);
}

/* Page defaults */
body {
  min-height: 100vh;
  font-family: var(--font-sans);
  color: var(--foreground);
  background: var(--background);
}

/* Custom scrollbar (WebKit-prefixed pseudo-elements) */
::-webkit-scrollbar {
  height: 6px;
  width: 6px;
}

::-webkit-scrollbar-track {
  background: var(--card-bg);
}

::-webkit-scrollbar-thumb {
  border-radius: 3px;
  background: var(--border);
}

::-webkit-scrollbar-thumb:hover {
  background: var(--muted);
}
|
||||||
|
|
||||||
|
/*
 * Range input styling.
 *
 * The thumb colour can be overridden per input by setting the `--thumb-color`
 * custom property on the <input> element; when it is not set the thumb falls
 * back to the teal accent.
 */
input[type="range"] {
  -webkit-appearance: none;
  appearance: none;
  background: transparent;
  cursor: pointer;
}

/* WebKit track */
input[type="range"]::-webkit-slider-runnable-track {
  background: var(--border);
  height: 4px;
  border-radius: 2px;
}

/* WebKit thumb */
input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--thumb-color, var(--accent-teal));
  /* Pull the 16px thumb up so it is centred on the 4px track. */
  margin-top: -6px;
  box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
  transition: box-shadow 0.15s ease;
}

input[type="range"]:hover::-webkit-slider-thumb {
  box-shadow: 0 0 10px rgba(45, 212, 191, 0.7);
}

/* Firefox track */
input[type="range"]::-moz-range-track {
  background: var(--border);
  height: 4px;
  border-radius: 2px;
}

/* Firefox thumb */
input[type="range"]::-moz-range-thumb {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--thumb-color, var(--accent-teal));
  border: none;
  box-shadow: 0 0 6px rgba(45, 212, 191, 0.4);
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import type { Metadata } from "next";
|
||||||
|
import "./globals.css";
|
||||||
|
|
||||||
|
/** Document metadata (title and description) exported for Next.js. */
export const metadata: Metadata = {
  title: "VibePod — TTS Podcast Generator",
  description: "Generate podcast audio using Microsoft VibeVoice 0.5B",
};

type RootLayoutProps = Readonly<{
  children: React.ReactNode;
}>;

/**
 * Root layout: renders the `<html>`/`<body>` shell around the page content and
 * applies the theme's background and foreground colours to the body.
 */
export default function RootLayout(props: RootLayoutProps) {
  const themeStyle = {
    background: "var(--background)",
    color: "var(--foreground)",
  };

  return (
    <html lang="en">
      <body style={themeStyle}>{props.children}</body>
    </html>
  );
}
|
||||||
@@ -0,0 +1,168 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useReducer, useCallback } from "react";
|
||||||
|
import Header from "@/components/Header";
|
||||||
|
import TextInputPanel from "@/components/TextInputPanel";
|
||||||
|
import GenerationControls from "@/components/GenerationControls";
|
||||||
|
import AudioPlayer from "@/components/AudioPlayer";
|
||||||
|
import StatusLog from "@/components/StatusLog";
|
||||||
|
|
||||||
|
/** Complete UI state of the home page. */
interface AppState {
  /** Script text entered by the user. */
  script: string;
  /** Current value of the CFG scale setting. */
  cfgScale: number;
  /** Current value of the inference steps setting. */
  inferenceSteps: number;
  /** True while a generation request is in flight. */
  isGenerating: boolean;
  /** URL of the most recently generated audio, or null when there is none. */
  audioUrl: string | null;
  /** Status messages collected for the current generation run. */
  logs: string[];
}

/** Every action understood by {@link appReducer}. */
type AppAction =
  | { type: "SET_SCRIPT"; payload: string }
  | { type: "SET_CFG_SCALE"; payload: number }
  | { type: "SET_INFERENCE_STEPS"; payload: number }
  | { type: "START_GENERATION" }
  | { type: "GENERATION_SUCCESS"; payload: string }
  | { type: "GENERATION_ERROR"; payload: string }
  | { type: "ADD_LOG"; payload: string };

/**
 * Pure state transition function for the home page.
 *
 * Returns a new state object for every recognised action and the unchanged
 * `state` reference for anything else.
 */
function appReducer(state: AppState, action: AppAction): AppState {
  if (action.type === "SET_SCRIPT") {
    return { ...state, script: action.payload };
  }
  if (action.type === "SET_CFG_SCALE") {
    return { ...state, cfgScale: action.payload };
  }
  if (action.type === "SET_INFERENCE_STEPS") {
    return { ...state, inferenceSteps: action.payload };
  }
  if (action.type === "START_GENERATION") {
    // A new run discards the previous audio and starts with an empty log.
    return { ...state, isGenerating: true, audioUrl: null, logs: [] };
  }
  if (action.type === "GENERATION_SUCCESS") {
    return { ...state, isGenerating: false, audioUrl: action.payload };
  }
  if (action.type === "GENERATION_ERROR") {
    // The error payload is not stored in state; only the busy flag is cleared.
    return { ...state, isGenerating: false };
  }
  if (action.type === "ADD_LOG") {
    return { ...state, logs: [...state.logs, action.payload] };
  }
  return state;
}

/** State used for the first render. */
const initialState: AppState = {
  script: "",
  cfgScale: 2.5,
  inferenceSteps: 20,
  isGenerating: false,
  audioUrl: null,
  logs: [],
};
|
||||||
|
|
||||||
|
/**
 * Home page: script editor, generation settings, audio player and status log.
 *
 * All UI state lives in a single reducer. Generating audio posts the script
 * and settings to `/api/generate`, turns the returned blob into an object URL
 * and hands that URL to the audio player.
 */
export default function HomePage() {
  const [state, dispatch] = useReducer(appReducer, initialState);

  // Number of whitespace-separated words in the trimmed script.
  const wordCount =
    state.script.trim() === ""
      ? 0
      : state.script.trim().split(/\s+/).length;

  const addLog = useCallback((msg: string) => {
    dispatch({ type: "ADD_LOG", payload: msg });
  }, []);

  const handleGenerate = useCallback(async () => {
    if (!state.script.trim() || state.isGenerating) return;

    // START_GENERATION drops the previous audio URL from state, so release
    // the blob it points to first; otherwise every regeneration leaks one
    // audio blob for the lifetime of the page.
    if (state.audioUrl) {
      URL.revokeObjectURL(state.audioUrl);
    }

    dispatch({ type: "START_GENERATION" });
    addLog("Connecting to VibeVoice server...");

    try {
      addLog(`Sending script (${wordCount} words) for synthesis...`);
      addLog(
        `Settings: CFG=${state.cfgScale.toFixed(1)}, Steps=${state.inferenceSteps}`
      );

      const res = await fetch("/api/generate", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          text: state.script,
          cfg_scale: state.cfgScale,
          inference_steps: state.inferenceSteps,
        }),
      });

      if (!res.ok) {
        // Prefer the JSON error message; fall back to the HTTP status text.
        const err = await res.json().catch(() => ({ error: res.statusText }));
        throw new Error(err.error ?? `HTTP ${res.status}`);
      }

      addLog("Generating audio...");

      const blob = await res.blob();
      const url = URL.createObjectURL(blob);

      const sizeMB = (blob.size / 1024 / 1024).toFixed(2);
      addLog(`Audio received — ${sizeMB} MB`);
      addLog("Done — audio ready for playback.");

      dispatch({ type: "GENERATION_SUCCESS", payload: url });
    } catch (err) {
      const message =
        err instanceof Error ? err.message : "Unknown error occurred";
      addLog(`Error: ${message}`);
      dispatch({ type: "GENERATION_ERROR", payload: message });
    }
  }, [
    state.script,
    state.cfgScale,
    state.inferenceSteps,
    state.isGenerating,
    state.audioUrl,
    wordCount,
    addLog,
  ]);

  return (
    <div
      className="min-h-screen flex flex-col"
      style={{ background: "var(--background)" }}
    >
      <Header />

      <main className="flex-1 container mx-auto px-4 py-6 max-w-6xl">
        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
          {/* Left column: script input */}
          <div className="lg:col-span-2 flex flex-col gap-6">
            <TextInputPanel
              value={state.script}
              onChange={(text) =>
                dispatch({ type: "SET_SCRIPT", payload: text })
              }
            />
            {state.audioUrl && <AudioPlayer audioUrl={state.audioUrl} />}
          </div>

          {/* Right column: controls + log */}
          <div className="flex flex-col gap-6">
            <GenerationControls
              cfgScale={state.cfgScale}
              onCfgScaleChange={(v) =>
                dispatch({ type: "SET_CFG_SCALE", payload: v })
              }
              inferenceSteps={state.inferenceSteps}
              onInferenceStepsChange={(v) =>
                dispatch({ type: "SET_INFERENCE_STEPS", payload: v })
              }
              onGenerate={handleGenerate}
              isGenerating={state.isGenerating}
              wordCount={wordCount}
            />
            <StatusLog messages={state.logs} />
          </div>
        </div>
      </main>
    </div>
  );
}
|
||||||
@@ -0,0 +1,195 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useAudioPlayer } from "@/hooks/useAudioPlayer";
|
||||||
|
|
||||||
|
/** Props accepted by the AudioPlayer component. */
interface AudioPlayerProps {
  /** URL of the audio to play, or null when there is nothing to play. */
  audioUrl: null | string;
}
|
||||||
|
|
||||||
|
/**
 * Formats a duration in seconds as `m:ss`.
 *
 * @param seconds - Duration in seconds; fractional parts are truncated.
 * @returns The formatted time, or `"0:00"` when `seconds` is NaN, infinite or
 *   negative (a negative value would otherwise render as e.g. `"-1:-5"`).
 */
function formatTime(seconds: number): string {
  if (!Number.isFinite(seconds) || seconds < 0) return "0:00";

  const whole = Math.floor(seconds);
  const m = Math.floor(whole / 60);
  const s = whole % 60;
  return `${m}:${s.toString().padStart(2, "0")}`;
}
|
||||||
|
|
||||||
|
/**
 * Audio player card with play/pause, a clickable progress bar, a volume
 * slider and a "Download WAV" button.
 *
 * Playback state and controls come from the `useAudioPlayer` hook. Renders
 * nothing when `audioUrl` is null/empty.
 */
export default function AudioPlayer({ audioUrl }: AudioPlayerProps) {
  const {
    isPlaying,
    currentTime,
    duration,
    volume,
    toggle,
    seek,
    setVolume,
  } = useAudioPlayer(audioUrl);

  // The hook is called unconditionally above so hook order stays stable.
  if (!audioUrl) return null;

  const progress = duration > 0 ? (currentTime / duration) * 100 : 0;

  // Triggers a download of the current audio via a temporary anchor element.
  const handleDownload = () => {
    const a = document.createElement("a");
    a.href = audioUrl;
    a.download = "vibepod-output.wav";
    a.click();
  };

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-4"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <div className="flex items-center justify-between">
        <h2
          className="text-sm font-semibold uppercase tracking-wider"
          style={{ color: "var(--accent-teal)" }}
        >
          Audio Player
        </h2>
        <button
          type="button"
          onClick={handleDownload}
          className="flex items-center gap-2 text-xs px-3 py-1.5 rounded-lg border transition-colors cursor-pointer"
          style={{
            borderColor: "var(--accent-teal-dim)",
            color: "var(--accent-teal)",
            background: "rgba(45, 212, 191, 0.05)",
          }}
          onMouseEnter={(e) => {
            e.currentTarget.style.background = "rgba(45, 212, 191, 0.15)";
          }}
          onMouseLeave={(e) => {
            e.currentTarget.style.background = "rgba(45, 212, 191, 0.05)";
          }}
        >
          <svg
            className="w-3.5 h-3.5"
            viewBox="0 0 24 24"
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
          >
            <path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4" />
            <polyline points="7 10 12 15 17 10" />
            <line x1="12" y1="15" x2="12" y2="3" />
          </svg>
          Download WAV
        </button>
      </div>

      {/* Waveform / progress bar */}
      <div className="flex flex-col gap-2">
        <div
          className="relative h-2 rounded-full cursor-pointer overflow-hidden"
          style={{ background: "var(--border)" }}
          onClick={(e) => {
            // Without a usable duration there is nothing to seek within, and
            // ratio * duration would be NaN/Infinity.
            if (!Number.isFinite(duration) || duration <= 0) return;

            const rect = e.currentTarget.getBoundingClientRect();
            if (rect.width <= 0) return;

            // Clamp so a click on the very edge can never seek outside the
            // [0, duration] range.
            const ratio = Math.min(
              1,
              Math.max(0, (e.clientX - rect.left) / rect.width)
            );
            seek(ratio * duration);
          }}
        >
          <div
            className="absolute inset-y-0 left-0 rounded-full transition-all"
            style={{
              width: `${progress}%`,
              background:
                "linear-gradient(90deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            }}
          />
        </div>
        <div
          className="flex items-center justify-between text-xs font-mono"
          style={{ color: "var(--muted)" }}
        >
          <span>{formatTime(currentTime)}</span>
          <span>{formatTime(duration)}</span>
        </div>
      </div>

      {/* Controls row */}
      <div className="flex items-center gap-4">
        {/* Play/Pause */}
        <button
          type="button"
          onClick={toggle}
          className="w-10 h-10 rounded-full flex items-center justify-center transition-transform active:scale-95 cursor-pointer"
          style={{
            background:
              "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            boxShadow: "0 4px 12px rgba(45, 212, 191, 0.3)",
          }}
          aria-label={isPlaying ? "Pause" : "Play"}
        >
          {isPlaying ? (
            <svg
              className="w-4 h-4 text-white"
              viewBox="0 0 24 24"
              fill="currentColor"
            >
              <rect x="6" y="4" width="4" height="16" />
              <rect x="14" y="4" width="4" height="16" />
            </svg>
          ) : (
            <svg
              className="w-4 h-4 text-white"
              viewBox="0 0 24 24"
              fill="currentColor"
            >
              <polygon points="5 3 19 12 5 21 5 3" />
            </svg>
          )}
        </button>

        {/* Duration info */}
        <div className="flex-1 flex items-center gap-1 text-sm">
          <span style={{ color: "var(--foreground)" }}>
            {formatTime(currentTime)}
          </span>
          <span style={{ color: "var(--muted)" }}>/</span>
          <span style={{ color: "var(--muted)" }}>{formatTime(duration)}</span>
        </div>

        {/* Volume control */}
        <div className="flex items-center gap-2">
          {/* Icon switches between muted, low and high volume. */}
          <svg
            className="w-4 h-4 flex-shrink-0"
            style={{ color: "var(--muted)" }}
            viewBox="0 0 24 24"
            fill="none"
            stroke="currentColor"
            strokeWidth="2"
          >
            {volume === 0 ? (
              <>
                <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
                <line x1="23" y1="9" x2="17" y2="15" />
                <line x1="17" y1="9" x2="23" y2="15" />
              </>
            ) : volume < 0.5 ? (
              <>
                <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
                <path d="M15.54 8.46a5 5 0 010 7.07" />
              </>
            ) : (
              <>
                <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
                <path d="M19.07 4.93a10 10 0 010 14.14M15.54 8.46a5 5 0 010 7.07" />
              </>
            )}
          </svg>
          <input
            type="range"
            min={0}
            max={1}
            step={0.05}
            value={volume}
            onChange={(e) => setVolume(parseFloat(e.target.value))}
            className="w-20"
            aria-label="Volume"
          />
        </div>
      </div>
    </div>
  );
}
|
||||||
@@ -0,0 +1,193 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
/** Props accepted by the GenerationControls component. */
interface GenerationControlsProps {
  /** Current CFG scale; the slider offers 1.0 to 3.0 in steps of 0.1. */
  cfgScale: number;
  /** Called with the new CFG scale when the slider moves. */
  onCfgScaleChange: (v: number) => void;
  /** Current inference step count; the slider offers 10 to 30. */
  inferenceSteps: number;
  /** Called with the new step count when the slider moves. */
  onInferenceStepsChange: (v: number) => void;
  /** Called when the generate button is clicked. */
  onGenerate: () => void;
  /** Disables the button and shows a spinner while true. */
  isGenerating: boolean;
  /** Word count of the script; 0 disables the generate button. */
  wordCount: number;
}

/**
 * Settings card with the CFG scale and inference step sliders, a rough
 * generation time estimate and the generate button.
 *
 * The estimate assumes 50 words per second of generation time. The button is
 * disabled while generating or when the script has no words.
 */
export default function GenerationControls({
  cfgScale,
  onCfgScaleChange,
  inferenceSteps,
  onInferenceStepsChange,
  onGenerate,
  isGenerating,
  wordCount,
}: GenerationControlsProps) {
  const estimatedSeconds = Math.ceil(wordCount / 50);
  const estimatedDisplay =
    wordCount === 0
      ? "—"
      : estimatedSeconds < 60
        ? `~${estimatedSeconds}s`
        : `~${Math.floor(estimatedSeconds / 60)}m ${estimatedSeconds % 60}s`;

  const generateDisabled = isGenerating || wordCount === 0;

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-5"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <h2
        className="text-sm font-semibold uppercase tracking-wider"
        style={{ color: "var(--accent-teal)" }}
      >
        Generation Settings
      </h2>

      {/* CFG Scale slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label className="text-sm font-medium" style={{ color: "var(--foreground)" }}>
            Voice Expressiveness
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{
              background: "var(--background)",
              color: "var(--accent-teal)",
            }}
          >
            {cfgScale.toFixed(1)}
          </span>
        </div>
        {/* The visible label is not associated with the input, so give the
            slider its own accessible name (as the volume slider has). */}
        <input
          type="range"
          min={1.0}
          max={3.0}
          step={0.1}
          value={cfgScale}
          onChange={(e) => onCfgScaleChange(parseFloat(e.target.value))}
          className="w-full"
          aria-label="Voice Expressiveness"
        />
        <div
          className="flex items-center justify-between text-xs"
          style={{ color: "var(--muted)" }}
        >
          <span>Flat (1.0)</span>
          <span>CFG Scale</span>
          <span>Expressive (3.0)</span>
        </div>
      </div>

      {/* Inference Steps slider */}
      <div className="flex flex-col gap-2">
        <div className="flex items-center justify-between">
          <label className="text-sm font-medium" style={{ color: "var(--foreground)" }}>
            Quality vs Speed
          </label>
          <span
            className="text-sm font-mono px-2 py-0.5 rounded"
            style={{
              background: "var(--background)",
              color: "var(--accent-violet)",
            }}
          >
            {inferenceSteps}
          </span>
        </div>
        <input
          type="range"
          min={10}
          max={30}
          step={1}
          value={inferenceSteps}
          onChange={(e) => onInferenceStepsChange(parseInt(e.target.value, 10))}
          className="w-full"
          aria-label="Quality vs Speed"
          style={
            {
              "--thumb-color": "var(--accent-violet)",
            } as React.CSSProperties
          }
        />
        <div
          className="flex items-center justify-between text-xs"
          style={{ color: "var(--muted)" }}
        >
          <span>Faster (10)</span>
          <span>Inference Steps</span>
          <span>Higher quality (30)</span>
        </div>
      </div>

      {/* Estimated time */}
      <div
        className="flex items-center justify-between px-3 py-2 rounded-lg text-sm"
        style={{
          background: "var(--background)",
          border: "1px solid var(--border)",
        }}
      >
        <span style={{ color: "var(--muted)" }}>Estimated generation time</span>
        <span
          className="font-mono font-medium"
          style={{ color: "var(--accent-teal)" }}
        >
          {estimatedDisplay}
        </span>
      </div>

      {/* Generate button */}
      <button
        type="button"
        onClick={onGenerate}
        disabled={generateDisabled}
        className="w-full py-3 rounded-xl font-semibold text-sm transition-all cursor-pointer disabled:cursor-not-allowed flex items-center justify-center gap-2"
        style={
          generateDisabled
            ? {
                background: "var(--border)",
                color: "var(--muted)",
              }
            : {
                background:
                  "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
                color: "#fff",
                boxShadow: "0 4px 15px rgba(45, 212, 191, 0.2)",
              }
        }
      >
        {isGenerating ? (
          <>
            <svg
              className="animate-spin w-4 h-4"
              viewBox="0 0 24 24"
              fill="none"
            >
              <circle
                className="opacity-25"
                cx="12"
                cy="12"
                r="10"
                stroke="currentColor"
                strokeWidth="4"
              />
              <path
                className="opacity-75"
                fill="currentColor"
                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
              />
            </svg>
            Generating audio...
          </>
        ) : (
          <>
            <svg
              className="w-4 h-4"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
              strokeWidth="2"
            >
              <polygon points="5 3 19 12 5 21 5 3" />
            </svg>
            Generate Podcast Audio
          </>
        )}
      </button>
    </div>
  );
}
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useEffect, useState } from "react";
|
||||||
|
|
||||||
|
/** Connection state of the VibeVoice server as shown in the header. */
type ServerStatus = "checking" | "online" | "offline";

/**
 * App header with the VibePod branding and a server status indicator.
 *
 * Polls `/api/health` once on mount and then every 30 seconds. The indicator
 * starts in the "checking" state and switches to online/offline once a probe
 * has completed.
 */
export default function Header() {
  const [status, setStatus] = useState<ServerStatus>("checking");

  useEffect(() => {
    // Cleared on unmount so a probe that finishes late cannot update state
    // on a component that no longer exists.
    let active = true;

    const checkHealth = async () => {
      let next: ServerStatus = "offline";
      try {
        const res = await fetch("/api/health");
        // Only trust the body of a successful response.
        if (res.ok) {
          const data = await res.json();
          if (data.status === "online") {
            next = "online";
          }
        }
      } catch {
        // Network or parse failure: report the server as offline.
      }
      if (active) {
        setStatus(next);
      }
    };

    void checkHealth();
    const interval = setInterval(checkHealth, 30000);
    return () => {
      active = false;
      clearInterval(interval);
    };
  }, []);

  // Dot colour, label and pulse animation for each status.
  const statusConfig = {
    checking: {
      color: "bg-yellow-500",
      label: "Checking...",
      pulse: true,
    },
    online: {
      color: "bg-green-500",
      label: "Server Online",
      pulse: false,
    },
    offline: {
      color: "bg-red-500",
      label: "Server Offline",
      pulse: false,
    },
  };

  const cfg = statusConfig[status];

  return (
    <header
      className="border-b px-6 py-4 flex items-center justify-between"
      style={{
        background: "var(--card-bg)",
        borderColor: "var(--border)",
      }}
    >
      <div className="flex items-center gap-4">
        <div className="flex items-center gap-3">
          <div
            className="w-9 h-9 rounded-xl flex items-center justify-center text-lg font-bold"
            style={{
              background:
                "linear-gradient(135deg, var(--accent-teal-dim), var(--accent-violet-dim))",
            }}
          >
            🎙
          </div>
          <div>
            <h1
              className="text-xl font-bold tracking-tight"
              style={{
                background:
                  "linear-gradient(135deg, var(--accent-teal), var(--accent-violet))",
                WebkitBackgroundClip: "text",
                WebkitTextFillColor: "transparent",
              }}
            >
              VibePod
            </h1>
            <p className="text-xs" style={{ color: "var(--muted)" }}>
              Powered by VibeVoice 0.5B
            </p>
          </div>
        </div>
      </div>

      <div
        className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs font-medium border"
        style={{
          background: "var(--background)",
          borderColor: "var(--border)",
        }}
      >
        <span className="relative flex h-2 w-2">
          {/* Pinging halo, only shown while the status is still being checked. */}
          <span
            className={`${cfg.pulse ? "animate-ping absolute inline-flex h-full w-full rounded-full opacity-75 " + cfg.color : "hidden"}`}
          />
          <span
            className={`relative inline-flex rounded-full h-2 w-2 ${cfg.color}`}
          />
        </span>
        <span style={{ color: "var(--foreground)" }}>{cfg.label}</span>
      </div>
    </header>
  );
}
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useEffect, useRef } from "react";
|
||||||
|
|
||||||
|
/** Props accepted by the StatusLog component. */
interface StatusLogProps {
  /** Log lines to display, in the order given. */
  messages: Array<string>;
}
|
||||||
|
|
||||||
|
export default function StatusLog({ messages }: StatusLogProps) {
|
||||||
|
const bottomRef = useRef<HTMLDivElement>(null);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||||
|
}, [messages]);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
className="rounded-xl border p-5 flex flex-col gap-3"
|
||||||
|
style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<h2
|
||||||
|
className="text-sm font-semibold uppercase tracking-wider"
|
||||||
|
style={{ color: "var(--accent-teal)" }}
|
||||||
|
>
|
||||||
|
Status Log
|
||||||
|
</h2>
|
||||||
|
<div className="flex gap-1 ml-auto">
|
||||||
|
<span className="w-2.5 h-2.5 rounded-full bg-red-500 opacity-70" />
|
||||||
|
<span className="w-2.5 h-2.5 rounded-full bg-yellow-500 opacity-70" />
|
||||||
|
<span className="w-2.5 h-2.5 rounded-full bg-green-500 opacity-70" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
className="rounded-lg p-4 h-40 overflow-y-auto font-mono text-xs leading-relaxed"
|
||||||
|
style={{
|
||||||
|
background: "var(--background)",
|
||||||
|
border: "1px solid var(--border)",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{messages.length === 0 ? (
|
||||||
|
<p style={{ color: "var(--muted)" }}>
|
||||||
|
Waiting for input...
|
||||||
|
<span className="animate-pulse">▌</span>
|
||||||
|
</p>
|
||||||
|
) : (
|
||||||
|
messages.map((msg, i) => {
|
||||||
|
const isError =
|
||||||
|
msg.toLowerCase().includes("error") ||
|
||||||
|
msg.toLowerCase().includes("failed");
|
||||||
|
const isSuccess =
|
||||||
|
msg.toLowerCase().includes("done") ||
|
||||||
|
msg.toLowerCase().includes("complete") ||
|
||||||
|
msg.toLowerCase().includes("ready");
|
||||||
|
const color = isError
|
||||||
|
? "var(--error)"
|
||||||
|
: isSuccess
|
||||||
|
? "var(--success)"
|
||||||
|
: "var(--foreground)";
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div key={i} className="flex items-start gap-2">
|
||||||
|
<span style={{ color: "var(--muted)" }} className="select-none">
|
||||||
|
{String(i + 1).padStart(2, "0")}
|
||||||
|
</span>
|
||||||
|
<span style={{ color }}>{msg}</span>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})
|
||||||
|
)}
|
||||||
|
<div ref={bottomRef} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -0,0 +1,112 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
/** Demo script inserted by the "Load sample script" button. */
const SAMPLE_SCRIPT = `Welcome to VibePod, your gateway to the future of audio content creation. Today, we're diving deep into the world of artificial intelligence and how it's transforming the way we produce and consume podcasts.

Imagine being able to transform any written article, blog post, or essay into a professional-sounding audio experience in just seconds. That's exactly what VibeVoice 0.5B brings to the table — a compact yet powerful text-to-speech model that delivers remarkably natural-sounding voices.

The technology behind modern TTS systems has evolved dramatically over the past few years. We've moved from robotic, stilted speech synthesis to voices that carry real emotional nuance and natural prosody. VibeVoice represents Microsoft's latest contribution to this rapidly advancing field.

Whether you're a content creator looking to repurpose written material, an educator who wants to make content more accessible, or a developer building the next generation of audio applications, VibePod provides the tools you need.

In today's episode, we'll explore the key features that make VibeVoice unique, discuss practical use cases across different industries, and look ahead to what the next generation of voice AI might bring. Let's get started.`;

interface TextInputPanelProps {
  /** Current script text (the textarea is fully controlled by this value). */
  value: string;
  /** Called with the complete new text on every edit, sample load, or clear. */
  onChange: (text: string) => void;
}

/**
 * Builds mouse-enter/leave handlers that tint a button's text and border with
 * `hoverColor` while hovered and restore the resting colours afterwards.
 *
 * The handlers use `currentTarget` (the element the handler is attached to)
 * rather than `target`, so they stay correct if the button ever gains child
 * elements and need no type assertion.
 */
function hoverTint(hoverColor: string) {
  const paint = (el: HTMLElement, text: string, border: string) => {
    el.style.color = text;
    el.style.borderColor = border;
  };

  return {
    onMouseEnter: (e: { currentTarget: HTMLElement }) =>
      paint(e.currentTarget, hoverColor, hoverColor),
    onMouseLeave: (e: { currentTarget: HTMLElement }) =>
      paint(e.currentTarget, "var(--muted)", "var(--border)"),
  };
}

// Created once at module level so the handlers keep a stable identity across renders.
const SAMPLE_BUTTON_HOVER = hoverTint("var(--accent-violet)");
const CLEAR_BUTTON_HOVER = hoverTint("var(--error)");

/** Shared look of the two header buttons. */
const HEADER_BUTTON_CLASS =
  "text-xs px-3 py-1.5 rounded-lg border transition-colors cursor-pointer";
const HEADER_BUTTON_STYLE = {
  borderColor: "var(--border)",
  color: "var(--muted)",
};

/**
 * Script editor panel: a controlled textarea with "load sample" and "clear"
 * actions plus live word and character counts.
 */
export default function TextInputPanel({
  value,
  onChange,
}: TextInputPanelProps) {
  const charCount = value.length;
  const trimmed = value.trim();
  // Whitespace-only input counts as zero words rather than one empty token.
  const wordCount = trimmed === "" ? 0 : trimmed.split(/\s+/).length;

  return (
    <div
      className="rounded-xl border p-5 flex flex-col gap-4"
      style={{ background: "var(--card-bg)", borderColor: "var(--border)" }}
    >
      <div className="flex items-center justify-between">
        <h2
          className="text-sm font-semibold uppercase tracking-wider"
          style={{ color: "var(--accent-teal)" }}
        >
          Podcast Script
        </h2>
        <div className="flex items-center gap-2">
          {/* type="button" keeps these from submitting an enclosing form. */}
          <button
            type="button"
            onClick={() => onChange(SAMPLE_SCRIPT)}
            className={HEADER_BUTTON_CLASS}
            style={HEADER_BUTTON_STYLE}
            {...SAMPLE_BUTTON_HOVER}
          >
            Load sample script
          </button>
          <button
            type="button"
            onClick={() => onChange("")}
            className={HEADER_BUTTON_CLASS}
            style={HEADER_BUTTON_STYLE}
            {...CLEAR_BUTTON_HOVER}
          >
            Clear
          </button>
        </div>
      </div>

      <textarea
        value={value}
        onChange={(e) => onChange(e.target.value)}
        placeholder="Paste or type your podcast script here..."
        aria-label="Podcast script"
        rows={12}
        className="w-full rounded-lg p-4 text-sm resize-y outline-none transition-colors font-sans leading-relaxed"
        style={{
          background: "var(--background)",
          border: "1px solid var(--border)",
          color: "var(--foreground)",
          minHeight: "200px",
        }}
        // The default outline is disabled above, so focus is signalled via the border colour.
        onFocus={(e) => {
          e.currentTarget.style.borderColor = "var(--accent-teal)";
        }}
        onBlur={(e) => {
          e.currentTarget.style.borderColor = "var(--border)";
        }}
      />

      <div
        className="flex items-center justify-between text-xs"
        style={{ color: "var(--muted)" }}
      >
        <span>
          {wordCount} word{wordCount !== 1 ? "s" : ""}
        </span>
        <span>
          {charCount.toLocaleString()} character{charCount !== 1 ? "s" : ""}
        </span>
      </div>
    </div>
  );
}
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useRef, useState } from "react";
|
||||||
|
|
||||||
|
interface AudioPlayerState {
  /** True between the element's "play" and "pause"/"ended" events. */
  isPlaying: boolean;
  /** Playback position in seconds, mirrored from "timeupdate". */
  currentTime: number;
  /** Track length in seconds; 0 until the element reports a finite duration. */
  duration: number;
  /** Volume in the range [0, 1]. */
  volume: number;
}

/**
 * Manages an `HTMLAudioElement` for `audioUrl` and mirrors its playback state
 * into React state.
 *
 * A new element is created whenever `audioUrl` changes; passing `null` tears
 * the current element down. Playback position and duration reset on every
 * change, while the chosen volume is carried over to the next element.
 *
 * @param audioUrl URL of the audio to play, or `null` for no audio.
 * @returns The mirrored state plus `toggle`, `seek` and `setVolume` controls.
 *   `toggle` and `seek` are no-ops while no audio element exists.
 */
export function useAudioPlayer(audioUrl: string | null) {
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Last requested volume, kept in a ref so a newly created element can
  // inherit it without making the effect below depend on state.
  const volumeRef = useRef(1);
  const [state, setState] = useState<AudioPlayerState>({
    isPlaying: false,
    currentTime: 0,
    duration: 0,
    volume: 1,
  });

  // Create/replace the Audio element whenever the URL changes
  useEffect(() => {
    // The previous element's listeners are removed in cleanup before its
    // asynchronous "pause" event can fire, so reset the mirrored playback
    // fields here instead of relying on events.
    setState((prev) =>
      prev.isPlaying || prev.currentTime !== 0 || prev.duration !== 0
        ? { ...prev, isPlaying: false, currentTime: 0, duration: 0 }
        : prev,
    );

    if (!audioUrl) return;

    const audio = new Audio(audioUrl);
    audio.volume = volumeRef.current;
    audioRef.current = audio;

    const onTimeUpdate = () =>
      setState((prev) => ({ ...prev, currentTime: audio.currentTime }));
    const onDurationChange = () =>
      setState((prev) => ({
        ...prev,
        // duration is NaN before metadata loads and may be Infinity; expose
        // only finite values.
        duration: Number.isFinite(audio.duration) ? audio.duration : 0,
      }));
    const onEnded = () =>
      setState((prev) => ({ ...prev, isPlaying: false, currentTime: 0 }));
    const onPlay = () => setState((prev) => ({ ...prev, isPlaying: true }));
    const onPause = () => setState((prev) => ({ ...prev, isPlaying: false }));

    audio.addEventListener("timeupdate", onTimeUpdate);
    audio.addEventListener("durationchange", onDurationChange);
    audio.addEventListener("loadedmetadata", onDurationChange);
    audio.addEventListener("ended", onEnded);
    audio.addEventListener("play", onPlay);
    audio.addEventListener("pause", onPause);

    return () => {
      audio.pause();
      audio.removeEventListener("timeupdate", onTimeUpdate);
      audio.removeEventListener("durationchange", onDurationChange);
      audio.removeEventListener("loadedmetadata", onDurationChange);
      audio.removeEventListener("ended", onEnded);
      audio.removeEventListener("play", onPlay);
      audio.removeEventListener("pause", onPause);
      // Drop the reference so the controls cannot act on a torn-down element.
      if (audioRef.current === audio) {
        audioRef.current = null;
      }
    };
  }, [audioUrl]);

  const toggle = useCallback(() => {
    const audio = audioRef.current;
    if (!audio) return;

    if (!audio.paused) {
      audio.pause();
      return;
    }

    // play() returns a promise that rejects when playback cannot start;
    // handle it so it does not surface as an unhandled rejection, and
    // resynchronise isPlaying with the element.
    audio.play().catch(() => {
      if (audioRef.current === audio) {
        setState((prev) => ({ ...prev, isPlaying: !audio.paused }));
      }
    });
  }, []);

  const seek = useCallback((time: number) => {
    const audio = audioRef.current;
    if (!audio || !Number.isFinite(time)) return;
    // Before metadata loads duration is NaN; clamping against it would yield
    // NaN, and assigning NaN to currentTime throws.
    if (Number.isNaN(audio.duration)) return;
    audio.currentTime = Math.max(0, Math.min(time, audio.duration));
  }, []);

  const setVolume = useCallback((v: number) => {
    // Assigning a non-finite volume to the element throws.
    if (!Number.isFinite(v)) return;
    const clamped = Math.max(0, Math.min(1, v));

    volumeRef.current = clamped;
    if (audioRef.current) {
      audioRef.current.volume = clamped;
    }
    // Store the clamped value so state always matches the element.
    setState((prev) => ({ ...prev, volume: clamped }));
  }, []);

  return {
    isPlaying: state.isPlaying,
    currentTime: state.currentTime,
    duration: state.duration,
    volume: state.volume,
    toggle,
    seek,
    setVolume,
  };
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
import type { NextConfig } from "next";
|
||||||
|
|
||||||
|
/** Next.js configuration. No options are customised; framework defaults apply. */
const nextConfig: NextConfig = {};

export default nextConfig;
|
||||||
Generated
+1651
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"name": "podcast-forge",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "next dev --turbopack",
|
||||||
|
"build": "next build --turbopack",
|
||||||
|
"start": "next start"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"react": "19.1.0",
|
||||||
|
"react-dom": "19.1.0",
|
||||||
|
"next": "15.5.15"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"typescript": "^5",
|
||||||
|
"@types/node": "^20",
|
||||||
|
"@types/react": "^19",
|
||||||
|
"@types/react-dom": "^19",
|
||||||
|
"@tailwindcss/postcss": "^4",
|
||||||
|
"tailwindcss": "^4"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
/** PostCSS plugins, listed by package name. */
const plugins = ["@tailwindcss/postcss"];

/** PostCSS configuration object exported as the module default. */
const config = { plugins };

export default config;
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
|
||||||
|
After Width: | Height: | Size: 391 B |
@@ -0,0 +1 @@
|
|||||||
|
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
|
||||||
|
After Width: | Height: | Size: 1.0 KiB |
@@ -0,0 +1 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
|
||||||
|
After Width: | Height: | Size: 1.3 KiB |
@@ -0,0 +1 @@
|
|||||||
|
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
|
||||||
|
After Width: | Height: | Size: 128 B |
@@ -0,0 +1 @@
|
|||||||
|
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
|
||||||
|
After Width: | Height: | Size: 385 B |
@@ -0,0 +1,11 @@
|
|||||||
|
# VibePod TTS Server dependencies
|
||||||
|
# Install with: pip install -r requirements.txt
|
||||||
|
|
||||||
|
fastapi>=0.111.0
|
||||||
|
uvicorn[standard]>=0.29.0
|
||||||
|
transformers>=4.40.0
|
||||||
|
torch>=2.2.0
|
||||||
|
soundfile>=0.12.1
|
||||||
|
scipy>=1.13.0
|
||||||
|
numpy>=1.26.0
|
||||||
|
pydantic>=2.7.0
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""
|
||||||
|
VibePod — VibeVoice FastAPI TTS Server
|
||||||
|
|
||||||
|
Loads microsoft/VibeVoice-Realtime-0.5B via HuggingFace transformers and
|
||||||
|
exposes a POST /generate endpoint that accepts { text, cfg_scale, inference_steps }
|
||||||
|
and returns a WAV audio blob.
|
||||||
|
|
||||||
|
Start with:
|
||||||
|
uvicorn vibevoice_server:app --host 0.0.0.0 --port 8000
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
import logging
import threading
from contextlib import asynccontextmanager
from typing import AsyncGenerator, Optional

import numpy as np
import soundfile as sf
import torch
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field, field_validator
from transformers import AutoProcessor, AutoModel
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
MODEL_ID = "microsoft/VibeVoice-Realtime-0.5B"
|
||||||
|
|
||||||
|
# ─── Global model state ────────────────────────────────────────────────────────
# Filled in by _load_model(); the processor and model stay None until then.
_processor: Optional[object] = None
_model: Optional[object] = None
_device: str = "cpu"


def _load_model() -> None:
    """Load the processor and model for MODEL_ID into the module globals.

    Does nothing when a model is already present. Selects CUDA when
    ``torch.cuda.is_available()`` reports it (loading weights as float16),
    otherwise the CPU (float32), then moves the model to that device and
    switches it to eval mode.
    """
    global _processor, _model, _device

    if _model is not None:
        return

    cuda_available = torch.cuda.is_available()
    _device = "cuda" if cuda_available else "cpu"
    logger.info("Loading %s on %s …", MODEL_ID, _device)

    weights_dtype = torch.float16 if _device == "cuda" else torch.float32

    _processor = AutoProcessor.from_pretrained(MODEL_ID)
    _model = AutoModel.from_pretrained(MODEL_ID, torch_dtype=weights_dtype)
    _model = _model.to(_device)
    _model.eval()

    logger.info("Model loaded successfully.")
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan hook: load the model, then hand control to the app.

    Nothing runs after the ``yield``, so there is no shutdown work.
    """
    _load_model()
    yield


app = FastAPI(
    title="VibePod TTS Server",
    version="0.1.0",
    lifespan=lifespan,
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Request / response schemas ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class GenerateRequest(BaseModel):
    """Request body for POST /generate."""

    # Text to synthesise: 1 to 10,000 characters as received (checked by Field).
    text: str = Field(..., min_length=1, max_length=10_000)
    # Passed to the model's generate() call as guidance_scale; allowed range 1.0-3.0.
    cfg_scale: float = Field(default=2.5, ge=1.0, le=3.0)
    # Passed to the model's generate() call as num_inference_steps; allowed range 10-30.
    inference_steps: int = Field(default=20, ge=10, le=30)

    @field_validator("text")
    @classmethod
    def text_not_blank(cls, v: str) -> str:
        """Reject whitespace-only text and return the text with outer whitespace removed."""
        cleaned = v.strip()
        if cleaned == "":
            raise ValueError("text must not be blank")
        return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Endpoints ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/health")
async def health() -> dict:
    """Liveness probe used by the Next.js /api/health route."""
    payload = {"status": "online", "model": MODEL_ID}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
# Only one inference runs at a time: every request shares the single model
# instance, and generation now executes on worker threads.
_generation_lock = threading.Lock()


def _render_wav(req: GenerateRequest) -> io.BytesIO:
    """Run the model for ``req`` and return the audio as an in-memory WAV file.

    Blocking; intended to be called from a worker thread.

    Raises:
        RuntimeError: if the model output contains no samples.
    """
    with _generation_lock:
        inputs = _processor(text=req.text, return_tensors="pt").to(_device)

        with torch.no_grad():
            output = _model.generate(
                **inputs,
                guidance_scale=req.cfg_scale,
                num_inference_steps=req.inference_steps,
            )

    # output is typically a tensor of shape (1, num_samples) or (num_samples,).
    # atleast_1d keeps a single-sample result indexable after squeeze().
    audio_array = np.atleast_1d(output.squeeze().cpu().numpy())
    if audio_array.size == 0:
        raise RuntimeError("Model returned no audio samples")

    # Normalise to [-1, 1] float32 for WAV
    audio_array = audio_array.astype(np.float32, copy=False)
    # Replace NaN/Inf samples with silence so they neither defeat the peak
    # normalisation below nor end up in the file.
    audio_array = np.nan_to_num(audio_array, nan=0.0, posinf=0.0, neginf=0.0)
    peak = np.abs(audio_array).max()
    if peak > 0:
        audio_array = audio_array / peak

    # Determine sample rate — try common attribute names, else assume 24 kHz
    sample_rate: int = (
        getattr(_model.config, "sampling_rate", None)
        or getattr(_model.config, "sample_rate", None)
        or 24_000
    )

    buf = io.BytesIO()
    sf.write(buf, audio_array, sample_rate, format="WAV", subtype="FLOAT")
    buf.seek(0)

    logger.info(
        "Audio generated: %.2f s at %d Hz (%d bytes)",
        len(audio_array) / sample_rate,
        sample_rate,
        buf.getbuffer().nbytes,
    )
    return buf


@app.post("/generate")
async def generate(req: GenerateRequest) -> StreamingResponse:
    """
    Generate speech from text and return a WAV audio stream.

    Responds with 503 while the model or processor is not loaded, and with 500
    (carrying the exception text as detail) when generation fails.
    """
    if _model is None or _processor is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet — please retry in a moment.")

    logger.info(
        "Generating audio for %d chars (cfg=%.1f, steps=%d)",
        len(req.text),
        req.cfg_scale,
        req.inference_steps,
    )

    try:
        # Inference is blocking; running it in a worker thread keeps the event
        # loop free so other requests (e.g. /health) are still answered.
        wav = await run_in_threadpool(_render_wav, req)
    except Exception as exc:
        logger.exception("Generation failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return StreamingResponse(
        wav,
        media_type="audio/wav",
        headers={"Content-Disposition": 'attachment; filename="vibepod-output.wav"'},
    )
|
||||||
|
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2017",
|
||||||
|
"lib": ["dom", "dom.iterable", "esnext"],
|
||||||
|
"allowJs": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"strict": true,
|
||||||
|
"noEmit": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"module": "esnext",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"jsx": "preserve",
|
||||||
|
"incremental": true,
|
||||||
|
"plugins": [
|
||||||
|
{
|
||||||
|
"name": "next"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"paths": {
|
||||||
|
"@/*": ["./*"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||||
|
"exclude": ["node_modules"]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user