Create Kokoro TTS JavaScript library (#3)

* Set up JS project

* Finalise JS library

* Update README

* Fix package.json repository url

* Rename package -> `kokoro-js`

* Fix samples in README

* Cleanup README

* Bump `phonemizer` version

* Create web demo

* Run prettier

* Link to model used in demo

* Enable multithreading in HF space demo (~40% faster)

* Add link to demo in README

* Bump to v1.0.1
This commit is contained in:
Joshua Lochner
2025-01-16 19:50:34 +02:00
committed by GitHub
parent 757c80cc5b
commit 0a1dc5750c
37 changed files with 8820 additions and 0 deletions

144
kokoro.js/demo/src/App.jsx Normal file
View File

@@ -0,0 +1,144 @@
import { useRef, useState, useEffect } from "react";
import { motion } from "motion/react";
export default function App() {
// Create a reference to the worker object.
const worker = useRef(null);
const [inputText, setInputText] = useState("Life is like a box of chocolates. You never know what you're gonna get.");
const [selectedSpeaker, setSelectedSpeaker] = useState("af");
const [status, setStatus] = useState(null);
const [error, setError] = useState(null);
const [loadingMessage, setLoadingMessage] = useState("Loading model (only downloaded once)...");
const [results, setResults] = useState([]);
// We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
useEffect(() => {
// Create the worker if it does not yet exist.
worker.current ??= new Worker(new URL("./worker.js", import.meta.url), {
type: "module",
});
// Create a callback function for messages from the worker thread.
const onMessageReceived = (e) => {
switch (e.data.status) {
// TODO: WebGPU feature checking
// case "feature-success":
// break;
// case "feature-error":
// setError(e.data.data);
// break;
case "ready":
setStatus("ready");
break;
case "complete":
const { audio, text } = e.data;
// Generation complete: re-enable the "Generate" button
setResults((prev) => [{ text, src: audio }, ...prev]);
setStatus("ready");
break;
}
};
const onErrorReceived = (e) => {
console.error("Worker error:", e);
};
// Attach the callback function as an event listener.
worker.current.addEventListener("message", onMessageReceived);
worker.current.addEventListener("error", onErrorReceived);
// Define a cleanup function for when the component is unmounted.
return () => {
worker.current.removeEventListener("message", onMessageReceived);
worker.current.removeEventListener("error", onErrorReceived);
};
}, []);
const handleSubmit = (e) => {
e.preventDefault();
setStatus("running");
worker.current.postMessage({
type: "generate",
text: inputText.trim(),
voice: selectedSpeaker,
});
};
return (
<div className="relative w-full min-h-screen bg-gradient-to-br from-gray-900 to-gray-700 flex flex-col items-center justify-center p-4 relative overflow-hidden font-sans">
<motion.div initial={{ opacity: 1 }} animate={{ opacity: status === null ? 1 : 0 }} transition={{ duration: 0.5 }} className="absolute w-screen h-screen justify-center flex flex-col items-center z-10 bg-gray-800/95 backdrop-blur-md" style={{ pointerEvents: status === null ? "auto" : "none" }}>
<div className="w-[250px] h-[250px] border-4 border-white shadow-[0_0_0_5px_#4973ff] rounded-full overflow-hidden">
<div className="loading-wave"></div>
</div>
<p className={`text-3xl my-5 text-center ${error ? "text-red-500" : "text-white"}`}>{error ?? loadingMessage}</p>
</motion.div>
<div className="max-w-3xl w-full space-y-8 relative z-[2]">
<div className="text-center">
<h1 className="text-5xl font-extrabold text-gray-100 mb-2 drop-shadow-lg font-heading">Kokoro Text-to-Speech</h1>
<p className="text-2xl text-gray-300 font-semibold font-subheading">
Powered by&nbsp;
<a href="https://github.com/hexgrad/kokoro" target="_blank" rel="noreferrer" className="underline">
Kokoro
</a>
&nbsp;and&nbsp;
<a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">
<img width="40" src="hf-logo.svg" className="inline translate-y-[-2px] me-1"></img>Transformers.js
</a>
</p>
</div>
<div className="bg-gray-800/50 backdrop-blur-sm border border-gray-700 rounded-lg p-6">
<form onSubmit={handleSubmit} className="space-y-4">
<textarea placeholder="Enter text..." value={inputText} onChange={(e) => setInputText(e.target.value)} className="w-full min-h-[100px] max-h-[300px] bg-gray-700/50 backdrop-blur-sm border-2 border-gray-600 rounded-xl resize-y text-gray-100 placeholder-gray-400 px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent" rows={Math.min(8, inputText.split("\n").length)} />
<div className="flex flex-col items-center space-y-4">
<select value={selectedSpeaker} onChange={(e) => setSelectedSpeaker(e.target.value)} className="w-full bg-gray-700/50 backdrop-blur-sm border-2 border-gray-600 rounded-xl text-gray-100 px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent">
<option value="af">Default (American Female)</option>
<option value="af_bella">Bella (American Female)</option>
<option value="af_nicole">Nicole (American Female)</option>
<option value="af_sarah">Sarah (American Female)</option>
<option value="af_sky">Sky (American Female)</option>
<option value="am_adam">Adam (American Male)</option>
<option value="am_michael">Michael (American Male)</option>
<option value="bf_emma">Emma (British Female)</option>
<option value="bf_isabella">Isabella (British Female)</option>
<option value="bm_george">George (British Male)</option>
<option value="bm_lewis">Lewis (British Male)</option>
</select>
<button type="submit" className="inline-flex justify-center items-center px-6 py-2 text-lg font-semibold bg-gradient-to-t from-blue-600 to-purple-600 hover:from-blue-700 hover:to-purple-700 transition-colors duration-300 rounded-xl text-white disabled:opacity-50" disabled={status === "running" || inputText.trim() === ""}>
{status === "running" ? "Generating..." : "Generate"}
</button>
</div>
</form>
</div>
{results.length > 0 && (
<motion.div initial={{ y: 50, opacity: 0 }} animate={{ y: 0, opacity: 1 }} transition={{ duration: 0.5 }} className="max-h-[250px] overflow-y-auto px-2 mt-4 space-y-6 relative z-[2]">
{results.map((result, i) => (
<div key={i}>
<div className="text-white bg-gray-800/70 backdrop-blur-sm border border-gray-700 rounded-lg p-4 z-10">
<span className="absolute right-5 font-bold">#{results.length - i}</span>
<p className="mb-3 max-w-[95%]">{result.text}</p>
<audio controls src={result.src} className="w-full">
Your browser does not support the audio element.
</audio>
</div>
</div>
))}
</motion.div>
)}
</div>
<div className="bg-[#015871] pointer-events-none absolute left-0 w-full h-[5%] bottom-[-50px]">
<div className="wave"></div>
<div className="wave"></div>
</div>
</div>
);
}

View File

@@ -0,0 +1,100 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
/*
* Wave animations adapted from the following two demos:
* - https://codepen.io/upasanaasopa/pen/poObEWZ
* - https://codepen.io/breakstorm00/pen/qBJZQNB
*/
/* Global reset: remove default margin and padding and use border-box sizing
 * for every element and its :before / :after pseudo-elements. */
*,
*:before,
*:after {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Loading indicator: a blue, fully rounded element that fills its parent.
 * Its two pseudo-elements are oversized rounded shapes that rotate over it
 * (see the `animate` keyframes). */
.loading-wave {
position: relative;
top: 0;
width: 100%;
height: 100%;
background: #2c74b3;
border-radius: 50%;
box-shadow: inset 0 0 50px 0 rgba(0, 0, 0, 0.5);
}
/* Shared geometry for both rotating shapes: twice the size of the element,
 * centred horizontally and shifted upwards by the translate below. */
.loading-wave:before,
.loading-wave:after {
content: "";
position: absolute;
top: 0;
left: 50%;
width: 200%;
height: 200%;
/* Overridden by the per-pseudo-element backgrounds declared below. */
background: black;
transform: translate(-50%, -75%);
}
/* Opaque white shape, one rotation every 5s. */
.loading-wave:before {
border-radius: 45%;
background: rgba(255, 255, 255, 1);
animation: animate 5s linear infinite;
}
/* Half-transparent white shape, one rotation every 10s. */
.loading-wave:after {
border-radius: 40%;
background: rgba(255, 255, 255, 0.5);
animation: animate 10s linear infinite;
}
/* Scrolling wave strip: /wave.svg tiled horizontally across a 6400px-wide
 * element placed directly above its container (top equals minus its height). */
.wave {
background: url(/wave.svg) repeat-x;
position: absolute;
top: -198px;
width: 6400px;
height: 198px;
animation: wave 7s cubic-bezier(0.36, 0.45, 0.63, 0.53) infinite;
/* NOTE(review): presumably a compositing-layer hint for smoother animation — confirm. */
transform: translate3d(0, 0, 0);
}
/* Second wave: positioned slightly lower, scrolls with a small negative delay
 * and additionally bobs vertically via the `swell` keyframes. */
.wave:nth-of-type(2) {
top: -175px;
animation:
wave 7s cubic-bezier(0.36, 0.45, 0.63, 0.53) -0.125s infinite,
swell 7s ease -1.25s infinite;
opacity: 1;
}
/* Horizontal scroll: slides the element 1600px to the left over one cycle.
 * NOTE(review): presumably 1600px matches the width of one wave.svg tile so the
 * loop restarts seamlessly — confirm against the asset. */
@keyframes wave {
0% {
margin-left: 0;
}
100% {
margin-left: -1600px;
}
}
/* Vertical bob: moves between 25px up and 5px down, ending where it started. */
@keyframes swell {
0%,
100% {
transform: translate3d(0, -25px, 0);
}
50% {
transform: translate3d(0, 5px, 0);
}
}
/* Full rotation for the .loading-wave pseudo-elements. The translate is repeated
 * in each keyframe so the offset set in their static `transform` is kept while
 * rotating. */
@keyframes animate {
0% {
transform: translate(-50%, -75%) rotate(0deg);
}
100% {
transform: translate(-50%, -75%) rotate(360deg);
}
}

View File

@@ -0,0 +1,10 @@
import { StrictMode } from "react";
import { createRoot } from "react-dom/client";
import "./index.css";
import App from "./App.jsx";
createRoot(document.getElementById("root")).render(
<StrictMode>
<App />
</StrictMode>,
);

View File

@@ -0,0 +1,20 @@
import { KokoroTTS } from "kokoro-js";
const model_id = "onnx-community/Kokoro-82M-ONNX";

/**
 * Turns any thrown value into a plain string that can be posted to the main thread.
 * @param {unknown} err - The value caught by a `catch` clause.
 * @returns {string} The error's message, or the stringified value for non-Error throws.
 */
const describeError = (err) => (err instanceof Error ? err.message : String(err));

// Load the model once, when the worker starts.
let tts;
try {
  tts = await KokoroTTS.from_pretrained(model_id, {
    dtype: "q8", // Options: "fp32", "fp16", "q8", "q4", "q4f16"
  });
} catch (err) {
  // Tell the main thread why loading failed, then rethrow so the worker
  // still fails exactly as it did before this handler existed.
  self.postMessage({ status: "error", data: describeError(err) });
  throw err;
}
self.postMessage({ status: "ready" });

// Listen for messages from the main thread
self.addEventListener("message", async (e) => {
  const { text, voice } = e.data;

  try {
    // Generate speech
    const audio = await tts.generate(text, { voice });

    // Send the audio file back to the main thread
    const blob = audio.toBlob();
    self.postMessage({ status: "complete", audio: URL.createObjectURL(blob), text });
  } catch (err) {
    // Without this, a failed generation is an unhandled rejection inside the
    // worker and the main thread never learns that the request finished.
    console.error(err);
    self.postMessage({ status: "error", data: describeError(err) });
  }
});