Update Kokoro.js: WebGPU support, v1.0 integration (#60)

* Set up JS project

* Finalise JS library

* Update README

* Fix package.json repository url

* Rename package -> `kokoro-js`

* Fix samples in README

* Clean up README

* Bump `phonemizer` version

* Create web demo

* Run prettier

* Link to model used in demo

* Enable multithreading in HF space demo (~40% faster)

* Add link to demo in README

* Bump to v1.0.1

* Update voices

* Update versions

* Update phonemize JSDoc

* Use updated voice pack

* Update versions

* Update demo (v1.0 & WebGPU support)

* Update README

* Enforce maximum number of tokens

* Update README

* [version] Update to 1.1.1
This commit is contained in:
Joshua Lochner
2025-02-07 20:04:41 +02:00
committed by GitHub
parent 31a2b6337b
commit e0bf641def
65 changed files with 1242 additions and 164 deletions

View File

@@ -41,33 +41,39 @@ export class KokoroTTS {
console.table(VOICES);
}
/**
* Generate audio from text.
*
* Note: The model will be loaded on the first call, and subsequent calls will use the same model.
* @param {string} text The input text
* @param {Object} options Additional options
* @param {keyof typeof VOICES} [options.voice="af"] The voice style to use
* @param {number} [options.speed=1] The speaking speed
* @returns {Promise<RawAudio>} The generated audio
*/
async generate(text, { voice = "af", speed = 1 } = {}) {
_validate_voice(voice) {
if (!VOICES.hasOwnProperty(voice)) {
console.error(`Voice "${voice}" not found. Available voices:`);
console.table(VOICES);
throw new Error(`Voice "${voice}" not found. Should be one of: ${Object.keys(VOICES).join(", ")}.`);
}
}
const language = voice.at(0); // "a" or "b"
/**
* Generate audio from text.
*
* @param {string} text The input text
* @param {Object} options Additional options
* @param {keyof typeof VOICES} [options.voice="af_heart"] The voice style to use
* @param {number} [options.speed=1] The speaking speed
* @returns {Promise<RawAudio>} The generated audio
*/
async generate(text, { voice = "af_heart", speed = 1 } = {}) {
this._validate_voice(voice);
const language = /** @type {"a"|"b"} */ (voice.at(0)); // "a" or "b"
const phonemes = await phonemize(text, language);
const { input_ids } = this.tokenizer(phonemes, {
truncation: true,
});
// Select voice style based on number of input tokens
const num_tokens = Math.max(
input_ids.dims.at(-1) - 2, // Without padding;
0,
const num_tokens = Math.min(
Math.max(
input_ids.dims.at(-1) - 2,
0,
),
509,
);
// Load voice style

View File

@@ -164,6 +164,13 @@ function escapeRegExp(string) {
const PUNCTUATION = ';:,.!?¡¿—…"«»“”(){}[]';
const PUNCTUATION_PATTERN = new RegExp(`(\\s*[${escapeRegExp(PUNCTUATION)}]+\\s*)+`, "g");
/**
* Phonemize text using the eSpeak-NG phonemizer
* @param {string} text The text to phonemize
* @param {"a"|"b"} [language="a"] The language to use
* @param {boolean} [norm=true] Whether to normalize the text
* @returns {Promise<string>} The phonemized text
*/
export async function phonemize(text, language = "a", norm = true) {
// 1. Normalize text
if (norm) {

View File

@@ -2,66 +2,423 @@ import path from "path";
import fs from "fs/promises";
export const VOICES = Object.freeze({
af: {
// Default voice is a 50-50 mix of Bella & Sarah
name: "Default",
af_heart: {
name: "Heart",
language: "en-us",
gender: "Female",
traits: "❤️",
targetQuality: "A",
overallGrade: "A",
},
af_alloy: {
name: "Alloy",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
af_aoede: {
name: "Aoede",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_bella: {
name: "Bella",
language: "en-us",
gender: "Female",
traits: "🔥",
targetQuality: "A",
overallGrade: "A-",
},
af_jessica: {
name: "Jessica",
language: "en-us",
gender: "Female",
targetQuality: "C",
overallGrade: "D",
},
af_kore: {
name: "Kore",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_nicole: {
name: "Nicole",
language: "en-us",
gender: "Female",
traits: "🎧",
targetQuality: "B",
overallGrade: "B-",
},
af_nova: {
name: "Nova",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
af_river: {
name: "River",
language: "en-us",
gender: "Female",
targetQuality: "C",
overallGrade: "D",
},
af_sarah: {
name: "Sarah",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_sky: {
name: "Sky",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C-",
},
am_adam: {
name: "Adam",
language: "en-us",
gender: "Male",
targetQuality: "D",
overallGrade: "F+",
},
am_echo: {
name: "Echo",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_eric: {
name: "Eric",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_fenrir: {
name: "Fenrir",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_liam: {
name: "Liam",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_michael: {
name: "Michael",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_onyx: {
name: "Onyx",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_puck: {
name: "Puck",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_santa: {
name: "Santa",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D-",
},
bf_emma: {
name: "Emma",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "B",
overallGrade: "B-",
},
bf_isabella: {
name: "Isabella",
language: "en-gb",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
bm_george: {
name: "George",
language: "en-gb",
gender: "Male",
targetQuality: "B",
overallGrade: "C",
},
bm_lewis: {
name: "Lewis",
language: "en-gb",
gender: "Male",
targetQuality: "C",
overallGrade: "D+",
},
bf_alice: {
name: "Alice",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "C",
overallGrade: "D",
},
bf_lily: {
name: "Lily",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "C",
overallGrade: "D",
},
bm_daniel: {
name: "Daniel",
language: "en-gb",
gender: "Male",
traits: "🚹",
targetQuality: "C",
overallGrade: "D",
},
bm_fable: {
name: "Fable",
language: "en-gb",
gender: "Male",
traits: "🚹",
targetQuality: "B",
overallGrade: "C",
},
// TODO: Add support for other languages:
// jf_alpha: {
// name: "alpha",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C+",
// },
// jf_gongitsune: {
// name: "gongitsune",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// jf_nezumi: {
// name: "nezumi",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C-",
// },
// jf_tebukuro: {
// name: "tebukuro",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// jm_kumo: {
// name: "kumo",
// language: "ja",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C-",
// },
// zf_xiaobei: {
// name: "xiaobei",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoni: {
// name: "xiaoni",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoxiao: {
// name: "xiaoxiao",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoyi: {
// name: "xiaoyi",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunjian: {
// name: "yunjian",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunxi: {
// name: "yunxi",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunxia: {
// name: "yunxia",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunyang: {
// name: "yunyang",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// ef_dora: {
// name: "dora",
// language: "es",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// em_alex: {
// name: "alex",
// language: "es",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// em_santa: {
// name: "santa",
// language: "es",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// ff_siwis: {
// name: "siwis",
// language: "fr",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "B-",
// },
// hf_alpha: {
// name: "alpha",
// language: "hi",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// hf_beta: {
// name: "beta",
// language: "hi",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// hm_omega: {
// name: "omega",
// language: "hi",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// hm_psi: {
// name: "psi",
// language: "hi",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// if_sara: {
// name: "sara",
// language: "it",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// im_nicola: {
// name: "nicola",
// language: "it",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// pf_dora: {
// name: "dora",
// language: "pt-br",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// pm_alex: {
// name: "alex",
// language: "pt-br",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// pm_santa: {
// name: "santa",
// language: "pt-br",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
});
const VOICE_DATA_URL = "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/voices";
const VOICE_DATA_URL = "https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/voices";
/**
*
@@ -70,7 +427,8 @@ const VOICE_DATA_URL = "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/re
*/
async function getVoiceFile(id) {
if (fs?.readFile) {
const file = path.resolve(import.meta.dirname ?? __dirname, `../voices/${id}.bin`);
const dirname = typeof __dirname !== "undefined" ? __dirname : import.meta.dirname;
const file = path.resolve(dirname, `../voices/${id}.bin`);
const { buffer } = await fs.readFile(file);
return buffer;
}