From e0bf641def52db8471d4330d04948056e08e2248 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 7 Feb 2025 20:04:41 +0200 Subject: [PATCH] Update Kokoro.js: WebGPU support, v1.0 integration (#60) * Set up JS project * Finalise JS library * Update README * Fix package.json repository url * Rename package -> `kokoro-js` * Fix samples in README * Cleanup README * Bump `phonemizer` version * Create web demo * Run prettier * Link to model used in demo * Enable multithreading in HF space demo (~40% faster) * Add link to demo in README * Bump to v1.0.1 * Update voices * Update versions * Update phonemize JSDoc * Use updated voice pack * Update versions * Update demo (v1.0 & WebGPU support) * Update README * Enforce maximum number of tokens * Update README * [version] Update to 1.1.1 --- kokoro.js/README.md | 62 ++- kokoro.js/demo/package-lock.json | 812 ++++++++++++++++++++++++++--- kokoro.js/demo/src/App.jsx | 38 +- kokoro.js/demo/src/utils.js | 8 + kokoro.js/demo/src/worker.js | 17 +- kokoro.js/package-lock.json | 52 +- kokoro.js/package.json | 4 +- kokoro.js/src/kokoro.js | 36 +- kokoro.js/src/phonemize.js | 7 + kokoro.js/src/voices.js | 370 ++++++++++++- kokoro.js/voices/af.bin | Bin 524288 -> 0 bytes kokoro.js/voices/af_alloy.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_aoede.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_bella.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/af_heart.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_jessica.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_kore.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_nicole.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/af_nova.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_river.bin | Bin 0 -> 522240 bytes kokoro.js/voices/af_sarah.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/af_sky.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/am_adam.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/am_echo.bin | Bin 0 -> 522240 bytes kokoro.js/voices/am_eric.bin | Bin 0 -> 522240 
bytes kokoro.js/voices/am_fenrir.bin | Bin 0 -> 522240 bytes kokoro.js/voices/am_liam.bin | Bin 0 -> 522240 bytes kokoro.js/voices/am_michael.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/am_onyx.bin | Bin 0 -> 522240 bytes kokoro.js/voices/am_puck.bin | Bin 0 -> 522240 bytes kokoro.js/voices/am_santa.bin | Bin 0 -> 522240 bytes kokoro.js/voices/bf_alice.bin | Bin 0 -> 522240 bytes kokoro.js/voices/bf_emma.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/bf_isabella.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/bf_lily.bin | Bin 0 -> 522240 bytes kokoro.js/voices/bm_daniel.bin | Bin 0 -> 522240 bytes kokoro.js/voices/bm_fable.bin | Bin 0 -> 522240 bytes kokoro.js/voices/bm_george.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/bm_lewis.bin | Bin 524288 -> 522240 bytes kokoro.js/voices/ef_dora.bin | Bin 0 -> 522240 bytes kokoro.js/voices/em_alex.bin | Bin 0 -> 522240 bytes kokoro.js/voices/em_santa.bin | Bin 0 -> 522240 bytes kokoro.js/voices/ff_siwis.bin | Bin 0 -> 522240 bytes kokoro.js/voices/hf_alpha.bin | Bin 0 -> 522240 bytes kokoro.js/voices/hf_beta.bin | Bin 0 -> 522240 bytes kokoro.js/voices/hm_omega.bin | Bin 0 -> 522240 bytes kokoro.js/voices/hm_psi.bin | Bin 0 -> 522240 bytes kokoro.js/voices/if_sara.bin | Bin 0 -> 522240 bytes kokoro.js/voices/im_nicola.bin | Bin 0 -> 522240 bytes kokoro.js/voices/jf_alpha.bin | Bin 0 -> 522240 bytes kokoro.js/voices/jf_gongitsune.bin | Bin 0 -> 522240 bytes kokoro.js/voices/jf_nezumi.bin | Bin 0 -> 522240 bytes kokoro.js/voices/jf_tebukuro.bin | Bin 0 -> 522240 bytes kokoro.js/voices/jm_kumo.bin | Bin 0 -> 522240 bytes kokoro.js/voices/pf_dora.bin | Bin 0 -> 522240 bytes kokoro.js/voices/pm_alex.bin | Bin 0 -> 522240 bytes kokoro.js/voices/pm_santa.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zf_xiaobei.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zf_xiaoni.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zf_xiaoxiao.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zf_xiaoyi.bin | Bin 0 -> 522240 bytes 
kokoro.js/voices/zm_yunjian.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zm_yunxi.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zm_yunxia.bin | Bin 0 -> 522240 bytes kokoro.js/voices/zm_yunyang.bin | Bin 0 -> 522240 bytes 65 files changed, 1242 insertions(+), 164 deletions(-) create mode 100644 kokoro.js/demo/src/utils.js delete mode 100644 kokoro.js/voices/af.bin create mode 100644 kokoro.js/voices/af_alloy.bin create mode 100644 kokoro.js/voices/af_aoede.bin create mode 100644 kokoro.js/voices/af_heart.bin create mode 100644 kokoro.js/voices/af_jessica.bin create mode 100644 kokoro.js/voices/af_kore.bin create mode 100644 kokoro.js/voices/af_nova.bin create mode 100644 kokoro.js/voices/af_river.bin create mode 100644 kokoro.js/voices/am_echo.bin create mode 100644 kokoro.js/voices/am_eric.bin create mode 100644 kokoro.js/voices/am_fenrir.bin create mode 100644 kokoro.js/voices/am_liam.bin create mode 100644 kokoro.js/voices/am_onyx.bin create mode 100644 kokoro.js/voices/am_puck.bin create mode 100644 kokoro.js/voices/am_santa.bin create mode 100644 kokoro.js/voices/bf_alice.bin create mode 100644 kokoro.js/voices/bf_lily.bin create mode 100644 kokoro.js/voices/bm_daniel.bin create mode 100644 kokoro.js/voices/bm_fable.bin create mode 100644 kokoro.js/voices/ef_dora.bin create mode 100644 kokoro.js/voices/em_alex.bin create mode 100644 kokoro.js/voices/em_santa.bin create mode 100644 kokoro.js/voices/ff_siwis.bin create mode 100644 kokoro.js/voices/hf_alpha.bin create mode 100644 kokoro.js/voices/hf_beta.bin create mode 100644 kokoro.js/voices/hm_omega.bin create mode 100644 kokoro.js/voices/hm_psi.bin create mode 100644 kokoro.js/voices/if_sara.bin create mode 100644 kokoro.js/voices/im_nicola.bin create mode 100644 kokoro.js/voices/jf_alpha.bin create mode 100644 kokoro.js/voices/jf_gongitsune.bin create mode 100644 kokoro.js/voices/jf_nezumi.bin create mode 100644 kokoro.js/voices/jf_tebukuro.bin create mode 100644 kokoro.js/voices/jm_kumo.bin create mode 
100644 kokoro.js/voices/pf_dora.bin create mode 100644 kokoro.js/voices/pm_alex.bin create mode 100644 kokoro.js/voices/pm_santa.bin create mode 100644 kokoro.js/voices/zf_xiaobei.bin create mode 100644 kokoro.js/voices/zf_xiaoni.bin create mode 100644 kokoro.js/voices/zf_xiaoxiao.bin create mode 100644 kokoro.js/voices/zf_xiaoyi.bin create mode 100644 kokoro.js/voices/zm_yunjian.bin create mode 100644 kokoro.js/voices/zm_yunxi.bin create mode 100644 kokoro.js/voices/zm_yunxia.bin create mode 100644 kokoro.js/voices/zm_yunyang.bin diff --git a/kokoro.js/README.md b/kokoro.js/README.md index 61a76ab..9190fd5 100644 --- a/kokoro.js/README.md +++ b/kokoro.js/README.md @@ -5,10 +5,10 @@ NPM Downloads jsDelivr Hits License - Demo + Demo

-Kokoro is a frontier TTS model for its size of 82 million parameters (text in/audio out). This JavaScript library allows the model to be run 100% locally in the browser thanks to [🤗 Transformers.js](https://huggingface.co/docs/transformers.js). Try it out using our [online demo](https://huggingface.co/spaces/webml-community/kokoro-web)! +Kokoro is a frontier TTS model for its size of 82 million parameters (text in/audio out). This JavaScript library allows the model to be run 100% locally in the browser thanks to [🤗 Transformers.js](https://huggingface.co/docs/transformers.js). Try it out using our [online demo](https://huggingface.co/spaces/webml-community/kokoro-webgpu)! ## Usage @@ -23,33 +23,59 @@ You can then generate speech as follows: ```js import { KokoroTTS } from "kokoro-js"; -const model_id = "onnx-community/Kokoro-82M-ONNX"; +const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX"; const tts = await KokoroTTS.from_pretrained(model_id, { dtype: "q8", // Options: "fp32", "fp16", "q8", "q4", "q4f16" + device: "wasm", // Options: "wasm", "webgpu" (web) or "cpu" (node). If using "webgpu", we recommend using dtype="fp32". }); const text = "Life is like a box of chocolates. You never know what you're gonna get."; const audio = await tts.generate(text, { // Use `tts.list_voices()` to list all available voices - voice: "af_bella", + voice: "af_heart", }); audio.save("audio.wav"); ``` ## Voices/Samples -> Life is like a box of chocolates. You never know what you're gonna get. +> [!TIP] +> You can find samples for each of the voices in the [model card](https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX#samples) on Hugging Face. -| Voice | Nationality | Gender | Sample | -| ------------------------ | ----------- | ------ | -------------------------------------------------------------------------------------------------------- | -| Default (`af`) | American | Female |