Files
kokoro/kokoro.js/src/phonemize.js
Joshua Lochner 0a1dc5750c Create Kokoro TTS JavaScript library (#3)
* Set up JS project

* Finalise JS library

* Update README

* Fix package.json repository url

* Rename package -> `kokoro-js`

* Fix samples in README

* Cleanup README

* Bump `phonemizer` version

* Create web demo

* Run prettier

* Link to model used in demo

* Enable multithreading in HF space demo (~40% faster)

* Add link to demo in README

* Bump to v1.0.1
2025-01-16 09:50:34 -08:00

198 lines
6.2 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { phonemize as espeakng } from "phonemizer";
/**
* Helper function to split a string on a regex, but keep the delimiters.
* This is required, because the JavaScript `.split()` method does not keep the delimiters,
* and wrapping in a capturing group causes issues with existing capturing groups (due to nesting).
* @param {string} text The text to split.
* @param {RegExp} regex The regex to split on.
* @returns {{match: boolean; text: string}[]} The split string.
*/
function split(text, regex) {
const result = [];
let prev = 0;
for (const match of text.matchAll(regex)) {
const fullMatch = match[0];
if (prev < match.index) {
result.push({ match: false, text: text.slice(prev, match.index) });
}
if (fullMatch.length > 0) {
result.push({ match: true, text: fullMatch });
}
prev = match.index + fullMatch.length;
}
if (prev < text.length) {
result.push({ match: false, text: text.slice(prev) });
}
return result;
}
/**
* Helper function to split numbers into phonetic equivalents
* @param {string} match The matched number
* @returns {string} The phonetic equivalent
*/
function split_num(match) {
if (match.includes(".")) {
return match;
} else if (match.includes(":")) {
let [h, m] = match.split(":").map(Number);
if (m === 0) {
return `${h} o'clock`;
} else if (m < 10) {
return `${h} oh ${m}`;
}
return `${h} ${m}`;
}
let year = parseInt(match.slice(0, 4), 10);
if (year < 1100 || year % 1000 < 10) {
return match;
}
let left = match.slice(0, 2);
let right = parseInt(match.slice(2, 4), 10);
let suffix = match.endsWith("s") ? "s" : "";
if (year % 1000 >= 100 && year % 1000 <= 999) {
if (right === 0) {
return `${left} hundred${suffix}`;
} else if (right < 10) {
return `${left} oh ${right}${suffix}`;
}
}
return `${left} ${right}${suffix}`;
}
/**
* Helper function to format monetary values
* @param {string} match The matched currency
* @returns {string} The formatted currency
*/
function flip_money(match) {
const bill = match[0] === "$" ? "dollar" : "pound";
if (isNaN(Number(match.slice(1)))) {
return `${match.slice(1)} ${bill}s`;
} else if (!match.includes(".")) {
let suffix = match.slice(1) === "1" ? "" : "s";
return `${match.slice(1)} ${bill}${suffix}`;
}
const [b, c] = match.slice(1).split(".");
const d = parseInt(c.padEnd(2, "0"), 10);
let coins = match[0] === "$" ? (d === 1 ? "cent" : "cents") : d === 1 ? "penny" : "pence";
return `${b} ${bill}${b === "1" ? "" : "s"} and ${d} ${coins}`;
}
/**
* Helper function to process decimal numbers
* @param {string} match The matched number
* @returns {string} The formatted number
*/
function point_num(match) {
let [a, b] = match.split(".");
return `${a} point ${b.split("").join(" ")}`;
}
/**
* Normalize text for phonemization
* @param {string} text The text to normalize
* @returns {string} The normalized text
*/
function normalize_text(text) {
return (
text
// 1. Handle quotes and brackets
.replace(/[]/g, "'")
.replace(/«/g, "“")
.replace(/»/g, "”")
.replace(/[“”]/g, '"')
.replace(/\(/g, "«")
.replace(/\)/g, "»")
// 2. Replace uncommon punctuation marks
.replace(/、/g, ", ")
.replace(/。/g, ". ")
.replace(//g, "! ")
.replace(//g, ", ")
.replace(//g, ": ")
.replace(//g, "; ")
.replace(//g, "? ")
// 3. Whitespace normalization
.replace(/[^\S \n]/g, " ")
.replace(/ +/, " ")
.replace(/(?<=\n) +(?=\n)/g, "")
// 4. Abbreviations
.replace(/\bD[Rr]\.(?= [A-Z])/g, "Doctor")
.replace(/\b(?:Mr\.|MR\.(?= [A-Z]))/g, "Mister")
.replace(/\b(?:Ms\.|MS\.(?= [A-Z]))/g, "Miss")
.replace(/\b(?:Mrs\.|MRS\.(?= [A-Z]))/g, "Mrs")
.replace(/\betc\.(?! [A-Z])/gi, "etc")
// 5. Normalize casual words
.replace(/\b(y)eah?\b/gi, "$1e'a")
// 5. Handle numbers and currencies
.replace(/\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)/g, split_num)
.replace(/(?<=\d),(?=\d)/g, "")
.replace(/[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b/gi, flip_money)
.replace(/\d*\.\d+/g, point_num)
.replace(/(?<=\d)-(?=\d)/g, " to ")
.replace(/(?<=\d)S/g, " S")
// 6. Handle possessives
.replace(/(?<=[BCDFGHJ-NP-TV-Z])'?s\b/g, "'S")
.replace(/(?<=X')S\b/g, "s")
// 7. Handle hyphenated words/letters
.replace(/(?:[A-Za-z]\.){2,} [a-z]/g, (m) => m.replace(/\./g, "-"))
.replace(/(?<=[A-Z])\.(?=[A-Z])/gi, "-")
// 8. Strip leading and trailing whitespace
.trim()
);
}
/**
* Escapes regular expression special characters from a string by replacing them with their escaped counterparts.
*
* @param {string} string The string to escape.
* @returns {string} The escaped string.
*/
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
}
const PUNCTUATION = ';:,.!?¡¿—…"«»“”(){}[]';
const PUNCTUATION_PATTERN = new RegExp(`(\\s*[${escapeRegExp(PUNCTUATION)}]+\\s*)+`, "g");
export async function phonemize(text, language = "a", norm = true) {
// 1. Normalize text
if (norm) {
text = normalize_text(text);
}
// 2. Split into chunks, to ensure we preserve punctuation
const sections = split(text, PUNCTUATION_PATTERN);
// 3. Convert each section to phonemes
const lang = language === "a" ? "en-us" : "en";
const ps = (await Promise.all(sections.map(async ({ match, text }) => (match ? text : (await espeakng(text, lang)).join(" "))))).join("");
// 4. Post-process phonemes
let processed = ps
// https://en.wiktionary.org/wiki/kokoro#English
.replace(/kəkˈoːɹoʊ/g, "kˈoʊkəɹoʊ")
.replace(/kəkˈɔːɹəʊ/g, "kˈəʊkəɹəʊ")
.replace(/ʲ/g, "j")
.replace(/r/g, "ɹ")
.replace(/x/g, "k")
.replace(/ɬ/g, "l")
.replace(/(?<=[a-zɹː])(?=hˈʌndɹɪd)/g, " ")
.replace(/ z(?=[;:,.!?¡¿—…"«»“” ]|$)/g, "z");
// 5. Additional post-processing for American English
if (language === "a") {
processed = processed.replace(/(?<=nˈaɪn)ti(?!ː)/g, "di");
}
return processed.trim();
}