From 3ab76a9817b5cc33ad3be888eecc3cfd71b8115d Mon Sep 17 00:00:00 2001
From: hexgrad <166769057+hexgrad@users.noreply.github.com>
Date: Wed, 29 Jan 2025 11:18:39 -0800
Subject: [PATCH] Japanese and Mandarin Chinese (#20)

---
 kokoro/__init__.py |  2 +-
 kokoro/pipeline.py | 24 ++++++++++++++++++++++++
 setup.py           | 18 +++++++++---------
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/kokoro/__init__.py b/kokoro/__init__.py
index c9dbfa3..e5dccfb 100644
--- a/kokoro/__init__.py
+++ b/kokoro/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.3.0'
+__version__ = '0.3.1'
 
 from .model import KModel
 from .pipeline import KPipeline
diff --git a/kokoro/pipeline.py b/kokoro/pipeline.py
index b91bd85..ad5f2a3 100644
--- a/kokoro/pipeline.py
+++ b/kokoro/pipeline.py
@@ -7,13 +7,22 @@ import re
 import torch
 
 LANG_CODES = dict(
+    # English G2P via misaki: pip install misaki[en]
     a='American English',
     b='British English',
+
+    # espeak-ng: values are the language codes used for EspeakG2P
     e='es',
     f='fr-fr',
     h='hi',
     i='it',
     p='pt-br',
+
+    # Japanese G2P via misaki: pip install misaki[ja]
+    j='Japanese',
+
+    # Mandarin Chinese G2P via misaki: pip install misaki[zh]
+    z='Mandarin Chinese',
 )
 
 class KPipeline:
@@ -55,6 +64,20 @@ class KPipeline:
                 print('⚠️ WARNING: EspeakFallback not enabled. OOD words will be skipped.', e)
                 fallback = None
             self.g2p = en.G2P(trf=trf, british=lang_code=='b', fallback=fallback)
+        elif lang_code == 'j':
+            try:
+                from misaki import ja
+                self.g2p = ja.JAG2P()
+            except ImportError:
+                print("❌ ERROR: You need to `pip install misaki[ja]` to use lang_code='j'")
+                raise
+        elif lang_code == 'z':
+            try:
+                from misaki import zh
+                self.g2p = zh.ZHG2P()
+            except ImportError:
+                print("❌ ERROR: You need to `pip install misaki[zh]` to use lang_code='z'")
+                raise
         else:
             language = LANG_CODES[lang_code]
             print(f"⚠️ WARNING: Using EspeakG2P(language='{language}'). Chunking logic not yet implemented, so long texts may be truncated unless you split them with '\\n'.")
@@ -147,6 +170,7 @@ class KPipeline:
         if isinstance(text, str):
             text = re.split(split_pattern, text.strip()) if split_pattern else [text]
         for graphemes in text:
+            # TODO(misaki): Unify G2P interface between English and non-English
             if self.lang_code in 'ab':
                 _, tokens = self.g2p(graphemes)
                 for gs, ps in self.en_tokenize(tokens):
diff --git a/setup.py b/setup.py
index 7d10e33..fde49fd 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,10 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='kokoro',  # Name of the package
-    version='0.3.0',           # Initial version
-    packages=find_packages(),  # Automatically finds packages
-    install_requires=[         # List your dependencies here
+    name='kokoro',
+    version='0.3.1',
+    packages=find_packages(),
+    install_requires=[
         'huggingface_hub',
         'misaki[en]>=0.6.1',
         'numpy==1.26.4',
@@ -12,15 +12,15 @@ setup(
         'torch',
         'transformers',
     ],
-    python_requires='>=3.6',  # Minimum Python version required
+    python_requires='>=3.6',
     author='hexgrad',
     author_email='hello@hexgrad.com',
     description='TTS',
-    long_description=open('README.md').read(),  # Content from your README
-    long_description_content_type='text/markdown',  # Required for markdown
-    url='https://github.com/hexgrad/kokoro',  # GitHub repo URL
+    long_description=open('README.md').read(),
+    long_description_content_type='text/markdown',
+    url='https://github.com/hexgrad/kokoro',
     license='Apache 2.0',
-    classifiers=[  # This helps users discover your package
+    classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: Apache Software License',
         'Operating System :: OS Independent',