Files
kokoro/examples/phoneme_example.py

62 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from kokoro import KPipeline, KModel
import torch
from scipy.io import wavfile
def save_audio(audio: torch.Tensor, filename: str, sample_rate: int = 24000) -> None:
    """Write an audio tensor to ``filename`` as a WAV file.

    Args:
        audio: Samples to write. ``None`` is tolerated: nothing is written
            and a notice is printed instead.
        filename: Destination path passed to ``scipy.io.wavfile.write``.
        sample_rate: Sample rate stored in the WAV header. Defaults to
            24000 because Kokoro uses a 24kHz sample rate.
    """
    if audio is None:
        print("No audio was generated")
        return

    # detach() drops autograd history so numpy() cannot fail on a tensor that
    # requires grad; cpu() moves the data to host memory for NumPy.
    samples = audio.detach().cpu().numpy()
    wavfile.write(filename, sample_rate, samples)
    print(f"Audio saved as '{filename}'")
def main():
    """Run two phoneme-driven synthesis examples and save the audio as WAV.

    Example 1 passes a raw phoneme string to ``generate_from_tokens`` and
    saves the first result. Example 2 obtains tokens from ``pipeline.g2p``,
    passes them to ``generate_from_tokens``, prints per-token timestamps
    when both are set, and saves one numbered WAV file per result.

    Any exception raised while running the examples is reported on stdout
    instead of being propagated.
    """
    # Initialize pipeline with American English
    pipeline = KPipeline(lang_code='a')

    # The phoneme string for:
    # "How are you today? I am doing reasonably well, thank you for asking"
    phonemes = "hˌW ɑɹ ju tədˈA? ˌI ɐm dˈuɪŋ ɹˈizənəbli wˈɛl, θˈæŋk ju fɔɹ ˈæskɪŋ"

    try:
        print("\nExample 1: Using generate_from_tokens with raw phonemes")
        results = list(pipeline.generate_from_tokens(
            tokens=phonemes,
            voice="af_bella",
            speed=1.0,
        ))
        if results:
            # Only the first result is written out in this example.
            save_audio(results[0].audio, 'phoneme_output_new.wav')
        else:
            print("No audio was generated")

        # Example 2: Using generate_from_tokens with pre-processed tokens
        print("\nExample 2: Using generate_from_tokens with pre-processed tokens")
        # Get the tokens through G2P or any other method
        text = "How are you today? I am doing reasonably well, thank you for asking"
        _, tokens = pipeline.g2p(text)

        # Then generate from tokens. The running index names the output
        # files: hash() of a str is salted per process (and can be negative),
        # so it gave different filenames on every run and would overwrite
        # files for results with identical phonemes.
        for index, result in enumerate(pipeline.generate_from_tokens(
            tokens=tokens,
            voice="af_bella",
            speed=1.0,
        )):
            # Each result may contain timestamps if available
            for token in result.tokens or ():
                start_ts = getattr(token, 'start_ts', None)
                end_ts = getattr(token, 'end_ts', None)
                # The attributes may exist but be None; formatting None
                # with ':.2f' raises TypeError, so check the values.
                if start_ts is not None and end_ts is not None:
                    print(f"Token: {token.text} ({start_ts:.2f}s - {end_ts:.2f}s)")
            save_audio(result.audio, f'token_output_{index}.wav')
    except Exception as e:
        # Top-level boundary of an example script: report and exit normally.
        print(f"An error occurred: {e}")
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()