Split text into sentences to stream audio chunk-by-chunk
All checks were successful
Build and Push Docker Image / build (push) Successful in 2m15s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-08 19:41:19 -04:00
parent 067a853d3b
commit 83aa2ec08c

View File

@@ -4,6 +4,7 @@
import argparse import argparse
import asyncio import asyncio
import logging import logging
import re
import sys import sys
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from functools import partial from functools import partial
@@ -22,6 +23,8 @@ from wyoming.tts import Synthesize
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
KOKORO_SAMPLE_RATE = 24000 KOKORO_SAMPLE_RATE = 24000
_SENTENCE_RE = re.compile(r'(?<=[.!?])\s+')
SAMPLE_WIDTH = 2 # 16-bit PCM SAMPLE_WIDTH = 2 # 16-bit PCM
CHANNELS = 1 CHANNELS = 1
@@ -125,7 +128,13 @@ class KokoroEventHandler(AsyncEventHandler):
chunk_count = 0 chunk_count = 0
try: try:
_LOGGER.debug("Pipeline thread started") _LOGGER.debug("Pipeline thread started")
for _, _, audio in self.pipeline(text, voice=voice_name, speed=speed): sentences = [s for s in _SENTENCE_RE.split(text.strip()) if s]
if not sentences:
sentences = [text]
_LOGGER.debug("Split into %d sentence(s)", len(sentences))
for sentence in sentences:
_LOGGER.debug("Synthesizing: %r", sentence[:60])
for _, _, audio in self.pipeline(sentence, voice=voice_name, speed=speed):
if audio is None: if audio is None:
continue continue
# float32 [-1, 1] → int16 # float32 [-1, 1] → int16