Split text into sentences to stream audio chunk-by-chunk

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 19:41:19 -04:00
parent 067a853d3b
commit 83aa2ec08c
1 changed file with 19 additions and 10 deletions
--- a/server.py
+++ b/server.py
@@ -4,6 +4,7 @@
 import argparse
 import asyncio
 import logging
+import re
 import sys
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
@@ -22,6 +23,8 @@ from wyoming.tts import Synthesize
 _LOGGER = logging.getLogger(__name__)

 KOKORO_SAMPLE_RATE = 24000
+
+_SENTENCE_RE = re.compile(r'(?<=[.!?])\s+')
 SAMPLE_WIDTH = 2   # 16-bit PCM
 CHANNELS = 1

@@ -125,16 +128,22 @@ class KokoroEventHandler(AsyncEventHandler):
                chunk_count = 0
                try:
                    _LOGGER.debug("Pipeline thread started")
-                    for _, _, audio in self.pipeline(text, voice=voice_name, speed=speed):
-                        if audio is None:
-                            continue
-                        # float32 [-1, 1] → int16
-                        audio_np = audio.cpu().numpy() if hasattr(audio, 'cpu') else audio
-                        pcm = (np.clip(audio_np, -1.0, 1.0) * 32767).astype(np.int16)
-                        chunk_count += 1
-                        _LOGGER.debug("Queueing chunk %d (%d bytes)", chunk_count, len(pcm.tobytes()))
-                        fut = asyncio.run_coroutine_threadsafe(chunk_queue.put(pcm.tobytes()), loop)
-                        fut.result()  # propagate any queue errors
+                    sentences = [s for s in _SENTENCE_RE.split(text.strip()) if s]
+                    if not sentences:
+                        sentences = [text]
+                    _LOGGER.debug("Split into %d sentence(s)", len(sentences))
+                    for sentence in sentences:
+                        _LOGGER.debug("Synthesizing: %r", sentence[:60])
+                        for _, _, audio in self.pipeline(sentence, voice=voice_name, speed=speed):
+                            if audio is None:
+                                continue
+                            # float32 [-1, 1] → int16
+                            audio_np = audio.cpu().numpy() if hasattr(audio, 'cpu') else audio
+                            pcm = (np.clip(audio_np, -1.0, 1.0) * 32767).astype(np.int16)
+                            chunk_count += 1
+                            _LOGGER.debug("Queueing chunk %d (%d bytes)", chunk_count, len(pcm.tobytes()))
+                            fut = asyncio.run_coroutine_threadsafe(chunk_queue.put(pcm.tobytes()), loop)
+                            fut.result()  # propagate any queue errors
                    _LOGGER.debug("Pipeline finished, %d chunks generated", chunk_count)
                except Exception as exc:
                    _LOGGER.exception("Pipeline thread error")