diff --git a/wyoming_handler.py b/wyoming_handler.py index bcb96a7..3d573a4 100644 --- a/wyoming_handler.py +++ b/wyoming_handler.py @@ -135,21 +135,26 @@ class ChatterboxWyomingHandler(AsyncEventHandler): continue audio_np = audio_tensor.cpu().numpy().squeeze() - audio_bytes = (audio_np * 32767).clip(-32768, 32767).astype(np.int16).tobytes() + audio_int16 = (audio_np * 32767).clip(-32768, 32767).astype(np.int16) if first_chunk: ttfa = time.monotonic() - start_time logger.info(f"Time to first audio: {ttfa:.3f}s") first_chunk = False - await self.write_event( - AudioChunk( - audio=audio_bytes, - rate=sample_rate, - width=2, - channels=1, - ).event() - ) + # Send in small sub-chunks so HA can begin playback immediately + # rather than waiting for the full audio blob to arrive. + audio_chunk_samples = 4096 + for offset in range(0, len(audio_int16), audio_chunk_samples): + sub = audio_int16[offset : offset + audio_chunk_samples] + await self.write_event( + AudioChunk( + audio=sub.tobytes(), + rate=sample_rate, + width=2, + channels=1, + ).event() + ) await self.write_event(AudioStop().event()) total = time.monotonic() - start_time