diff --git a/wyoming_handler.py b/wyoming_handler.py
index bcb96a7..3d573a4 100644
--- a/wyoming_handler.py
+++ b/wyoming_handler.py
@@ -135,21 +135,26 @@ class ChatterboxWyomingHandler(AsyncEventHandler):
                 continue
 
             audio_np = audio_tensor.cpu().numpy().squeeze()
-            audio_bytes = (audio_np * 32767).clip(-32768, 32767).astype(np.int16).tobytes()
+            audio_int16 = (audio_np * 32767).clip(-32768, 32767).astype(np.int16)
 
             if first_chunk:
                 ttfa = time.monotonic() - start_time
                 logger.info(f"Time to first audio: {ttfa:.3f}s")
                 first_chunk = False
 
-            await self.write_event(
-                AudioChunk(
-                    audio=audio_bytes,
-                    rate=sample_rate,
-                    width=2,
-                    channels=1,
-                ).event()
-            )
+            # Send in small sub-chunks so Home Assistant (HA) can begin playback
+            # immediately rather than waiting for the full audio blob to arrive.
+            audio_chunk_samples = 4096
+            for offset in range(0, len(audio_int16), audio_chunk_samples):
+                sub = audio_int16[offset : offset + audio_chunk_samples]
+                await self.write_event(
+                    AudioChunk(
+                        audio=sub.tobytes(),
+                        rate=sample_rate,
+                        width=2,
+                        channels=1,
+                    ).event()
+                )
 
         await self.write_event(AudioStop().event())
         total = time.monotonic() - start_time