Initial implementation: Chatterbox TTS with ROCm and Wyoming

Wyoming-only server built around the official Chatterbox TTS model. Includes ROCm/AMD GPU support, sentence-level streaming, config.yaml management, and Gitea CI for container builds.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 09:51:09 -04:00
parent 4b15e44181
commit 16ea2853f5
12 changed files with 691 additions and 0 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -0,0 +1,29 @@
+model:
+  # Which model to use. Accepted values: chatterbox, chatterbox-turbo
+  repo_id: chatterbox-turbo
+
+tts_engine:
+  # Compute device: "cuda" or "cpu"; leave as "" to auto-detect
+  device: ""
+  predefined_voices_path: voices
+  reference_audio_path: reference_audio
+  # Fallback voice, given as a file stem: "default" matches default.wav
+  default_voice_id: default  # stem only, no file extension
+
+generation_defaults:
+  # Turbo model: only temperature applies (exaggeration/cfg_weight ignored)
+  # Standard model: exaggeration and cfg_weight apply (temperature ignored)
+  temperature: 0.8
+  exaggeration: 0.5
+  cfg_weight: 0.5
+  # seed: 0 = random on every call, >0 = reproducible output
+  seed: 0
+
+wyoming:
+  host: "0.0.0.0"  # all IPv4 interfaces, assuming this is the bind address
+  port: 10200
+  # Upper bound, in characters, for one synthesis chunk; text is split at
+  chunk_size: 300  # sentence boundaries to stay within this limit
+
+paths:
+  model_cache: /app/hf_cache  # presumably the model download cache; confirm