Initial implementation: Chatterbox TTS with ROCm and Wyoming

Wyoming-only server built around the official Chatterbox TTS model. Includes ROCm/AMD GPU support, sentence-level streaming, config.yaml management, and Gitea CI for container builds.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 09:51:09 -04:00
parent 4b15e44181
commit 16ea2853f5
12 changed files with 691 additions and 0 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,35 @@
+services:
+  chatterbox-whisper:  # Wyoming server container with AMD GPU (ROCm) device access
+    image: git.sdgarren.com/scott/rocm-chatterbox-whisper:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.rocm
+    restart: unless-stopped
+    ports:
+      - "${WYOMING_PORT:-10200}:10200"  # host port from WYOMING_PORT, default 10200
+    devices:  # AMD GPU device nodes passed through from the host
+      - /dev/kfd
+      - /dev/dri
+    group_add:  # NOTE(review): these groups must exist inside the image - confirm
+      - video
+      - render
+    ipc: host
+    shm_size: 8g
+    security_opt:
+      - seccomp=unconfined
+    volumes:
+      - ./config.yaml:/app/config.yaml  # must exist as a file first, else Docker creates a directory
+      - ./voices:/app/voices
+      - ./reference_audio:/app/reference_audio
+      - hf_cache:/app/hf_cache  # named volume declared under top-level `volumes`
+    environment:
+      - HF_HUB_ENABLE_HF_TRANSFER=1
+      # GPU architecture; override HSA_OVERRIDE_GFX_VERSION in the shell or .env:
+      #   10.3.0 = RX 5000/6000 series
+      #   11.0.0 = RX 7000 series
+      #   9.0.6  = Vega
+      - HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION:-10.3.0}
+      # - HF_TOKEN=${HF_TOKEN}  # uncomment; define HF_TOKEN in the shell or .env, not here
+
+volumes:  # named volumes managed by Docker
+  hf_cache: {}  # default settings; mounted at /app/hf_cache by the service