Files
rocm-chatterbox-whisper/wyoming_voices.py
scott 3d3e8bdabf
All checks were successful
Build ROCm Image / build (push) Successful in 4m56s
Add supports_synthesize_streaming=True to TtsProgram
Without this flag HA buffers all audio until AudioStop before forwarding
to the media player. With it, HA streams AudioChunk events to the player
as they arrive, so playback starts on the first chunk rather than after
the full text is synthesized.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 16:52:02 -04:00

102 lines
3.1 KiB
Python

import logging
from pathlib import Path
from typing import Dict, Optional
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# File extensions (lowercase, leading dot included) that mark a file as a
# voice sample.  This is a set: use it for membership tests, and sort it
# before iterating if a stable order matters.
VOICE_EXTENSIONS = {
    ".flac",
    ".mp3",
    ".ogg",
    ".wav",
}
def load_voices() -> Dict[str, str]:
    """Scan the voice directories and return a ``{voice_name: file_path}`` mapping.

    The reference-audio directory is scanned first, then the predefined-voices
    directory.  A voice is any regular file directly inside one of them whose
    extension (compared case-insensitively) is in ``VOICE_EXTENSIONS``; its
    name is the file stem.

    Collision rules:
      * within one directory, the first file in sorted order wins;
      * across directories, a predefined voice replaces a reference-audio
        entry that has the same name.

    Returns:
        Mapping of voice name to file path (as a string).  Directories that
        do not exist are skipped, so the mapping may be empty.
    """
    # Resolved at call time rather than at module import time.
    from config import get_predefined_voices_path, get_reference_audio_path

    def _scan_dir(directory: Path) -> Dict[str, str]:
        """Return the voices found directly inside *directory*."""
        found: Dict[str, str] = {}
        # is_dir() also covers "missing" and avoids iterdir() raising when the
        # configured path points at a regular file.
        if not directory.is_dir():
            return found
        for entry in sorted(directory.iterdir()):
            if entry.suffix.lower() not in VOICE_EXTENSIONS:
                continue
            # Skip sub-directories (or dangling links) that merely look like audio.
            if not entry.is_file():
                continue
            # First file in sorted order wins for a given stem.
            found.setdefault(entry.stem, str(entry))
        return found

    # Reference audio first; update() then lets predefined voices overwrite
    # same-named entries, so predefined voices take priority on collision.
    voices = _scan_dir(get_reference_audio_path())
    voices.update(_scan_dir(get_predefined_voices_path()))

    logger.info("Discovered %d voice(s): %s", len(voices), list(voices))
    return voices
def resolve_voice(voice_name: Optional[str], voices: Dict[str, str]) -> Optional[str]:
    """Resolve a voice name to the path of its audio file.

    Lookup order:
      1. an empty or missing *voice_name* is replaced by the file stem of the
         configured default voice id;
      2. exact key match in *voices*;
      3. ``<voice_name><ext>`` in the predefined-voices directory, then in the
         reference-audio directory, trying the extensions in sorted order;
      4. the first entry of *voices*, logged as a warning.

    Args:
        voice_name: Requested voice name; ``None`` or ``""`` selects the default.
        voices: Mapping of known voice names to audio file paths.

    Returns:
        The audio file path as a string, or ``None`` when nothing matched and
        *voices* is empty.
    """
    # Resolved at call time rather than at module import time.
    from config import get_predefined_voices_path, get_reference_audio_path, get_default_voice_id

    if not voice_name:
        default = get_default_voice_id()
        # An unset default must not crash Path(); it simply falls through to
        # the generic fallback below.
        voice_name = Path(default).stem if default else ""

    # Exact name match in discovered voices
    if voice_name in voices:
        return voices[voice_name]

    # Probe the directories only for a plain file name: a name carrying path
    # components (or an absolute path) would escape the voice directories
    # once joined onto them.
    if voice_name and Path(voice_name).name == voice_name:
        # VOICE_EXTENSIONS is a set, whose iteration order can differ between
        # runs; sorting makes the choice between e.g. "x.flac" and "x.wav"
        # repeatable.
        extensions = sorted(VOICE_EXTENSIONS)
        # Predefined voices are tried before reference audio.  The getters are
        # called lazily, once per directory.
        for get_directory in (get_predefined_voices_path, get_reference_audio_path):
            directory = get_directory()
            for ext in extensions:
                candidate = directory / f"{voice_name}{ext}"
                if candidate.is_file():
                    return str(candidate)

    # Fall back to any voice
    if voices:
        fallback = next(iter(voices.values()))
        logger.warning("Voice '%s' not found, falling back to '%s'", voice_name, fallback)
        return fallback

    return None
def create_wyoming_info(sample_rate: int, voices: Dict[str, str]) -> Info:
    """Assemble the Wyoming ``Info`` object advertised to Home Assistant.

    One ``TtsVoice`` (carrying a single speaker of the same name) is emitted
    per key of *voices*, in sorted name order.  They are all attached to one
    ``TtsProgram`` named ``"chatterbox"`` that declares streaming synthesis
    support.

    Args:
        sample_rate: Not referenced by this function; present in the signature only.
        voices: Mapping of voice name to audio file path; only the keys are used.

    Returns:
        ``Info`` whose ``tts`` list holds the single Chatterbox program.
    """

    def _attribution() -> Attribution:
        # A fresh instance per entry, so no two entries share one object.
        return Attribution(
            name="ResembleAI",
            url="https://github.com/resemble-ai/chatterbox",
        )

    tts_voices = []
    for voice_name in sorted(voices):
        voice = TtsVoice(
            name=voice_name,
            description=f"Chatterbox voice: {voice_name}",
            attribution=_attribution(),
            installed=True,
            languages=["en"],
            speakers=[TtsVoiceSpeaker(name=voice_name)],
            version=1,
        )
        tts_voices.append(voice)

    program = TtsProgram(
        name="chatterbox",
        description="Chatterbox TTS with ROCm/AMD GPU support",
        attribution=_attribution(),
        installed=True,
        voices=tts_voices,
        version="1.0",
        supports_synthesize_streaming=True,
    )
    return Info(tts=[program])