rocm-chatterbox-whisper/wyoming_voices.py

import logging
from pathlib import Path
from typing import Dict, Optional

from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# File suffixes (lowercase, leading dot included) recognised as voice audio.
# load_voices() tests each file's lowercased suffix for membership in this set;
# resolve_voice() appends each suffix to a voice name when probing directories.
VOICE_EXTENSIONS = {".wav", ".mp3", ".flac", ".ogg"}


def load_voices() -> Dict[str, str]:
    """Scan the voice directories and return a ``{voice_name: file_path}`` mapping.

    The reference-audio directory is scanned first, then the predefined-voices
    directory. A voice name is the file stem; only regular files whose
    lowercased suffix is in ``VOICE_EXTENSIONS`` are considered.

    Collision rules:
      * Within one directory, the first file in sorted path order wins when
        the same stem exists with several extensions.
      * Across directories, the predefined voice replaces a reference-audio
        entry of the same name, matching the lookup order used by
        ``resolve_voice``.

    Returns:
        Mapping of voice name to the string path of its audio file. Empty if
        neither directory exists or contains audio files.
    """
    from config import get_predefined_voices_path, get_reference_audio_path

    def _scan_dir(directory: Path) -> Dict[str, str]:
        """Return ``{stem: path}`` for audio files directly inside *directory*."""
        found: Dict[str, str] = {}
        # is_dir() is False for a missing path too, and keeps iterdir() from
        # raising NotADirectoryError when the path is a regular file.
        if not directory.is_dir():
            return found
        for entry in sorted(directory.iterdir()):
            # is_file() skips sub-directories that merely look like audio files.
            if entry.suffix.lower() in VOICE_EXTENSIONS and entry.is_file():
                # setdefault keeps the first match per stem within this directory.
                found.setdefault(entry.stem, str(entry))
        return found

    # Reference audio first; predefined voices then overwrite on collision so
    # that they take priority.
    voices = _scan_dir(get_reference_audio_path())
    voices.update(_scan_dir(get_predefined_voices_path()))

    logger.info("Discovered %d voice(s): %s", len(voices), list(voices))
    return voices


def resolve_voice(voice_name: Optional[str], voices: Dict[str, str]) -> Optional[str]:
    """Resolve a voice name to its audio file path.

    Lookup order:
      1. If *voice_name* is empty/None, use the stem of the configured default
         voice id instead.
      2. Exact key match in *voices*.
      3. ``<predefined voices dir>/<voice_name><ext>`` for each known extension.
      4. ``<reference audio dir>/<voice_name><ext>`` for each known extension.
      5. The first entry of *voices* (a warning is logged).

    Args:
        voice_name: Requested voice name (no extension), or None/"" for the
            configured default.
        voices: Mapping of voice name to file path, as built by ``load_voices``.

    Returns:
        The audio file path as a string, or None when nothing matched and
        *voices* is empty.
    """
    from config import get_predefined_voices_path, get_reference_audio_path, get_default_voice_id

    if not voice_name:
        default = get_default_voice_id()
        # An unset default would make Path(None) raise TypeError; treat it as
        # "no name" and go straight to the fallback below.
        voice_name = Path(default).stem if default else ""

    if voice_name:
        # Exact name match in discovered voices
        if voice_name in voices:
            return voices[voice_name]

        # Set iteration order varies between runs (string hash randomisation),
        # so sort to pick the same file every time when a name exists with
        # more than one extension.
        extensions = sorted(VOICE_EXTENSIONS)

        # NOTE(review): voice_name is joined onto the directory unchecked; if
        # it can come from an untrusted client, consider rejecting names that
        # contain path separators.
        # The getters are called lazily, predefined dir before reference dir.
        for get_directory in (get_predefined_voices_path, get_reference_audio_path):
            directory = get_directory()
            for ext in extensions:
                candidate = directory / f"{voice_name}{ext}"
                if candidate.is_file():
                    return str(candidate)

    # Fall back to any voice
    if voices:
        fallback = next(iter(voices.values()))
        logger.warning("Voice '%s' not found, falling back to '%s'", voice_name, fallback)
        return fallback

    return None


def create_wyoming_info(sample_rate: int, voices: Dict[str, str]) -> Info:
    """Assemble the Wyoming ``Info`` object advertised to Home Assistant.

    One ``TtsVoice`` is emitted per key of *voices*, in sorted name order, each
    with a single speaker of the same name. All voices are wrapped in a single
    ``TtsProgram`` named ``"chatterbox"``.

    Args:
        sample_rate: Accepted for interface compatibility; not used here.
        voices: Mapping of voice name to file path; only the keys are read.

    Returns:
        An ``Info`` whose ``tts`` list holds the one Chatterbox program.
    """

    def _credit() -> Attribution:
        # Build a new Attribution on every call so each entry owns its instance.
        return Attribution(
            name="ResembleAI",
            url="https://github.com/resemble-ai/chatterbox",
        )

    advertised = []
    for voice_name in sorted(voices):
        advertised.append(
            TtsVoice(
                name=voice_name,
                description=f"Chatterbox voice: {voice_name}",
                attribution=_credit(),
                installed=True,
                languages=["en"],
                speakers=[TtsVoiceSpeaker(name=voice_name)],
                version=1,
            )
        )

    program = TtsProgram(
        name="chatterbox",
        description="Chatterbox TTS with ROCm/AMD GPU support",
        attribution=_credit(),
        installed=True,
        voices=advertised,
        version="1.0",
        supports_synthesize_streaming=True,
    )
    return Info(tts=[program])