Files
rocm-chatterbox-whisper/wyoming_voices.py
scott 4b21d6c252
Some checks failed
Build ROCm Image / build (push) Has been cancelled
Fix TtsVoice missing required version argument
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 11:52:14 -04:00

101 lines
3.1 KiB
Python

import logging
from pathlib import Path
from typing import Dict, Optional
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Audio file suffixes (lower-case, with leading dot) accepted as voice samples.
# Callers compare against a lower-cased suffix, so matching is case-insensitive.
VOICE_EXTENSIONS = {".wav", ".mp3", ".flac", ".ogg"}
def load_voices() -> Dict[str, str]:
    """Scan the voice directories and return a ``{voice_name: file_path}`` mapping.

    The voice name is the file stem (file name without its extension). Only
    regular files whose lower-cased suffix is in ``VOICE_EXTENSIONS`` are
    considered. Directories are scanned in sorted order, and the first file
    seen for a given name wins.

    Returns:
        Mapping of voice name to the path of its audio file, as a string.
        Empty when neither directory exists or contains a matching file.
    """
    from config import get_predefined_voices_path, get_reference_audio_path

    voices: Dict[str, str] = {}

    def _scan_dir(directory: Path) -> None:
        # A missing path (or one that is not a directory) contributes nothing.
        if not directory.is_dir():
            return
        for entry in sorted(directory.iterdir()):
            # Skip sub-directories and other non-files that happen to carry
            # an audio suffix.
            if entry.is_file() and entry.suffix.lower() in VOICE_EXTENSIONS:
                # First match wins; later duplicates are ignored.
                voices.setdefault(entry.stem, str(entry))

    # Because the first match wins, predefined voices must be scanned first
    # so they take priority over reference audio on a name collision.
    _scan_dir(get_predefined_voices_path())
    _scan_dir(get_reference_audio_path())

    logger.info("Discovered %d voice(s): %s", len(voices), list(voices))
    return voices
def resolve_voice(voice_name: Optional[str], voices: Dict[str, str]) -> Optional[str]:
    """Resolve a voice name to the path of its audio file.

    Resolution order:

    1. If ``voice_name`` is empty or ``None``, use the stem of the configured
       default voice id instead.
    2. An exact name match in ``voices``.
    3. A file named ``<voice_name><ext>`` in the predefined voices directory,
       then in the reference audio directory, trying each extension in
       ``VOICE_EXTENSIONS``.
    4. Any discovered voice (the first entry of ``voices``), with a warning.

    Args:
        voice_name: Requested voice name, or ``None``/empty for the default.
        voices: Mapping of voice name to audio file path.

    Returns:
        The audio file path as a string, or ``None`` when nothing matches and
        ``voices`` is empty.
    """
    from config import get_predefined_voices_path, get_reference_audio_path, get_default_voice_id

    if not voice_name:
        voice_name = Path(get_default_voice_id()).stem

    # Exact name match in discovered voices.
    if voice_name in voices:
        return voices[voice_name]

    # Probe the directories directly: predefined voices first, then reference
    # audio. Extensions are tried in sorted order because iterating the set
    # itself is not stable across interpreter runs, which would make the
    # result unpredictable when several files share a stem.
    extensions = sorted(VOICE_EXTENSIONS)
    for get_directory in (get_predefined_voices_path, get_reference_audio_path):
        directory = get_directory()
        for ext in extensions:
            candidate = directory / f"{voice_name}{ext}"
            if candidate.exists():
                return str(candidate)

    # Fall back to any discovered voice rather than failing outright.
    if voices:
        fallback = next(iter(voices.values()))
        logger.warning("Voice '%s' not found, falling back to '%s'", voice_name, fallback)
        return fallback

    return None
def create_wyoming_info(sample_rate: int, voices: Dict[str, str]) -> Info:
    """Build the Wyoming Info object advertised to Home Assistant.

    One ``TtsVoice`` is advertised per key of ``voices`` (in sorted order),
    each with a single speaker of the same name, all under one ``chatterbox``
    ``TtsProgram``.

    Args:
        sample_rate: Accepted for interface compatibility; not used when
            building the info object.
        voices: Mapping of voice name to audio file path. Only the names are
            used here.

    Returns:
        An ``Info`` object containing a single TTS program.
    """
    # Version is passed as a string for both the program and its voices.
    version = "1.0"

    def _attribution() -> Attribution:
        # Fresh instance per artifact, as in a literal-per-entry construction.
        return Attribution(
            name="ResembleAI",
            url="https://github.com/resemble-ai/chatterbox",
        )

    tts_voices = [
        TtsVoice(
            name=name,
            description=f"Chatterbox voice: {name}",
            attribution=_attribution(),
            installed=True,
            languages=["en"],
            speakers=[TtsVoiceSpeaker(name=name)],
            version=version,
        )
        for name in sorted(voices)
    ]

    return Info(
        tts=[
            TtsProgram(
                name="chatterbox",
                description="Chatterbox TTS with ROCm/AMD GPU support",
                attribution=_attribution(),
                installed=True,
                voices=tts_voices,
                version=version,
            )
        ]
    )