All checks were successful
Build ROCm Image / build (push) Successful in 4m56s
Without this flag HA buffers all audio until AudioStop before forwarding to the media player. With it, HA streams AudioChunk events to the player as they arrive, so playback starts on the first chunk rather than after the full text is synthesized. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
102 lines
3.1 KiB
Python
102 lines
3.1 KiB
Python
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Optional
|
|
|
|
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# File suffixes (lower-case, including the leading dot) that mark a file as a
# usable voice sample. load_voices() lower-cases a file's suffix before testing
# membership here; resolve_voice() appends each entry verbatim to a voice name
# when probing the voice directories.
VOICE_EXTENSIONS = {".wav", ".mp3", ".flac", ".ogg"}
|
|
|
|
|
|
def load_voices() -> Dict[str, str]:
    """Scan voice directories and return {voice_name: file_path} mapping.

    The reference-audio directory and the predefined-voices directory (both
    obtained from ``config``) are scanned non-recursively. A file counts as a
    voice when its lower-cased suffix is in ``VOICE_EXTENSIONS``; the voice
    name is the file's stem and the value is the file path as a string.

    Collision rules:
      * Within one directory, entries are visited in sorted path order and the
        first file seen for a given stem is kept.
      * Across directories, a predefined voice replaces a reference-audio
        file that has the same stem.

    Returns:
        A dict mapping voice name to file path. Empty when neither directory
        exists or neither contains a matching file.
    """
    from config import get_predefined_voices_path, get_reference_audio_path

    def _scan_dir(directory: Path) -> Dict[str, str]:
        """Return {stem: path} for the voice files directly inside *directory*."""
        found: Dict[str, str] = {}
        # is_dir() instead of exists(): a path that exists but is a regular
        # file would make iterdir() raise NotADirectoryError.
        if not directory.is_dir():
            return found
        for entry in sorted(directory.iterdir()):
            # Cheap suffix test first; is_file() skips sub-directories that
            # merely happen to be named like an audio file.
            if entry.suffix.lower() in VOICE_EXTENSIONS and entry.is_file():
                # First file per stem wins inside a single directory.
                found.setdefault(entry.stem, str(entry))
        return found

    # Reference audio is collected first, then predefined voices are merged on
    # top, so a predefined voice takes priority when both directories contain
    # the same stem. dict.update() keeps the position of existing keys, so the
    # overall ordering of the mapping is still "reference first".
    voices: Dict[str, str] = _scan_dir(get_reference_audio_path())
    voices.update(_scan_dir(get_predefined_voices_path()))

    logger.info("Discovered %d voice(s): %s", len(voices), list(voices))
    return voices
|
|
|
|
|
|
def resolve_voice(voice_name: Optional[str], voices: Dict[str, str]) -> Optional[str]:
    """Resolve a voice name to its audio file path.

    Resolution order:
      1. If ``voice_name`` is empty/None, the stem of the configured default
         voice ID (``config.get_default_voice_id()``) is used as the name.
      2. An exact key match in ``voices``.
      3. ``<predefined voices dir>/<voice_name><ext>`` for each extension in
         ``VOICE_EXTENSIONS``, then the same probe in the reference-audio dir.
      4. The first value in ``voices`` (a warning is logged).
      5. ``None`` when ``voices`` is empty.

    Args:
        voice_name: Requested voice name, or None/"" to use the default.
        voices: Mapping of voice name to file path, e.g. from ``load_voices()``.

    Returns:
        The file path of the resolved voice as a string, or None if no voice
        could be found at all.
    """
    from config import get_predefined_voices_path, get_reference_audio_path, get_default_voice_id

    if not voice_name:
        default = get_default_voice_id()
        # Path(None) raises TypeError; with no default configured we simply
        # skip the name-based lookups and use the generic fallback below.
        voice_name = Path(default).stem if default else None

    if voice_name:
        # Exact name match in discovered voices
        if voice_name in voices:
            return voices[voice_name]

        # VOICE_EXTENSIONS is a set, whose iteration order can differ between
        # interpreter runs. Sorting makes the pick deterministic when several
        # files share the same stem.
        extensions = sorted(VOICE_EXTENSIONS)

        # Predefined voices are probed before reference audio. The directory
        # getters are invoked lazily, one directory at a time.
        for get_directory in (get_predefined_voices_path, get_reference_audio_path):
            directory = get_directory()
            for ext in extensions:
                candidate = directory / f"{voice_name}{ext}"
                if candidate.is_file():
                    return str(candidate)

    # Fall back to any voice
    if voices:
        fallback = next(iter(voices.values()))
        logger.warning("Voice '%s' not found, falling back to '%s'", voice_name, fallback)
        return fallback

    return None
|
|
|
|
|
|
def create_wyoming_info(sample_rate: int, voices: Dict[str, str]) -> Info:
    """Assemble the Wyoming ``Info`` describing this TTS program and its voices.

    One ``TtsVoice`` is advertised per key of ``voices``, in sorted name order,
    each with a single speaker of the same name and language ``"en"``. They are
    attached to a single ``TtsProgram`` named ``"chatterbox"`` that declares
    ``supports_synthesize_streaming=True``.

    Args:
        sample_rate: Accepted for the caller's convenience; not used in this
            function.
        voices: Mapping of voice name to file path; only the keys are read.

    Returns:
        An ``Info`` whose ``tts`` list holds the one Chatterbox program.
    """

    def _attribution() -> Attribution:
        # A fresh instance per use, so no Attribution object is shared
        # between the voices and the program.
        return Attribution(
            name="ResembleAI",
            url="https://github.com/resemble-ai/chatterbox",
        )

    advertised = []
    for voice_name in sorted(voices):
        advertised.append(
            TtsVoice(
                name=voice_name,
                description=f"Chatterbox voice: {voice_name}",
                attribution=_attribution(),
                installed=True,
                languages=["en"],
                speakers=[TtsVoiceSpeaker(name=voice_name)],
                # NOTE(review): an int here, while the program below uses the
                # string "1.0" -- confirm which type wyoming expects.
                version=1,
            )
        )

    program = TtsProgram(
        name="chatterbox",
        description="Chatterbox TTS with ROCm/AMD GPU support",
        attribution=_attribution(),
        installed=True,
        voices=advertised,
        version="1.0",
        supports_synthesize_streaming=True,
    )
    return Info(tts=[program])
|