Adds read-only access to persisted call records for the dashboard and implements a client for the Rhema text-to-speech service.
- api/call_history.py: New router providing paged call lists and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS endpoints (Rhema/Kokoro) used for call-flow steps.
91 lines
2.8 KiB
Python
91 lines
2.8 KiB
Python
"""
TTS Service — Rhema (OpenAI-compatible) text-to-speech client.

Synthesizes speech for the SPEAK call-flow step and the AI Receptionist.
Rhema exposes POST /v1/audio/speech (OpenAI-compatible) with Kokoro voices.
"""
|
|
|
|
import asyncio
import logging
from pathlib import Path
from typing import Optional

import httpx

from config import TTSSettings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TTSService:
    """Async client for the Rhema (OpenAI-compatible) TTS service.

    One ``httpx.AsyncClient`` is created lazily on first use and reused for
    later requests. Release it with :meth:`close`, or use the instance as an
    async context manager::

        async with TTSService(settings) as tts:
            audio = await tts.synthesize("Hello")
    """

    def __init__(self, settings: TTSSettings):
        """Store *settings*; the HTTP client is not created until first use."""
        self.settings = settings
        self._client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self) -> "TTSService":
        """Return this service so it can be used in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the underlying HTTP client when the ``async with`` block exits."""
        await self.close()

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            headers = {}
            # Only send an Authorization header when an API key is configured.
            if self.settings.api_key:
                headers["Authorization"] = f"Bearer {self.settings.api_key}"
            self._client = httpx.AsyncClient(
                base_url=self.settings.base_url,
                timeout=httpx.Timeout(self.settings.timeout, connect=5.0),
                headers=headers,
            )
        return self._client

    async def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        response_format: str = "wav",
    ) -> bytes:
        """Synthesize speech and return the audio bytes.

        Args:
            text: Text to speak. Empty or whitespace-only text short-circuits
                without contacting the service.
            voice: Voice name; falls back to ``settings.voice`` when omitted.
            response_format: Audio container requested from the service.

        Returns:
            The response body on success, or ``b""`` when *text* is blank or
            the request fails. Failures are logged, not raised.
        """
        if not text or not text.strip():
            return b""

        client = await self._get_client()
        body = {
            "model": self.settings.model,
            "input": text,
            "voice": voice or self.settings.voice,
            "response_format": response_format,
            "sample_rate": self.settings.sample_rate,
        }

        try:
            response = await client.post("/v1/audio/speech", json=body)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.error(
                "Rhema TTS error: %s %s", e.response.status_code, e.response.text
            )
            return b""
        except httpx.ConnectError:
            logger.error("Cannot connect to Rhema at %s", self.settings.base_url)
            return b""
        except httpx.TimeoutException as e:
            # Timeout exceptions can stringify to an empty message, so log the
            # exception type to keep the entry meaningful.
            logger.error(
                "Rhema TTS request to %s timed out (%s)",
                self.settings.base_url,
                type(e).__name__,
            )
            return b""
        except Exception:
            # Best-effort API: never raise to the call flow, but keep the
            # traceback so unexpected failures can be diagnosed.
            logger.exception("TTS synthesis failed")
            return b""
        return response.content

    async def synthesize_to_file(
        self,
        text: str,
        filepath: str | Path,
        voice: Optional[str] = None,
    ) -> bool:
        """Synthesize *text* to a WAV file at *filepath*.

        Parent directories are created as needed. The filesystem work runs in
        a worker thread so the event loop is not blocked while writing.

        Returns:
            ``True`` once the file has been written, ``False`` when synthesis
            produced no audio (nothing is written in that case).

        Raises:
            OSError: If the directory cannot be created or the file written.
        """
        audio = await self.synthesize(text, voice=voice, response_format="wav")
        if not audio:
            return False
        path = Path(filepath)
        await asyncio.to_thread(self._write_audio, path, audio)
        logger.debug("TTS wrote %d bytes to %s", len(audio), path)
        return True

    @staticmethod
    def _write_audio(path: Path, audio: bytes) -> None:
        """Create *path*'s parent directories and write *audio* (blocking)."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(audio)

    async def close(self) -> None:
        """Close the HTTP client if one is open; safe to call repeatedly."""
        if self._client is not None and not self._client.is_closed:
            await self._client.aclose()
        self._client = None
|