feat: add call history API endpoints and TTS service client

Adds read-only access to persisted call records for the dashboard
and implements a client for the Rhema text-to-speech service.

- api/call_history.py: New router providing paged call lists
  and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS
  endpoints (Rhema/Kokoro) used for call-flow steps.
This commit is contained in:
2026-05-22 06:28:33 -04:00
parent dbdb03beb9
commit 63f1a270bb
28 changed files with 2275 additions and 11 deletions

View File

@@ -24,6 +24,7 @@ from models.call_flow import ActionType, CallFlow, CallFlowStep
from models.events import EventType, GatewayEvent
from services.audio_classifier import AudioClassifier
from services.transcription import TranscriptionService
from services.tts import TTSService
logger = logging.getLogger(__name__)
@@ -68,6 +69,7 @@ class HoldSlayerService:
classifier: AudioClassifier,
transcription: TranscriptionService,
settings: Settings,
tts: Optional[TTSService] = None,
):
self.gateway = gateway
self.call_manager = call_manager
@@ -75,6 +77,7 @@ class HoldSlayerService:
self.classifier = classifier
self.transcription = transcription
self.settings = settings
self.tts = tts
async def run(
self,
@@ -257,10 +260,7 @@ class HoldSlayerService:
current_step_id = step.next_step
elif step.action == ActionType.SPEAK:
# Say something into the call (TTS)
# TODO: Implement TTS integration
logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
await asyncio.sleep(3.0)
await self._speak(call, sip_leg_id, step.action_value or "")
current_step_id = step.next_step
elif step.action == ActionType.TRANSFER:
@@ -715,3 +715,53 @@ class HoldSlayerService:
logger.error(f"Failed to load call flow '{flow_id}': {e}")
return None
async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> bool:
    """
    Synthesize `text` via TTS and play it into the call leg.

    Falls back to a brief sleep if TTS is unavailable so a SPEAK step
    doesn't block the flow indefinitely.

    Args:
        call: The active call; its `id` is used for the temp-file prefix
            and for the published event.
        sip_leg_id: Identifier of the leg passed to
            `media_pipeline.play_wav`.
        text: The text to speak. Blank/whitespace-only text is a no-op.

    Returns:
        True if audio was synthesized and `play_wav` completed; False if
        the text was blank, TTS or the media pipeline is not configured,
        or synthesis reported failure.

    Exceptions raised by synthesis or playback are not caught here and
    propagate to the caller. A failure to publish the SPEAK_PLAYED event
    is logged and otherwise ignored, since the audio has already played.
    """
    if not text.strip():
        return False

    # Resolve the pipeline once; the gateway may not expose one at all.
    pipeline = getattr(self.gateway, "media_pipeline", None)
    if not self.tts or not pipeline:
        logger.warning(f"🗣️ TTS unavailable, skipping SPEAK: '{text[:60]}'")
        await asyncio.sleep(2.0)
        return False

    import os
    import tempfile

    # mkstemp creates the file and returns an open descriptor; close it
    # right away because only the path is handed to the TTS client.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"speak_{call.id}_")
    os.close(fd)
    try:
        ok = await self.tts.synthesize_to_file(text, tmp_path)
        if not ok:
            logger.warning(f"🗣️ TTS synthesis returned no audio for: '{text[:60]}'")
            return False

        logger.info(f"🗣️ Speaking: '{text[:80]}'")
        await pipeline.play_wav(sip_leg_id, tmp_path)

        # Publish event so the dashboard/transcript shows what we said.
        # Best-effort: playback already succeeded, so a publish failure
        # must not fail the step -- but it should not vanish silently.
        try:
            await self.gateway.event_bus.publish(
                GatewayEvent(
                    type=EventType.SPEAK_PLAYED,
                    call_id=call.id,
                    data={"text": text},
                    message=f"Played TTS: {text[:80]}",
                )
            )
        except Exception as e:
            logger.warning(
                f"🗣️ Failed to publish SPEAK_PLAYED event for call {call.id}: {e}",
                exc_info=True,
            )
        return True
    finally:
        # Always remove the temp WAV, whether or not playback happened.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass