feat: add call history API endpoints and TTS service client

Adds read-only access to persisted call records for the dashboard and implements a client for the Rhema text-to-speech service.

- api/call_history.py: New router providing paged call lists and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS endpoints (Rhema/Kokoro) used for call-flow steps.
2026-05-22 06:28:33 -04:00
parent dbdb03beb9
commit 63f1a270bb
28 changed files with 2275 additions and 11 deletions
--- a/services/hold_slayer.py
+++ b/services/hold_slayer.py
@@ -24,6 +24,7 @@ from models.call_flow import ActionType, CallFlow, CallFlowStep
 from models.events import EventType, GatewayEvent
 from services.audio_classifier import AudioClassifier
 from services.transcription import TranscriptionService
+from services.tts import TTSService

 logger = logging.getLogger(__name__)

@@ -68,6 +69,7 @@ class HoldSlayerService:
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
+        tts: Optional[TTSService] = None,
    ):
        self.gateway = gateway
        self.call_manager = call_manager
@@ -75,6 +77,7 @@ class HoldSlayerService:
        self.classifier = classifier
        self.transcription = transcription
        self.settings = settings
+        self.tts = tts

    async def run(
        self,
@@ -257,10 +260,7 @@ class HoldSlayerService:
                current_step_id = step.next_step

            elif step.action == ActionType.SPEAK:
-                # Say something into the call (TTS)
-                # TODO: Implement TTS integration
-                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
-                await asyncio.sleep(3.0)
+                await self._speak(call, sip_leg_id, step.action_value or "")
                current_step_id = step.next_step

            elif step.action == ActionType.TRANSFER:
@@ -715,3 +715,53 @@ class HoldSlayerService:
            logger.error(f"Failed to load call flow '{flow_id}': {e}")

        return None
+
+    async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> bool:
+        """
+        Synthesize `text` via TTS and play it into the call leg.
+
+        Returns True once the audio has been played. Returns False when `text`
+        is blank, when TTS or the gateway media pipeline is unavailable (after
+        a brief sleep), or when synthesis reports no audio. The temporary WAV
+        is removed afterwards; a failed event publish is logged, not raised.
+        """
+        if not text.strip():
+            return False
+
+        if not self.tts or not getattr(self.gateway, "media_pipeline", None):
+            logger.warning(f"🗣️ TTS unavailable, skipping SPEAK: '{text[:60]}'")
+            await asyncio.sleep(2.0)
+            return False
+
+        import contextlib
+        import os
+        import tempfile
+
+        fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"speak_{call.id}_")
+        os.close(fd)  # only the path is handed on to the TTS client
+
+        try:
+            ok = await self.tts.synthesize_to_file(text, tmp_path)
+            if not ok:
+                logger.warning(f"🗣️ TTS synthesis returned no audio for: '{text[:60]}'")
+                return False
+
+            logger.info(f"🗣️ Speaking: '{text[:80]}'")
+            await self.gateway.media_pipeline.play_wav(sip_leg_id, tmp_path)
+
+            # Best-effort: show the dashboard/transcript what we said; never fail the call.
+            try:
+                event = GatewayEvent(
+                    type=EventType.SPEAK_PLAYED,
+                    call_id=call.id,
+                    data={"text": text},
+                    message=f"Played TTS: {text[:80]}",
+                )
+                await self.gateway.event_bus.publish(event)
+            except Exception as e:
+                logger.warning(f"Failed to publish SPEAK_PLAYED event: {e}")
+
+            return True
+        finally:
+            with contextlib.suppress(OSError):
+                os.unlink(tmp_path)