feat: add call history API endpoints and TTS service client

Adds read-only access to persisted call records for the dashboard and implements a client for the Rhema text-to-speech service.

- api/call_history.py: New router providing paged call lists and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS endpoints (Rhema/Kokoro) used for call-flow steps.
2026-05-22 06:28:33 -04:00
parent dbdb03beb9
commit 63f1a270bb
28 changed files with 2275 additions and 11 deletions
--- a/services/call_persistence.py
+++ b/services/call_persistence.py
@@ -0,0 +1,70 @@
+"""
+Call Persistence — Writes completed calls and their transcript chunks
+to the database when CallManager.end_call() fires.
+"""
+
+import logging
+import uuid
+from datetime import datetime
+
+from db.database import CallRecord, TranscriptChunk, get_session_factory
+from models.call import ActiveCall, CallStatus
+
+logger = logging.getLogger(__name__)
+
+
+# Speaker labels recognised at the start of a "label: text" transcript chunk.
+_KNOWN_SPEAKERS = frozenset({"caller", "agent", "receptionist", "caller_message"})
+
+
+def _split_speaker(text: str) -> tuple[str, str]:
+    """Split a ``"label: text"`` transcript chunk into ``(speaker, payload)``.
+
+    A chunk without a recognised label is returned unchanged with the
+    speaker ``"unknown"``. The ``caller_message`` label is stored as
+    ``"caller"``.
+    """
+    head, sep, rest = text.partition(":")
+    label = head.strip().lower()
+    if not sep or label not in _KNOWN_SPEAKERS:
+        return "unknown", text
+    speaker = "caller" if label == "caller_message" else label
+    return speaker, rest.strip()
+
+
+async def persist_call_on_end(call: ActiveCall, final_status: CallStatus) -> None:
+    """Insert a CallRecord and any transcript chunks for `call`.
+
+    Wired into CallManager via _on_call_ended in gateway.start().
+
+    Args:
+        call: The call that just ended; its fields are copied into the record.
+        final_status: Status stored on the record (its ``.value`` is persisted).
+
+    Persistence is best-effort: any exception is logged (with traceback)
+    and swallowed so that ending a call never fails because of the database.
+    """
+    try:
+        async with get_session_factory()() as session:
+            record = CallRecord(
+                id=call.id,
+                direction=call.direction,
+                remote_number=call.remote_number,
+                status=final_status.value,
+                mode=call.mode.value,
+                intent=call.intent,
+                started_at=call.started_at,
+                ended_at=datetime.now(),
+                duration=int(call.duration),
+                hold_time=int(call.hold_time),
+                device_used=call.device,
+                call_flow_id=call.call_flow_id,
+                classification_timeline=[
+                    {
+                        "timestamp": c.timestamp,
+                        "audio_type": c.audio_type.value,
+                        "confidence": c.confidence,
+                    }
+                    for c in call.classification_history
+                ],
+                metadata_={"services": list(call.services)},
+            )
+            session.add(record)
+
+            # Each transcript chunk gets its own row with a sequence number
+            # so the dashboard can render them in order with click-to-seek.
+            for seq, text in enumerate(call.transcript_chunks):
+                speaker, payload = _split_speaker(text)
+                session.add(TranscriptChunk(
+                    id=f"tc_{uuid.uuid4().hex[:10]}",
+                    call_id=call.id,
+                    seq=seq,
+                    # No per-chunk timing is available here; every row gets 0.
+                    t_offset_ms=0,
+                    speaker=speaker,
+                    text=payload,
+                ))
+
+            await session.commit()
+    except Exception as e:
+        # Keep the failure non-fatal, but record the traceback so a lost
+        # call record can be diagnosed afterwards.
+        logger.warning(f"Could not persist call {call.id}: {e}", exc_info=True)
--- a/services/hold_slayer.py
+++ b/services/hold_slayer.py
@@ -24,6 +24,7 @@ from models.call_flow import ActionType, CallFlow, CallFlowStep
 from models.events import EventType, GatewayEvent
 from services.audio_classifier import AudioClassifier
 from services.transcription import TranscriptionService
+from services.tts import TTSService

 logger = logging.getLogger(__name__)

@@ -68,6 +69,7 @@ class HoldSlayerService:
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
+        tts: Optional[TTSService] = None,
    ):
        self.gateway = gateway
        self.call_manager = call_manager
@@ -75,6 +77,7 @@ class HoldSlayerService:
        self.classifier = classifier
        self.transcription = transcription
        self.settings = settings
+        self.tts = tts

    async def run(
        self,
@@ -257,10 +260,7 @@ class HoldSlayerService:
                current_step_id = step.next_step

            elif step.action == ActionType.SPEAK:
-                # Say something into the call (TTS)
-                # TODO: Implement TTS integration
-                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
-                await asyncio.sleep(3.0)
+                await self._speak(call, sip_leg_id, step.action_value or "")
                current_step_id = step.next_step

            elif step.action == ActionType.TRANSFER:
@@ -715,3 +715,53 @@ class HoldSlayerService:
            logger.error(f"Failed to load call flow '{flow_id}': {e}")

        return None
+
+    async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> bool:
+        """
+        Synthesize `text` via TTS and play it into the call leg.
+
+        Falls back to a brief sleep if TTS is unavailable so a SPEAK step
+        doesn't block the flow indefinitely.
+
+        Args:
+            call: The active call; its id tags the temp file and the event.
+            sip_leg_id: Leg the synthesized WAV is played into.
+            text: Text to speak. Blank text is a no-op.
+
+        Returns:
+            True if audio was synthesized and played; False if the text was
+            blank, TTS/media pipeline is unavailable, or synthesis produced
+            no audio.
+        """
+        if not text.strip():
+            return False
+
+        if not self.tts or not getattr(self.gateway, "media_pipeline", None):
+            logger.warning(f"🗣️ TTS unavailable, skipping SPEAK: '{text[:60]}'")
+            await asyncio.sleep(2.0)
+            return False
+
+        import os
+        import tempfile
+
+        # mkstemp returns an open descriptor; close it right away because the
+        # TTS service writes to the file by path.
+        fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"speak_{call.id}_")
+        os.close(fd)
+
+        try:
+            ok = await self.tts.synthesize_to_file(text, tmp_path)
+            if not ok:
+                logger.warning(f"🗣️ TTS synthesis returned no audio for: '{text[:60]}'")
+                return False
+
+            logger.info(f"🗣️ Speaking: '{text[:80]}'")
+            await self.gateway.media_pipeline.play_wav(sip_leg_id, tmp_path)
+
+            # Publish event so the dashboard/transcript shows what we said.
+            try:
+                await self.gateway.event_bus.publish(
+                    GatewayEvent(
+                        type=EventType.SPEAK_PLAYED,
+                        call_id=call.id,
+                        data={"text": text},
+                        message=f"Played TTS: {text[:80]}",
+                    )
+                )
+            except Exception as e:
+                # Best-effort: the audio already played, so a bus failure must
+                # not fail the SPEAK step — but it should not be invisible.
+                logger.warning(
+                    f"🗣️ Could not publish SPEAK_PLAYED for {call.id}: {e}"
+                )
+
+            return True
+        finally:
+            # Always remove the temp WAV, whether or not playback succeeded.
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
--- a/services/receptionist.py
+++ b/services/receptionist.py
@@ -0,0 +1,345 @@
+"""
+AI Receptionist — Screens inbound calls, then routes or takes a message.
+
+State machine:
+  GREET    → TTS greeting plays into the call leg
+  LISTEN   → buffer audio from the leg's tap until end-of-utterance
+  CLASSIFY → LLM extracts intent, urgency, recommended action
+  DECIDE   → combine LLM recommendation with the routing decision
+             (rules win on conflict)
+  RING     → ring_chain devices; bridge on pickup
+  RECORD   → TTS prompt + WAV record up to message_max_seconds; transcribe
+             and notify
+"""
+
+import asyncio
+import logging
+import time as _time
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from models.call import ActiveCall, CallStatus
+from models.events import EventType, GatewayEvent
+from models.routing import RoutingAction, RoutingActionType, RoutingDecision
+
+logger = logging.getLogger(__name__)
+
+
+class ReceptionistService:
+    """Drives the receptionist state machine for a single inbound call.
+
+    :meth:`handle` runs greet → listen → classify → decide and then either
+    hangs up (reject/DND), rings devices, or records a message.
+    """
+
+    def __init__(self, gateway):
+        # The gateway provides everything used below: event bus, media
+        # pipeline, TTS, transcription, routing, SIP engine, call manager.
+        self.gateway = gateway
+        self.settings = gateway.settings.receptionist
+
+    async def handle(
+        self,
+        call: ActiveCall,
+        sip_leg_id: str,
+        routing_decision: Optional[RoutingDecision] = None,
+    ) -> None:
+        """Run the full receptionist flow for an inbound call.
+
+        Args:
+            call: The inbound call being screened.
+            sip_leg_id: Leg that prompts are played into and audio is read from.
+            routing_decision: Result of rule evaluation, if any; combined with
+                the LLM classification in :meth:`_decide`.
+
+        Any exception in the flow is logged and followed by a hangup attempt;
+        nothing is raised to the caller.
+        """
+        try:
+            await self._greet(call, sip_leg_id)
+
+            transcript = await self._listen(call, sip_leg_id)
+            if transcript:
+                call.transcript_chunks.append(f"caller: {transcript}")
+                await self.gateway.event_bus.publish(GatewayEvent(
+                    type=EventType.TRANSCRIPT_CHUNK,
+                    call_id=call.id,
+                    data={"text": transcript, "speaker": "caller"},
+                    message=f"📝 caller: {transcript[:80]}",
+                ))
+
+            classification = await self._classify(call, transcript, routing_decision)
+            call.intent = classification.get("intent")
+
+            await self.gateway.event_bus.publish(GatewayEvent(
+                type=EventType.RECEPTIONIST_CAPTURED_INTENT,
+                call_id=call.id,
+                data=classification,
+                message=f"Intent: {classification.get('intent', '?')}",
+            ))
+
+            action = self._decide(routing_decision, classification)
+
+            await self.gateway.event_bus.publish(GatewayEvent(
+                type=EventType.RECEPTIONIST_ROUTING,
+                call_id=call.id,
+                data={"action": action.type.value},
+                message=f"Routing decision: {action.type.value}",
+            ))
+
+            if action.type in (RoutingActionType.REJECT, RoutingActionType.DND):
+                if action.message:
+                    await self._speak(call, sip_leg_id, action.message)
+                await self._hangup(call, sip_leg_id)
+                return
+
+            if action.type in (RoutingActionType.RING_DEVICE, RoutingActionType.RING_CHAIN):
+                devices = self._resolve_device_list(action, classification)
+                if not devices:
+                    logger.info("Receptionist: no devices to ring, falling back to message")
+                    await self._take_message(call, sip_leg_id)
+                    return
+
+                await self._speak(
+                    call, sip_leg_id, "One moment, I'll connect you now."
+                )
+                answered = await self.gateway._routing.ring_chain(
+                    call.id, devices, action.ring_timeout
+                )
+                if answered:
+                    return  # Bridged to a device — receptionist done
+                # Nobody home — take a message
+                await self._take_message(call, sip_leg_id)
+                return
+
+            # Default: take a message
+            await self._take_message(call, sip_leg_id)
+
+        except Exception as e:
+            logger.error(f"Receptionist failed for {call.id}: {e}", exc_info=True)
+            try:
+                await self._hangup(call, sip_leg_id)
+            except Exception:
+                pass
+
+    # ----------------------------------------------------------------
+    # State machine steps
+    # ----------------------------------------------------------------
+
+    async def _greet(self, call: ActiveCall, sip_leg_id: str) -> None:
+        """Publish the greeting event, then speak the configured greeting."""
+        await self.gateway.event_bus.publish(GatewayEvent(
+            type=EventType.RECEPTIONIST_GREETING,
+            call_id=call.id,
+            data={"text": self.settings.greeting_template},
+            message="Playing greeting",
+        ))
+        await self._speak(call, sip_leg_id, self.settings.greeting_template)
+
+    async def _listen(self, call: ActiveCall, sip_leg_id: str) -> str:
+        """Buffer audio from the call's tap until end-of-utterance or timeout.
+
+        Listening stops once the caller has spoken and then stayed silent for
+        ``end_of_utterance_silence_s``, or when ``listen_timeout_s`` elapses.
+        Silence before the caller's first words never ends the capture.
+
+        Returns:
+            The transcription of the buffered audio, or ``""`` when there is
+            no media pipeline, no audio was captured, or transcription
+            returned nothing.
+        """
+        await self.gateway.event_bus.publish(GatewayEvent(
+            type=EventType.RECEPTIONIST_LISTENING,
+            call_id=call.id,
+            message="Listening for caller",
+        ))
+
+        media = self.gateway.media_pipeline
+        if media is None:
+            return ""
+
+        tap = media.create_tap(sip_leg_id)
+        audio = bytearray()
+        deadline = _time.monotonic() + self.settings.listen_timeout_s
+        silent_for = 0.0
+        heard_speech = False
+        frame_s = 0.02  # assumes the tap yields 20 ms frames — TODO confirm
+
+        try:
+            while _time.monotonic() < deadline:
+                remaining = max(0.05, deadline - _time.monotonic())
+                wait_s = min(0.5, remaining)
+                frame = await tap.read_frame(timeout=wait_s)
+                if frame is None:
+                    # Read timed out: count the time actually waited.
+                    silent_for += wait_s
+                else:
+                    audio.extend(frame)
+                    if self._frame_is_silent(frame):
+                        silent_for += frame_s
+                    else:
+                        heard_speech = True
+                        silent_for = 0.0
+                # Silent frames also land in `audio`, so "audio is non-empty"
+                # cannot tell whether the caller has started talking; require
+                # at least one non-silent frame before ending on silence.
+                if heard_speech and silent_for >= self.settings.end_of_utterance_silence_s:
+                    break
+        finally:
+            tap.close()
+
+        if not audio:
+            return ""
+
+        text = await self.gateway._transcription.transcribe(bytes(audio))
+        # Normalise a None/empty result so callers can rely on a str.
+        return text or ""
+
+    async def _classify(
+        self,
+        call: ActiveCall,
+        transcript: str,
+        routing_decision: Optional[RoutingDecision],
+    ) -> dict:
+        """Ask the LLM to interpret the caller's utterance.
+
+        Returns:
+            A dict with keys ``intent``, ``urgency``, ``recommended_action``
+            and ``device_hint``. A fixed fallback (recommending ``"ring"``)
+            is returned when no LLM is configured, the transcript is blank,
+            the LLM call fails, or the LLM reply is not a dict.
+        """
+        from services.hold_slayer import _get_llm
+
+        transcript = transcript or ""
+        fallback = {
+            "intent": transcript or "unknown",
+            "urgency": "normal",
+            "recommended_action": "ring",
+            "device_hint": None,
+        }
+
+        llm = _get_llm()
+        if llm is None or not transcript.strip():
+            return fallback
+
+        rules_summary = ""
+        if routing_decision and routing_decision.matched_rule_name:
+            rules_summary = (
+                f"A routing rule already matched: '{routing_decision.matched_rule_name}' "
+                f"(action: {routing_decision.action.type.value})."
+            )
+
+        try:
+            result = await llm.chat_json(
+                user_message=(
+                    f"Caller: {call.remote_number}\n"
+                    f"Transcript: {transcript}\n"
+                    f"{rules_summary}\n\n"
+                    "Return JSON with keys: intent (short string), "
+                    "urgency (low|normal|high), "
+                    "recommended_action (ring|message|reject), "
+                    "device_hint (string or null)."
+                ),
+                system=self.settings.llm_persona,
+            )
+        except Exception as e:
+            logger.warning(f"Receptionist LLM classify failed: {e}")
+            return fallback
+
+        if not isinstance(result, dict):
+            # handle() calls .get() on the result; a list/str reply would
+            # otherwise abort the whole call flow.
+            logger.warning(
+                f"Receptionist LLM returned {type(result).__name__}, expected dict"
+            )
+            return fallback
+        return result
+
+    def _decide(
+        self,
+        routing_decision: Optional[RoutingDecision],
+        classification: dict,
+    ) -> RoutingAction:
+        """Rules win on conflict; otherwise use the LLM's recommendation.
+
+        The routing decision's action is used when a rule actually matched
+        or when its action is anything other than TAKE_MESSAGE. Only an
+        unmatched TAKE_MESSAGE decision defers to the LLM's
+        ``recommended_action`` (``reject`` / ``message`` / anything else →
+        ring chain).
+        """
+        if routing_decision is not None:
+            rule_matched = bool(routing_decision.matched_rule_id)
+            if (
+                rule_matched
+                or routing_decision.action.type != RoutingActionType.TAKE_MESSAGE
+            ):
+                # An explicit "take a message" rule must not be overridden by
+                # the LLM — that would violate "rules win".
+                return routing_decision.action
+
+        recommended = (classification.get("recommended_action") or "ring").lower()
+        if recommended == "reject":
+            return RoutingAction(type=RoutingActionType.REJECT,
+                                 message="Sorry, I can't connect that call right now.")
+        if recommended == "message":
+            return RoutingAction(type=RoutingActionType.TAKE_MESSAGE)
+        return RoutingAction(type=RoutingActionType.RING_CHAIN)
+
+    def _resolve_device_list(
+        self, action: RoutingAction, classification: dict
+    ) -> list[str]:
+        """Return the device ids to ring for `action`, in ring order.
+
+        Precedence: the single ``device_id`` of a RING_DEVICE action, then the
+        action's ``device_ids``, then every gateway device that can receive a
+        call sorted by priority. `classification` is currently not consulted.
+        """
+        if action.type == RoutingActionType.RING_DEVICE and action.device_id:
+            return [action.device_id]
+        if action.device_ids:
+            return action.device_ids
+        # Default chain: every device that can take a call, in priority order
+        devices = sorted(
+            (d for d in self.gateway.devices.values() if d.can_receive_call),
+            key=lambda d: d.priority,
+        )
+        return [d.id for d in devices]
+
+    async def _take_message(self, call: ActiveCall, sip_leg_id: str) -> None:
+        """Prompt the caller, record a message, transcribe it, then hang up.
+
+        Recording always runs for ``message_max_seconds``. If the recording
+        service or media pipeline is missing the call is ended immediately.
+        """
+        await self._speak(call, sip_leg_id, self.settings.message_prompt)
+
+        media = self.gateway.media_pipeline
+        recording_svc = getattr(self.gateway, "_recording_service", None)
+        if recording_svc is None or media is None:
+            logger.warning("Receptionist: recording unavailable, ending call")
+            await self._hangup(call, sip_leg_id)
+            return
+
+        # RecordingService writes a WAV file and the recordings row.
+        await recording_svc.start_recording(
+            call.id, media_pipeline=media, leg_ids=[sip_leg_id]
+        )
+        try:
+            await asyncio.sleep(self.settings.message_max_seconds)
+        finally:
+            # Stop even if the wait is cancelled so the recording is finalised.
+            session = await recording_svc.stop_recording(
+                call.id, media_pipeline=media
+            )
+
+        message_text = ""
+        rec_path = session.filepath_mixed if session else None
+        if rec_path and Path(rec_path).exists():
+            try:
+                # Read off the event loop; the recording can be large.
+                audio_bytes = await asyncio.to_thread(Path(rec_path).read_bytes)
+                message_text = await self.gateway._transcription.transcribe(audio_bytes)
+            except Exception as e:
+                logger.warning(f"Receptionist transcribe failed: {e}")
+
+        if message_text:
+            call.transcript_chunks.append(f"caller_message: {message_text}")
+
+        await self.gateway.event_bus.publish(GatewayEvent(
+            type=EventType.RECEPTIONIST_MESSAGE_SAVED,
+            call_id=call.id,
+            data={
+                "path": rec_path,
+                "transcript": message_text,
+                "caller": call.remote_number,
+            },
+            message=f"📥 Message saved from {call.remote_number}",
+        ))
+
+        await self._hangup(call, sip_leg_id)
+
+    # ----------------------------------------------------------------
+    # Helpers
+    # ----------------------------------------------------------------
+
+    async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> None:
+        """Synthesize `text` to a temp WAV, play it into the leg, publish an event.
+
+        Silently does nothing when TTS or the media pipeline is missing, the
+        text is blank, or synthesis yields no audio. The temp file is always
+        removed.
+        """
+        tts = self.gateway._tts
+        media = self.gateway.media_pipeline
+        if tts is None or media is None or not text.strip():
+            return
+
+        import os
+        import tempfile
+
+        # Close the descriptor immediately; the TTS service writes by path.
+        fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"recept_{call.id}_")
+        os.close(fd)
+        try:
+            ok = await tts.synthesize_to_file(text, tmp_path)
+            if not ok:
+                return
+            await media.play_wav(sip_leg_id, tmp_path)
+            await self.gateway.event_bus.publish(GatewayEvent(
+                type=EventType.SPEAK_PLAYED,
+                call_id=call.id,
+                data={"text": text, "speaker": "receptionist"},
+                message=f"🗣️ {text[:80]}",
+            ))
+        finally:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+
+    async def _hangup(self, call: ActiveCall, sip_leg_id: str) -> None:
+        """Hang up the SIP leg (best-effort) and mark the call COMPLETED."""
+        try:
+            await self.gateway.sip_engine.hangup(sip_leg_id)
+        except Exception as e:
+            logger.warning(f"Receptionist hangup failed: {e}")
+        await self.gateway.call_manager.end_call(call.id, CallStatus.COMPLETED)
+
+    @staticmethod
+    def _frame_is_silent(frame: bytes, threshold: int = 500) -> bool:
+        """Crude RMS check on a 16-bit PCM frame (mono, signed LE).
+
+        Returns True when the frame holds no complete sample or its RMS
+        amplitude is below `threshold`. A trailing odd byte is ignored.
+        """
+        if not frame or len(frame) < 2:
+            return True
+        # Inline RMS — `audioop` was removed in Python 3.13.
+        import struct
+
+        n = len(frame) // 2
+        samples = struct.unpack_from(f"<{n}h", frame)
+        rms = (sum(s * s for s in samples) / n) ** 0.5
+        return rms < threshold
--- a/services/recording.py
+++ b/services/recording.py
@@ -138,6 +138,9 @@ class RecordingService:
        # Store metadata
        self._metadata.append(session.to_dict())

+        # Persist a recording row so the dashboard can find it later
+        await self._persist_recording(session)
+
        logger.info(
            f"⏹ Recording stopped: {call_id} "
            f"({session.duration_seconds}s, "
@@ -145,6 +148,29 @@ class RecordingService:
        )
        return session

+    @staticmethod
+    async def _persist_recording(session: "RecordingSession") -> None:
+        """Best-effort insert of a recordings row describing `session`.
+
+        Any failure (import, connection, commit) is logged as a warning and
+        swallowed so that stopping a recording never fails because of the
+        database.
+        """
+        try:
+            import uuid as _uuid
+            from db.database import RecordingRecord, get_session_factory
+
+            open_db = get_session_factory()
+            async with open_db() as db:
+                row = RecordingRecord(
+                    id=f"rec_{_uuid.uuid4().hex[:10]}",
+                    call_id=session.call_id,
+                    # Missing values are stored as empty/zero rather than NULL.
+                    path=session.filepath_mixed or "",
+                    format="wav",
+                    duration_s=float(session.duration_seconds or 0),
+                    size_bytes=int(session.file_size_bytes or 0),
+                    channels=1,
+                    started_at=session.started_at,
+                    ended_at=session.stopped_at,
+                )
+                db.add(row)
+                await db.commit()
+        except Exception as e:
+            logger.warning(f"Recording persistence failed: {e}")
+
    async def _recording_timeout(self, call_id: str) -> None:
        """Auto-stop recording after max duration."""
        await asyncio.sleep(self._max_recording_seconds)
--- a/services/routing.py
+++ b/services/routing.py
@@ -0,0 +1,258 @@
+"""
+Routing Service — Smart routing for inbound calls.
+
+Evaluates `RoutingRule` records against an incoming call's caller ID,
+DNIS, and the current time, returning a `RoutingDecision`. Also exposes
+a ring-chain helper that tries devices in priority order until one
+answers (or all timeouts elapse).
+"""
+
+import asyncio
+import fnmatch
+import logging
+import uuid
+from datetime import datetime, time
+from typing import Optional
+from zoneinfo import ZoneInfo
+
+from sqlalchemy import select
+
+from db.database import RoutingRuleRecord, get_session_factory
+from models.routing import (
+    RoutingAction,
+    RoutingActionType,
+    RoutingDecision,
+    RoutingMatch,
+    RoutingRule,
+    RoutingRuleCreate,
+    RoutingRuleUpdate,
+    TimeRange,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_hhmm(s: str) -> time:
+    """Convert an ``"HH:MM"`` string into a :class:`datetime.time`.
+
+    Raises ValueError if `s` does not have exactly two colon-separated
+    integer parts or the values are out of range.
+    """
+    hour_text, minute_text = s.split(":")
+    return time(int(hour_text), int(minute_text))
+
+
+def _time_in_range(now: datetime, tr: TimeRange) -> bool:
+    """True if `now` (interpreted in tr.tz) falls inside the window.
+
+    The window is ``[tr.start, tr.end)`` on the weekdays listed in
+    ``tr.days`` (compared against ``datetime.weekday()``). When start is
+    later than end the window wraps past midnight; the after-midnight part
+    then belongs to the weekday on which the window opened, so a Monday
+    22:00 → 06:00 window covers Tuesday 02:00 but not Monday 02:00.
+
+    An unusable ``tr.tz`` falls back to UTC. Malformed start/end strings
+    raise ValueError.
+    """
+    try:
+        tz = ZoneInfo(tr.tz)
+    except Exception:
+        # Unknown or invalid zone name: evaluate the window in UTC instead.
+        tz = ZoneInfo("UTC")
+
+    local = now.astimezone(tz)
+    today = local.weekday()
+    yesterday = (today - 1) % 7
+    if today not in tr.days and yesterday not in tr.days:
+        # Neither a same-day window nor an overnight tail can apply.
+        return False
+
+    start = _parse_hhmm(tr.start)
+    end = _parse_hhmm(tr.end)
+    cur = local.time()
+
+    if start <= end:
+        return today in tr.days and start <= cur < end
+
+    # Wraps past midnight (e.g. 22:00 → 06:00)
+    if cur >= start:
+        # Evening part: the window opened today.
+        return today in tr.days
+    if cur < end:
+        # Early-morning part: the window opened on the previous day.
+        return yesterday in tr.days
+    return False
+
+
+def _caller_matches(pattern: Optional[str], caller_number: str) -> bool:
+    """Check the caller number against a shell-style wildcard pattern.
+
+    An empty or missing pattern matches every caller. A missing caller
+    number is matched as the empty string.
+    """
+    if pattern:
+        return fnmatch.fnmatch(caller_number or "", pattern)
+    return True
+
+
+def _dnis_matches(rule_dnis: Optional[str], dnis: str) -> bool:
+    """Check the dialed number against a rule's DNIS by exact equality.
+
+    A rule with no DNIS (None or empty) matches any dialed number.
+    """
+    return (not rule_dnis) or rule_dnis == dnis
+
+
+class RoutingService:
+    """Caches routing rules in memory and evaluates them per inbound call.
+
+    Rules are loaded from the database by :meth:`reload`. The CRUD helpers
+    write to the database first and then bring the cache up to date.
+    """
+
+    # Action returned when no enabled rule matches the call.
+    DEFAULT_ACTION = RoutingAction(type=RoutingActionType.TAKE_MESSAGE)
+
+    def __init__(self, gateway):
+        self.gateway = gateway
+        self._rules: list[RoutingRule] = []
+        # Serialises writers of self._rules (reload/create/delete).
+        self._lock = asyncio.Lock()
+
+    async def start(self) -> None:
+        """Load the initial rule set."""
+        await self.reload()
+
+    async def reload(self) -> None:
+        """Replace the cached rules with the current database contents.
+
+        If the database cannot be read, the existing cache is kept: a
+        transient DB error must not silently drop every rule and send all
+        calls to the default action.
+        """
+        async with self._lock:
+            loaded = await self._load_rules_from_db()
+            if loaded is None:
+                logger.warning(
+                    f"Routing rules reload failed; keeping {len(self._rules)} cached rules"
+                )
+                return
+            self._rules = loaded
+            logger.info(f"📋 Loaded {len(self._rules)} routing rules")
+
+    @property
+    def rules(self) -> list[RoutingRule]:
+        """A shallow copy of the cached rules."""
+        return list(self._rules)
+
+    async def evaluate(
+        self,
+        caller_number: str,
+        dnis: str,
+        now: Optional[datetime] = None,
+    ) -> RoutingDecision:
+        """Walk rules in priority order, return first match or default.
+
+        Args:
+            caller_number: Caller ID matched against each rule's caller pattern.
+            dnis: Dialed number matched against each rule's DNIS.
+            now: Evaluation time; defaults to the current local time
+                (timezone-aware).
+
+        Returns:
+            A decision for the first enabled rule (lowest priority value,
+            ties broken by id) whose caller, DNIS and time conditions all
+            hold, or a decision carrying DEFAULT_ACTION.
+        """
+        now = now or datetime.now().astimezone()
+        for rule in sorted(self._rules, key=lambda r: (r.priority, r.id)):
+            if not rule.enabled:
+                continue
+            m = rule.match
+            if not _caller_matches(m.caller_pattern, caller_number):
+                continue
+            if not _dnis_matches(m.dnis, dnis):
+                continue
+            if m.time_range and not _time_in_range(now, m.time_range):
+                continue
+            return RoutingDecision(
+                matched_rule_id=rule.id,
+                matched_rule_name=rule.name,
+                action=rule.action,
+                reason=f"matched rule '{rule.name}'",
+            )
+        return RoutingDecision(
+            action=self.DEFAULT_ACTION,
+            reason="no rule matched — default take_message",
+        )
+
+    # ----------------------------------------------------------------
+    # Ring chain
+    # ----------------------------------------------------------------
+
+    async def ring_chain(
+        self,
+        call_id: str,
+        device_ids: list[str],
+        ring_timeout: int = 25,
+    ) -> Optional[str]:
+        """
+        Try each device sequentially. Returns the device_id that answered,
+        or None if all timed out.
+
+        Unknown devices and devices with ``dnd`` set are skipped. Each
+        transfer attempt is bounded by `ring_timeout` seconds (``None``
+        disables the bound); a timeout or any other error moves on to the
+        next device.
+        """
+        for device_id in device_ids:
+            device = self.gateway.devices.get(device_id)
+            if not device:
+                continue
+            if getattr(device, "dnd", False):
+                logger.info(f"📞 Skipping {device_id} (DND)")
+                continue
+            try:
+                # NOTE(review): on timeout wait_for cancels transfer_call;
+                # confirm that cancellation also stops the device ringing.
+                await asyncio.wait_for(
+                    self.gateway.transfer_call(call_id, device_id),
+                    timeout=ring_timeout,
+                )
+                # If transfer_call returned normally, treat as picked up.
+                return device_id
+            except asyncio.TimeoutError:
+                logger.info(f"📞 Ring timeout for device {device_id}")
+                continue
+            except Exception as e:
+                logger.warning(f"📞 Ring failed for device {device_id}: {e}")
+                continue
+        return None
+
+    # ----------------------------------------------------------------
+    # CRUD
+    # ----------------------------------------------------------------
+
+    async def create_rule(self, payload: RoutingRuleCreate) -> RoutingRule:
+        """Persist a new rule and append it to the cache.
+
+        Returns the created rule with a generated ``rule_<8 hex>`` id.
+        """
+        rule_id = f"rule_{uuid.uuid4().hex[:8]}"
+        record = RoutingRuleRecord(
+            id=rule_id,
+            name=payload.name,
+            priority=payload.priority,
+            enabled=payload.enabled,
+            match=payload.match.model_dump(),
+            action=payload.action.model_dump(),
+        )
+        async with get_session_factory()() as session:
+            session.add(record)
+            await session.commit()
+
+        rule = RoutingRule(
+            id=rule_id,
+            name=payload.name,
+            priority=payload.priority,
+            enabled=payload.enabled,
+            match=payload.match,
+            action=payload.action,
+        )
+        async with self._lock:
+            self._rules.append(rule)
+        return rule
+
+    async def update_rule(
+        self, rule_id: str, payload: RoutingRuleUpdate
+    ) -> Optional[RoutingRule]:
+        """Apply the non-None fields of `payload` to a stored rule.
+
+        Returns the rule as found in the cache after a reload, or None if
+        no rule with `rule_id` exists in the database.
+        """
+        async with get_session_factory()() as session:
+            result = await session.execute(
+                select(RoutingRuleRecord).where(RoutingRuleRecord.id == rule_id)
+            )
+            record = result.scalar_one_or_none()
+            if not record:
+                return None
+            if payload.name is not None:
+                record.name = payload.name
+            if payload.priority is not None:
+                record.priority = payload.priority
+            if payload.enabled is not None:
+                record.enabled = payload.enabled
+            if payload.match is not None:
+                record.match = payload.match.model_dump()
+            if payload.action is not None:
+                record.action = payload.action.model_dump()
+            await session.commit()
+
+        await self.reload()
+        return next((r for r in self._rules if r.id == rule_id), None)
+
+    async def delete_rule(self, rule_id: str) -> bool:
+        """Delete a rule from the database and the cache.
+
+        Returns False if no rule with `rule_id` exists in the database.
+        """
+        async with get_session_factory()() as session:
+            result = await session.execute(
+                select(RoutingRuleRecord).where(RoutingRuleRecord.id == rule_id)
+            )
+            record = result.scalar_one_or_none()
+            if not record:
+                return False
+            await session.delete(record)
+            await session.commit()
+        async with self._lock:
+            self._rules = [r for r in self._rules if r.id != rule_id]
+        return True
+
+    # ----------------------------------------------------------------
+    # Internals
+    # ----------------------------------------------------------------
+
+    async def _load_rules_from_db(self) -> Optional[list[RoutingRule]]:
+        """Read every rule row and convert it to a RoutingRule.
+
+        Returns None when the query itself fails, so the caller can tell a
+        failed load from an empty table. Rows that cannot be converted are
+        logged and skipped.
+        """
+        try:
+            async with get_session_factory()() as session:
+                result = await session.execute(select(RoutingRuleRecord))
+                rows = result.scalars().all()
+        except Exception as e:
+            logger.warning(f"Routing rules load failed: {e}")
+            return None
+
+        rules: list[RoutingRule] = []
+        for row in rows:
+            try:
+                match = RoutingMatch(**(row.match or {}))
+                action = RoutingAction(**(row.action or {}))
+                rules.append(
+                    RoutingRule(
+                        id=row.id,
+                        name=row.name,
+                        priority=row.priority,
+                        enabled=row.enabled,
+                        match=match,
+                        action=action,
+                        created_at=row.created_at,
+                        updated_at=row.updated_at,
+                    )
+                )
+            except Exception as e:
+                logger.warning(f"Skipping malformed rule {row.id}: {e}")
+        return rules
--- a/services/tts.py
+++ b/services/tts.py
@@ -0,0 +1,90 @@
+"""
+TTS Service — Rhema (OpenAI-compatible) text-to-speech client.
+
+Synthesizes speech for the SPEAK call-flow step and the AI Receptionist.
+Rhema exposes POST /v1/audio/speech (OpenAI-compatible) with Kokoro voices.
+"""
+
+import logging
+from pathlib import Path
+from typing import Optional
+
+import httpx
+
+from config import TTSSettings
+
+logger = logging.getLogger(__name__)
+
+
+class TTSService:
+    """Async HTTP client for the Rhema text-to-speech service."""
+
+    def __init__(self, settings: TTSSettings):
+        self.settings = settings
+        # Created lazily by _get_client(); None until the first request.
+        self._client: Optional[httpx.AsyncClient] = None
+
+    async def _get_client(self) -> httpx.AsyncClient:
+        """Return the shared AsyncClient, building it if absent or closed."""
+        current = self._client
+        if current is not None and not current.is_closed:
+            return current
+
+        headers = {}
+        if self.settings.api_key:
+            headers["Authorization"] = f"Bearer {self.settings.api_key}"
+        timeout = httpx.Timeout(self.settings.timeout, connect=5.0)
+        self._client = httpx.AsyncClient(
+            base_url=self.settings.base_url,
+            timeout=timeout,
+            headers=headers,
+        )
+        return self._client
+
+    async def synthesize(
+        self,
+        text: str,
+        voice: Optional[str] = None,
+        response_format: str = "wav",
+    ) -> bytes:
+        """POST `text` to /v1/audio/speech and return the response body.
+
+        Args:
+            text: Text to speak; blank text short-circuits to ``b""``.
+            voice: Voice name; falls back to the configured voice.
+            response_format: Audio container requested from the service.
+
+        Returns:
+            The raw audio bytes, or ``b""`` on blank input or any request
+            failure (failures are logged, never raised).
+        """
+        if not (text and text.strip()):
+            return b""
+
+        http = await self._get_client()
+        payload = {
+            "model": self.settings.model,
+            "input": text,
+            "voice": voice or self.settings.voice,
+            "response_format": response_format,
+            "sample_rate": self.settings.sample_rate,
+        }
+
+        audio = b""
+        try:
+            reply = await http.post("/v1/audio/speech", json=payload)
+            reply.raise_for_status()
+            audio = reply.content
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Rhema TTS error: {e.response.status_code} {e.response.text}")
+        except httpx.ConnectError:
+            logger.error(f"Cannot connect to Rhema at {self.settings.base_url}")
+        except Exception as e:
+            logger.error(f"TTS synthesis failed: {e}")
+        return audio
+
+    async def synthesize_to_file(
+        self,
+        text: str,
+        filepath: str | Path,
+        voice: Optional[str] = None,
+    ) -> bool:
+        """Synthesize `text` as WAV and write it to `filepath`.
+
+        Parent directories are created as needed. Returns True when audio
+        was written, False when synthesis produced nothing.
+        """
+        wav_bytes = await self.synthesize(text, voice=voice, response_format="wav")
+        if not wav_bytes:
+            return False
+
+        target = Path(filepath)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_bytes(wav_bytes)
+        logger.debug(f"TTS wrote {len(wav_bytes)} bytes to {target}")
+        return True
+
+    async def close(self) -> None:
+        """Close the HTTP client if one is open, and forget it."""
+        client = self._client
+        if not client or client.is_closed:
+            return
+        await client.aclose()
+        self._client = None