# hold-slayer/services/receptionist.py

"""
AI Receptionist — Screens inbound calls, then routes or takes a message.

State machine:
  GREET    → TTS greeting plays into the call leg
  LISTEN   → buffer audio from the leg's tap until end-of-utterance
  CLASSIFY → LLM extracts intent, urgency, recommended action
  DECIDE   → combine LLM recommendation with the routing decision
             (rules win on conflict)
  RING     → ring_chain devices; bridge on pickup
  RECORD   → TTS prompt + WAV record up to message_max_seconds; transcribe
             and notify
"""

import asyncio
import logging
import time as _time
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional

from models.call import ActiveCall, CallStatus
from models.events import EventType, GatewayEvent
from models.routing import RoutingAction, RoutingActionType, RoutingDecision

logger = logging.getLogger(__name__)


class ReceptionistService:
    """Drives the receptionist state machine for a single inbound call."""

    def __init__(self, gateway):
        self.gateway = gateway
        self.settings = gateway.settings.receptionist

    async def handle(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        routing_decision: Optional[RoutingDecision] = None,
    ) -> None:
        """Run the full receptionist flow for an inbound call."""
        try:
            await self._greet(call, sip_leg_id)

            transcript = await self._listen(call, sip_leg_id)
            if transcript:
                call.transcript_chunks.append(f"caller: {transcript}")
                await self.gateway.event_bus.publish(GatewayEvent(
                    type=EventType.TRANSCRIPT_CHUNK,
                    call_id=call.id,
                    data={"text": transcript, "speaker": "caller"},
                    message=f"📝 caller: {transcript[:80]}",
                ))

            classification = await self._classify(call, transcript, routing_decision)
            call.intent = classification.get("intent")

            await self.gateway.event_bus.publish(GatewayEvent(
                type=EventType.RECEPTIONIST_CAPTURED_INTENT,
                call_id=call.id,
                data=classification,
                message=f"Intent: {classification.get('intent', '?')}",
            ))

            action = self._decide(routing_decision, classification)

            await self.gateway.event_bus.publish(GatewayEvent(
                type=EventType.RECEPTIONIST_ROUTING,
                call_id=call.id,
                data={"action": action.type.value},
                message=f"Routing decision: {action.type.value}",
            ))

            if action.type in (RoutingActionType.REJECT, RoutingActionType.DND):
                if action.message:
                    await self._speak(call, sip_leg_id, action.message)
                await self._hangup(call, sip_leg_id)
                return

            if action.type in (RoutingActionType.RING_DEVICE, RoutingActionType.RING_CHAIN):
                devices = self._resolve_device_list(action, classification)
                if not devices:
                    logger.info("Receptionist: no devices to ring, falling back to message")
                    await self._take_message(call, sip_leg_id)
                    return

                await self._speak(
                    call, sip_leg_id, "One moment, I'll connect you now."
                )
                answered = await self.gateway._routing.ring_chain(
                    call.id, devices, action.ring_timeout
                )
                if answered:
                    return  # Bridged to a device — receptionist done
                # Nobody home — take a message
                await self._take_message(call, sip_leg_id)
                return

            # Default: take a message
            await self._take_message(call, sip_leg_id)

        except Exception as e:
            logger.error(f"Receptionist failed for {call.id}: {e}", exc_info=True)
            try:
                await self._hangup(call, sip_leg_id)
            except Exception:
                pass

    # ----------------------------------------------------------------
    # State machine steps
    # ----------------------------------------------------------------

    async def _greet(self, call: ActiveCall, sip_leg_id: str) -> None:
        await self.gateway.event_bus.publish(GatewayEvent(
            type=EventType.RECEPTIONIST_GREETING,
            call_id=call.id,
            data={"text": self.settings.greeting_template},
            message="Playing greeting",
        ))
        await self._speak(call, sip_leg_id, self.settings.greeting_template)

    async def _listen(self, call: ActiveCall, sip_leg_id: str) -> str:
        """Buffer audio from the call's tap until silence or timeout."""
        await self.gateway.event_bus.publish(GatewayEvent(
            type=EventType.RECEPTIONIST_LISTENING,
            call_id=call.id,
            message="Listening for caller",
        ))

        media = self.gateway.media_pipeline
        if media is None:
            return ""

        tap = media.create_tap(sip_leg_id)
        audio = bytearray()
        deadline = _time.monotonic() + self.settings.listen_timeout_s
        silent_for = 0.0
        frame_ms = 20

        try:
            while _time.monotonic() < deadline:
                remaining = max(0.05, deadline - _time.monotonic())
                frame = await tap.read_frame(timeout=min(0.5, remaining))
                if frame is None:
                    silent_for += 0.5
                else:
                    audio.extend(frame)
                    if self._frame_is_silent(frame):
                        silent_for += frame_ms / 1000.0
                    else:
                        silent_for = 0.0
                if silent_for >= self.settings.end_of_utterance_silence_s and audio:
                    break
        finally:
            tap.close()

        if not audio:
            return ""

        return await self.gateway._transcription.transcribe(bytes(audio))

    async def _classify(
        self,
        call: ActiveCall,
        transcript: str,
        routing_decision: Optional[RoutingDecision],
    ) -> dict:
        """Ask the LLM to interpret the caller's utterance."""
        from services.hold_slayer import _get_llm

        llm = _get_llm()
        if llm is None or not transcript.strip():
            return {
                "intent": transcript or "unknown",
                "urgency": "normal",
                "recommended_action": "ring",
                "device_hint": None,
            }

        rules_summary = ""
        if routing_decision and routing_decision.matched_rule_name:
            rules_summary = (
                f"A routing rule already matched: '{routing_decision.matched_rule_name}' "
                f"(action: {routing_decision.action.type.value})."
            )

        try:
            return await llm.chat_json(
                user_message=(
                    f"Caller: {call.remote_number}\n"
                    f"Transcript: {transcript}\n"
                    f"{rules_summary}\n\n"
                    "Return JSON with keys: intent (short string), "
                    "urgency (low|normal|high), "
                    "recommended_action (ring|message|reject), "
                    "device_hint (string or null)."
                ),
                system=self.settings.llm_persona,
            )
        except Exception as e:
            logger.warning(f"Receptionist LLM classify failed: {e}")
            return {
                "intent": transcript,
                "urgency": "normal",
                "recommended_action": "ring",
                "device_hint": None,
            }

    def _decide(
        self,
        routing_decision: Optional[RoutingDecision],
        classification: dict,
    ) -> RoutingAction:
        """Rules win on conflict; otherwise use the LLM's recommendation."""
        if routing_decision and routing_decision.action.type not in (
            RoutingActionType.TAKE_MESSAGE,
        ):
            return routing_decision.action

        recommended = (classification.get("recommended_action") or "ring").lower()
        if recommended == "reject":
            return RoutingAction(type=RoutingActionType.REJECT,
                                 message="Sorry, I can't connect that call right now.")
        if recommended == "message":
            return RoutingAction(type=RoutingActionType.TAKE_MESSAGE)
        return RoutingAction(type=RoutingActionType.RING_CHAIN)

    def _resolve_device_list(
        self, action: RoutingAction, classification: dict
    ) -> list[str]:
        if action.type == RoutingActionType.RING_DEVICE and action.device_id:
            return [action.device_id]
        if action.device_ids:
            return action.device_ids
        # Default chain: every device that can take a call, in priority order
        devices = sorted(
            (d for d in self.gateway.devices.values() if d.can_receive_call),
            key=lambda d: d.priority,
        )
        return [d.id for d in devices]

    async def _take_message(self, call: ActiveCall, sip_leg_id: str) -> None:
        await self._speak(call, sip_leg_id, self.settings.message_prompt)

        media = self.gateway.media_pipeline
        recording_svc = getattr(self.gateway, "_recording_service", None)
        if recording_svc is None or media is None:
            logger.warning("Receptionist: recording unavailable, ending call")
            await self._hangup(call, sip_leg_id)
            return

        # RecordingService writes a WAV file and the recordings row.
        session = await recording_svc.start_recording(
            call.id, media_pipeline=media, leg_ids=[sip_leg_id]
        )
        try:
            await asyncio.sleep(self.settings.message_max_seconds)
        finally:
            session = await recording_svc.stop_recording(
                call.id, media_pipeline=media
            )

        message_text = ""
        rec_path = session.filepath_mixed if session else None
        if rec_path and Path(rec_path).exists():
            try:
                audio_bytes = Path(rec_path).read_bytes()
                message_text = await self.gateway._transcription.transcribe(audio_bytes)
            except Exception as e:
                logger.warning(f"Receptionist transcribe failed: {e}")

        if message_text:
            call.transcript_chunks.append(f"caller_message: {message_text}")

        await self.gateway.event_bus.publish(GatewayEvent(
            type=EventType.RECEPTIONIST_MESSAGE_SAVED,
            call_id=call.id,
            data={
                "path": rec_path,
                "transcript": message_text,
                "caller": call.remote_number,
            },
            message=f"📥 Message saved from {call.remote_number}",
        ))

        await self._hangup(call, sip_leg_id)

    # ----------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------

    async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> None:
        tts = self.gateway._tts
        media = self.gateway.media_pipeline
        if tts is None or media is None or not text.strip():
            return

        import os
        import tempfile

        fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"recept_{call.id}_")
        os.close(fd)
        try:
            ok = await tts.synthesize_to_file(text, tmp_path)
            if not ok:
                return
            await media.play_wav(sip_leg_id, tmp_path)
            await self.gateway.event_bus.publish(GatewayEvent(
                type=EventType.SPEAK_PLAYED,
                call_id=call.id,
                data={"text": text, "speaker": "receptionist"},
                message=f"🗣️ {text[:80]}",
            ))
        finally:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

    async def _hangup(self, call: ActiveCall, sip_leg_id: str) -> None:
        try:
            await self.gateway.sip_engine.hangup(sip_leg_id)
        except Exception as e:
            logger.warning(f"Receptionist hangup failed: {e}")
        await self.gateway.call_manager.end_call(call.id, CallStatus.COMPLETED)

    @staticmethod
    def _frame_is_silent(frame: bytes, threshold: int = 500) -> bool:
        """Crude RMS-style check on a 16-bit PCM frame (mono, signed LE)."""
        if not frame or len(frame) < 2:
            return True
        # Inline RMS — `audioop` was removed in Python 3.13.
        import struct

        n = len(frame) // 2
        if n == 0:
            return True
        samples = struct.unpack_from(f"<{n}h", frame)
        sq = 0
        for s in samples:
            sq += s * s
        rms = (sq / n) ** 0.5
        return rms < threshold