feat: add call history API endpoints and TTS service client
Adds read-only access to persisted call records for the dashboard and implements a client for the Rhema text-to-speech service.

- api/call_history.py: New router providing paged call lists and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS endpoints (Rhema/Kokoro) used for call-flow steps.
This commit is contained in:
345
services/receptionist.py
Normal file
345
services/receptionist.py
Normal file
@@ -0,0 +1,345 @@
|
||||
"""
|
||||
AI Receptionist — Screens inbound calls, then routes or takes a message.
|
||||
|
||||
State machine:
|
||||
GREET → TTS greeting plays into the call leg
|
||||
LISTEN → buffer audio from the leg's tap until end-of-utterance
|
||||
CLASSIFY → LLM extracts intent, urgency, recommended action
|
||||
DECIDE → combine LLM recommendation with the routing decision
|
||||
(rules win on conflict)
|
||||
RING → ring_chain devices; bridge on pickup
|
||||
RECORD → TTS prompt + WAV record up to message_max_seconds; transcribe
|
||||
and notify
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time as _time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from models.call import ActiveCall, CallStatus
|
||||
from models.events import EventType, GatewayEvent
|
||||
from models.routing import RoutingAction, RoutingActionType, RoutingDecision
|
||||
|
||||
# Module-level logger named after this module; used by ReceptionistService
# for flow failures and best-effort cleanup warnings.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReceptionistService:
    """Drive the receptionist state machine for a single inbound call.

    The flow is: greet the caller, listen for one utterance, ask the LLM to
    classify it, combine that with any routing decision, then either ring
    devices, reject the call, or record a message.

    All collaborators (event bus, media pipeline, TTS, transcription,
    routing, recording, SIP engine, call manager) are reached through the
    ``gateway`` object passed to the constructor.
    """

    # Longest single wait on the audio tap before re-checking the deadline.
    _READ_TIMEOUT_S = 0.5
    # Shortest wait requested from the tap, even right at the deadline.
    _MIN_READ_TIMEOUT_S = 0.05
    # Duration credited to each silent frame.
    # NOTE(review): assumes the tap delivers 20 ms frames — confirm against
    # the media pipeline.
    _FRAME_SECONDS = 0.020

    def __init__(self, gateway):
        """Store the gateway and its ``settings.receptionist`` section."""
        self.gateway = gateway
        self.settings = gateway.settings.receptionist

    async def handle(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        routing_decision: Optional[RoutingDecision] = None,
    ) -> None:
        """Run the full receptionist flow for an inbound call.

        Args:
            call: The call being screened.
            sip_leg_id: Identifier of the inbound SIP leg to play into,
                listen on, and hang up.
            routing_decision: Optional result of rule-based routing; when
                present it can override the LLM recommendation.

        Any exception raised by the flow is logged and answered with a
        best-effort hangup; nothing is re-raised to the caller.
        """
        try:
            await self._run_flow(call, sip_leg_id, routing_decision)
        except Exception as e:
            logger.error(
                "Receptionist failed for %s: %s", call.id, e, exc_info=True
            )
            try:
                await self._hangup(call, sip_leg_id)
            except Exception as cleanup_err:
                # Still best-effort, but leave a trace instead of hiding it.
                logger.warning(
                    "Receptionist cleanup hangup failed for %s: %s",
                    call.id,
                    cleanup_err,
                )

    async def _run_flow(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        routing_decision: Optional[RoutingDecision],
    ) -> None:
        """Execute greet, listen, classify, decide and act, in that order."""
        await self._greet(call, sip_leg_id)

        transcript = await self._listen(call, sip_leg_id)
        if transcript:
            call.transcript_chunks.append(f"caller: {transcript}")
            await self._publish(
                EventType.TRANSCRIPT_CHUNK,
                call,
                f"📝 caller: {transcript[:80]}",
                {"text": transcript, "speaker": "caller"},
            )

        classification = await self._classify(call, transcript, routing_decision)
        call.intent = classification.get("intent")
        await self._publish(
            EventType.RECEPTIONIST_CAPTURED_INTENT,
            call,
            f"Intent: {classification.get('intent', '?')}",
            classification,
        )

        action = self._decide(routing_decision, classification)
        await self._publish(
            EventType.RECEPTIONIST_ROUTING,
            call,
            f"Routing decision: {action.type.value}",
            {"action": action.type.value},
        )

        if action.type in (RoutingActionType.REJECT, RoutingActionType.DND):
            if action.message:
                await self._speak(call, sip_leg_id, action.message)
            await self._hangup(call, sip_leg_id)
            return

        if action.type in (RoutingActionType.RING_DEVICE, RoutingActionType.RING_CHAIN):
            await self._ring_or_take_message(call, sip_leg_id, action, classification)
            return

        # Default: take a message
        await self._take_message(call, sip_leg_id)

    async def _ring_or_take_message(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        action: RoutingAction,
        classification: dict,
    ) -> None:
        """Ring the resolved devices; fall back to a message if none answer."""
        devices = self._resolve_device_list(action, classification)
        if not devices:
            logger.info("Receptionist: no devices to ring, falling back to message")
            await self._take_message(call, sip_leg_id)
            return

        await self._speak(call, sip_leg_id, "One moment, I'll connect you now.")
        answered = await self.gateway._routing.ring_chain(
            call.id, devices, action.ring_timeout
        )
        if answered:
            return  # Bridged to a device — receptionist done

        # Nobody home — take a message
        await self._take_message(call, sip_leg_id)

    # ----------------------------------------------------------------
    # State machine steps
    # ----------------------------------------------------------------

    async def _greet(self, call: ActiveCall, sip_leg_id: str) -> None:
        """Announce the greeting on the event bus, then speak it to the caller."""
        greeting = self.settings.greeting_template
        await self._publish(
            EventType.RECEPTIONIST_GREETING,
            call,
            "Playing greeting",
            {"text": greeting},
        )
        await self._speak(call, sip_leg_id, greeting)

    async def _listen(self, call: ActiveCall, sip_leg_id: str) -> str:
        """Buffer audio from the call's tap until end-of-utterance or timeout.

        Listening stops when ``settings.listen_timeout_s`` elapses, or when
        the caller has been heard and then stays silent for
        ``settings.end_of_utterance_silence_s``. Silence before the caller
        has said anything does not end the turn; otherwise a caller who
        pauses after the greeting would be cut off before speaking.

        Returns:
            The transcription of the buffered audio, or ``""`` when there is
            no media pipeline or no audio was captured.
        """
        await self._publish(
            EventType.RECEPTIONIST_LISTENING, call, "Listening for caller"
        )

        media = self.gateway.media_pipeline
        if media is None:
            return ""

        tap = media.create_tap(sip_leg_id)
        audio = bytearray()
        deadline = _time.monotonic() + self.settings.listen_timeout_s
        silence_limit = self.settings.end_of_utterance_silence_s
        silent_for = 0.0
        heard_speech = False

        try:
            while True:
                remaining = deadline - _time.monotonic()
                if remaining <= 0:
                    break

                wait = min(
                    self._READ_TIMEOUT_S, max(self._MIN_READ_TIMEOUT_S, remaining)
                )
                frame = await tap.read_frame(timeout=wait)
                if frame is None:
                    # No frame within the wait: count the time actually
                    # requested rather than a flat half second.
                    silent_for += wait
                else:
                    audio.extend(frame)
                    if self._frame_is_silent(frame):
                        silent_for += self._FRAME_SECONDS
                    else:
                        heard_speech = True
                        silent_for = 0.0

                if heard_speech and silent_for >= silence_limit:
                    break
        finally:
            tap.close()

        if not audio:
            return ""

        return await self.gateway._transcription.transcribe(bytes(audio))

    async def _classify(
        self,
        call: ActiveCall,
        transcript: str,
        routing_decision: Optional[RoutingDecision],
    ) -> dict:
        """Ask the LLM to interpret the caller's utterance.

        Returns:
            A dict expected to carry ``intent``, ``urgency``,
            ``recommended_action`` and ``device_hint``. A default
            classification (recommending ``ring``) is returned when no LLM
            is available, the transcript is empty, the LLM call raises, or
            the LLM reply is not a JSON object.
        """
        from services.hold_slayer import _get_llm

        text = (transcript or "").strip()
        fallback = {
            "intent": text or "unknown",
            "urgency": "normal",
            "recommended_action": "ring",
            "device_hint": None,
        }

        llm = _get_llm()
        if llm is None or not text:
            return fallback

        rules_summary = ""
        if routing_decision and routing_decision.matched_rule_name:
            rules_summary = (
                f"A routing rule already matched: '{routing_decision.matched_rule_name}' "
                f"(action: {routing_decision.action.type.value})."
            )

        try:
            result = await llm.chat_json(
                user_message=(
                    f"Caller: {call.remote_number}\n"
                    f"Transcript: {transcript}\n"
                    f"{rules_summary}\n\n"
                    "Return JSON with keys: intent (short string), "
                    "urgency (low|normal|high), "
                    "recommended_action (ring|message|reject), "
                    "device_hint (string or null)."
                ),
                system=self.settings.llm_persona,
            )
        except Exception as e:
            logger.warning("Receptionist LLM classify failed: %s", e)
            return fallback

        if not isinstance(result, dict):
            # Callers use .get() on the result; a list or string reply would
            # otherwise abort the whole call flow.
            logger.warning(
                "Receptionist LLM classify returned %s instead of a JSON object",
                type(result).__name__,
            )
            return fallback
        return result

    def _decide(
        self,
        routing_decision: Optional[RoutingDecision],
        classification: dict,
    ) -> RoutingAction:
        """Rules win on conflict; otherwise use the LLM's recommendation.

        A routing decision is returned as-is unless its action is
        ``TAKE_MESSAGE``, in which case the LLM's ``recommended_action`` is
        consulted instead. An unrecognised or missing recommendation is
        treated as ``ring``.
        """
        if (
            routing_decision
            and routing_decision.action.type != RoutingActionType.TAKE_MESSAGE
        ):
            return routing_decision.action

        # str() guards against a non-string value in the LLM's JSON.
        recommended = (
            str(classification.get("recommended_action") or "ring").strip().lower()
        )
        if recommended == "reject":
            return RoutingAction(
                type=RoutingActionType.REJECT,
                message="Sorry, I can't connect that call right now.",
            )
        if recommended == "message":
            return RoutingAction(type=RoutingActionType.TAKE_MESSAGE)
        return RoutingAction(type=RoutingActionType.RING_CHAIN)

    def _resolve_device_list(
        self, action: RoutingAction, classification: dict
    ) -> list[str]:
        """Return the device ids to ring for ``action``, in ring order.

        Precedence: the single ``device_id`` of a ``RING_DEVICE`` action,
        then the action's explicit ``device_ids``, then every gateway device
        with ``can_receive_call`` set, sorted by ``priority``.

        ``classification`` (including the LLM's ``device_hint``) is accepted
        but not consulted yet.
        """
        if action.type == RoutingActionType.RING_DEVICE and action.device_id:
            return [action.device_id]
        if action.device_ids:
            # Copy so callers cannot mutate the action's own list.
            return list(action.device_ids)

        # Default chain: every device that can take a call, in priority order
        candidates = [
            d for d in self.gateway.devices.values() if d.can_receive_call
        ]
        candidates.sort(key=lambda d: d.priority)
        return [d.id for d in candidates]

    async def _take_message(self, call: ActiveCall, sip_leg_id: str) -> None:
        """Prompt the caller, record a message, transcribe it, and hang up.

        The recording runs for ``settings.message_max_seconds``; there is no
        early stop here. If the recording service or media pipeline is
        missing, the call is ended right after the prompt.
        """
        await self._speak(call, sip_leg_id, self.settings.message_prompt)

        media = self.gateway.media_pipeline
        recording_svc = getattr(self.gateway, "_recording_service", None)
        if recording_svc is None or media is None:
            logger.warning("Receptionist: recording unavailable, ending call")
            await self._hangup(call, sip_leg_id)
            return

        # RecordingService writes a WAV file and the recordings row.
        await recording_svc.start_recording(
            call.id, media_pipeline=media, leg_ids=[sip_leg_id]
        )
        try:
            await asyncio.sleep(self.settings.message_max_seconds)
        finally:
            # Always stop, even if the sleep is cancelled.
            session = await recording_svc.stop_recording(
                call.id, media_pipeline=media
            )

        message_text = ""
        rec_path = session.filepath_mixed if session else None
        if rec_path and Path(rec_path).exists():
            try:
                # Read off the event loop so a large WAV does not stall
                # other calls' audio handling.
                audio_bytes = await asyncio.to_thread(Path(rec_path).read_bytes)
                message_text = (
                    await self.gateway._transcription.transcribe(audio_bytes)
                ) or ""
            except Exception as e:
                logger.warning("Receptionist transcribe failed: %s", e)

        if message_text:
            call.transcript_chunks.append(f"caller_message: {message_text}")

        await self._publish(
            EventType.RECEPTIONIST_MESSAGE_SAVED,
            call,
            f"📥 Message saved from {call.remote_number}",
            {
                "path": rec_path,
                "transcript": message_text,
                "caller": call.remote_number,
            },
        )

        await self._hangup(call, sip_leg_id)

    # ----------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------

    async def _publish(self, event_type, call: ActiveCall, message: str, data=None) -> None:
        """Publish a ``GatewayEvent`` for ``call`` on the gateway event bus.

        ``data`` is only passed to the event when it is not ``None``.
        """
        fields = {"type": event_type, "call_id": call.id, "message": message}
        if data is not None:
            fields["data"] = data
        await self.gateway.event_bus.publish(GatewayEvent(**fields))

    async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> None:
        """Synthesize ``text`` to a temporary WAV and play it into the leg.

        Does nothing when TTS or the media pipeline is unavailable, when
        ``text`` is empty or only whitespace, or when synthesis reports
        failure. The temporary file is always removed afterwards.
        """
        tts = self.gateway._tts
        media = self.gateway.media_pipeline
        if tts is None or media is None or not text or not text.strip():
            return

        import os
        import tempfile

        fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"recept_{call.id}_")
        os.close(fd)  # only the path is needed; the TTS client writes the file
        try:
            ok = await tts.synthesize_to_file(text, tmp_path)
            if not ok:
                return
            await media.play_wav(sip_leg_id, tmp_path)
            await self._publish(
                EventType.SPEAK_PLAYED,
                call,
                f"🗣️ {text[:80]}",
                {"text": text, "speaker": "receptionist"},
            )
        finally:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # best-effort temp-file cleanup

    async def _hangup(self, call: ActiveCall, sip_leg_id: str) -> None:
        """Hang up the SIP leg (best-effort) and mark the call completed."""
        try:
            await self.gateway.sip_engine.hangup(sip_leg_id)
        except Exception as e:
            logger.warning("Receptionist hangup failed: %s", e)
        await self.gateway.call_manager.end_call(call.id, CallStatus.COMPLETED)

    @staticmethod
    def _frame_is_silent(frame: bytes, threshold: int = 500) -> bool:
        """Return True when the frame's RMS level is below ``threshold``.

        The frame is read as 16-bit signed little-endian PCM samples. A
        trailing odd byte is ignored, and a frame with no complete sample
        counts as silent.
        """
        # Inline RMS — `audioop` was removed in Python 3.13.
        import struct

        n = len(frame) // 2 if frame else 0
        if n == 0:
            return True

        samples = struct.unpack_from(f"<{n}h", frame)
        rms = (sum(s * s for s in samples) / n) ** 0.5
        return rms < threshold
|
||||
Reference in New Issue
Block a user