Files
hold-slayer/models/call.py
Robert Helewka 63f1a270bb feat: add call history API endpoints and TTS service client
Adds read-only access to persisted call records for the dashboard
and implements a client for the Rhema text-to-speech service.

- api/call_history.py: New router providing paged call lists
  and detailed call records with transcript metadata.
- services/tts.py: Async client for OpenAI-compatible TTS
  endpoints (Rhema/Kokoro) used for call-flow steps.
2026-05-22 06:28:33 -04:00

171 lines
5.3 KiB
Python

"""
Call models — Active call state, requests, and responses.
"""
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class CallStatus(str, Enum):
    """States a call can be in over its lifetime.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase wire value (e.g. ``CallStatus.ON_HOLD == "on_hold"``).
    """

    INITIATING = "initiating"
    RINGING = "ringing"
    CONNECTED = "connected"
    NAVIGATING_IVR = "navigating_ivr"
    ON_HOLD = "on_hold"
    HUMAN_DETECTED = "human_detected"
    TRANSFERRING = "transferring"
    # The user is connected to the remote party.
    BRIDGED = "bridged"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class CallMode(str, Enum):
    """Strategy used to handle a call.

    Members are also ``str`` instances and compare equal to their wire value.
    """

    # Call and connect immediately.
    DIRECT = "direct"
    # Navigate the IVR, wait on hold, transfer once a human answers.
    HOLD_SLAYER = "hold_slayer"
    # Connect with transcription, recording and noise cancellation.
    AI_ASSISTED = "ai_assisted"
    # AI screens an inbound caller, then routes the call or takes a message.
    RECEPTIONIST = "receptionist"
class AudioClassification(str, Enum):
    """Category of the audio currently heard on a call.

    Members are also ``str`` instances and compare equal to their wire value.
    """

    SILENCE = "silence"
    # Hold music.
    MUSIC = "music"
    # Automated voice (TTS or a recording).
    IVR_PROMPT = "ivr_prompt"
    # A real person talking.
    LIVE_HUMAN = "live_human"
    # Ring-back tone.
    RINGING = "ringing"
    # Touch tones.
    DTMF = "dtmf"
    UNKNOWN = "unknown"
class ClassificationResult(BaseModel):
    """One audio-classification sample taken at a specific moment."""

    # Unix timestamp of the sample.
    timestamp: float
    audio_type: AudioClassification
    # Intended range is 0.0 - 1.0; the model itself does not enforce it.
    confidence: float
    # Optional extra analysis data; no schema is imposed here.
    details: Optional[dict] = None
class ActiveCall(BaseModel):
    """In-memory state for an active call.

    Alongside the stored fields, the model exposes three derived read-only
    values (``duration``, ``hold_time``, ``transcript``) and a ``summary()``
    helper that packs the most relevant values into a plain dict.
    """

    id: str
    direction: str = "outbound"
    remote_number: str
    status: CallStatus = CallStatus.INITIATING
    mode: CallMode = CallMode.DIRECT
    intent: Optional[str] = None
    call_flow_id: Optional[str] = None
    device: Optional[str] = None
    # Defaults to the (naive, local) time at which the model is created.
    started_at: datetime = Field(default_factory=datetime.now)
    connected_at: Optional[datetime] = None
    hold_started_at: Optional[datetime] = None
    current_classification: AudioClassification = AudioClassification.UNKNOWN
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    transcript_chunks: list[str] = Field(default_factory=list)
    # Current position in the call flow.
    current_step_id: Optional[str] = None
    # Services active on this call.
    services: list[str] = Field(default_factory=list)

    @property
    def duration(self) -> int:
        """Whole seconds elapsed since ``connected_at``.

        Returns 0 when the call has not connected yet. The value is measured
        against the current clock every time the property is read.
        """
        if self.connected_at is None:
            return 0
        # Take "now" in the timestamp's own tzinfo: identical to a bare
        # datetime.now() for naive values, and avoids the naive/aware
        # subtraction TypeError when the timestamp carries a timezone.
        now = datetime.now(tz=self.connected_at.tzinfo)
        return int((now - self.connected_at).total_seconds())

    @property
    def hold_time(self) -> int:
        """Whole seconds elapsed since ``hold_started_at``.

        Only reported while ``status`` is ``CallStatus.ON_HOLD``; in every
        other state, or when no hold start was recorded, the result is 0.
        """
        if self.hold_started_at is None or self.status != CallStatus.ON_HOLD:
            return 0
        # Same tz handling as ``duration`` so aware timestamps do not raise.
        now = datetime.now(tz=self.hold_started_at.tzinfo)
        return int((now - self.hold_started_at).total_seconds())

    @property
    def transcript(self) -> str:
        """All transcript chunks collected so far, joined with newlines."""
        return "\n".join(self.transcript_chunks)

    def summary(self) -> dict:
        """Return a compact dict describing the call, for list views.

        Enum fields are emitted as their string values; ``duration`` and
        ``hold_time`` are evaluated at call time.
        """
        return {
            "call_id": self.id,
            "remote_number": self.remote_number,
            "status": self.status.value,
            "mode": self.mode.value,
            "duration": self.duration,
            "hold_time": self.hold_time,
            "audio_type": self.current_classification.value,
            "intent": self.intent,
        }
# ============================================================
# API Request/Response Models
# ============================================================
class CallRequest(BaseModel):
    """Payload for placing an outbound call."""

    # Number to dial, in E.164 format.
    number: str
    mode: CallMode = CallMode.DIRECT
    # What the caller needs (used for hold_slayer IVR navigation).
    intent: Optional[str] = None
    # Target device to ring / transfer to.
    device: Optional[str] = None
    # Use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # A fresh list is built per instance, so the default is never shared.
    services: list[str] = Field(default_factory=lambda: ["recording", "transcription"])
class HoldSlayerRequest(BaseModel):
    """Payload for launching the Hold Slayer on a number."""

    # Number to dial, in E.164 format.
    number: str
    # Free-text goal, e.g. "dispute a charge on my December statement".
    intent: str
    # Optional: use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # Device to ring when a human is detected.
    transfer_to: Optional[str] = None
    # Notification channels; a fresh ["push"] list is built per instance.
    notify: list[str] = Field(default_factory=lambda: ["push"])
class CallResponse(BaseModel):
    """Reply returned once a call has been initiated.

    ``status`` and ``mode`` are plain strings here, not enum members.
    """

    call_id: str
    status: str
    number: str
    mode: str
    message: Optional[str] = None
class CallStatusResponse(BaseModel):
    """Detailed status report for an active or completed call.

    ``status``, ``mode`` and ``audio_type`` are plain strings here, not enum
    members.
    """

    call_id: str
    status: str
    direction: str
    remote_number: str
    mode: str
    duration: int
    hold_time: int
    audio_type: str
    intent: Optional[str] = None
    # Tail of the transcript (the last N characters).
    transcript_excerpt: Optional[str] = None
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    current_step: Optional[str] = None
    services: list[str] = Field(default_factory=list)
class TransferRequest(BaseModel):
    """Payload asking for a call to be transferred to a device."""

    # Device ID or device type to transfer to.
    device: str