Files
hold-slayer/models/call.py
Robert Helewka ecf37658ce feat: add initial Hold Slayer AI telephony gateway implementation
Complete project scaffolding and core implementation of an AI-powered
telephony system that calls companies, navigates IVR menus, waits on
hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
2026-03-21 19:23:26 +00:00

170 lines
5.2 KiB
Python

"""
Call models — Active call state, requests, and responses.
"""
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class CallStatus(str, Enum):
    """Call lifecycle states."""

    # Members mix in ``str``, so each one compares equal to its plain
    # string value. Declaration order is kept as-is because iteration
    # over the enum follows it.
    INITIATING = "initiating"
    RINGING = "ringing"
    CONNECTED = "connected"
    NAVIGATING_IVR = "navigating_ivr"
    ON_HOLD = "on_hold"
    HUMAN_DETECTED = "human_detected"
    TRANSFERRING = "transferring"
    # User is connected to the remote party.
    BRIDGED = "bridged"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class CallMode(str, Enum):
    """How the call should be handled."""

    # Call and connect immediately.
    DIRECT = "direct"
    # Navigate the IVR, wait on hold, transfer once a human is detected.
    HOLD_SLAYER = "hold_slayer"
    # Connect with transcription, recording and noise cancellation.
    AI_ASSISTED = "ai_assisted"
class AudioClassification(str, Enum):
    """What kind of audio is currently playing."""

    SILENCE = "silence"
    # Hold music.
    MUSIC = "music"
    # Automated voice (TTS or a recording).
    IVR_PROMPT = "ivr_prompt"
    # A real person talking.
    LIVE_HUMAN = "live_human"
    # Ring-back tone.
    RINGING = "ringing"
    # Touch tones.
    DTMF = "dtmf"
    UNKNOWN = "unknown"
class ClassificationResult(BaseModel):
    """A single audio classification at a point in time."""

    # Moment of the observation, as a Unix timestamp.
    timestamp: float
    # Which kind of audio was recognised.
    audio_type: AudioClassification
    # Confidence score, 0.0 - 1.0 (the range is not enforced by this model).
    confidence: float
    # Extra analysis data, when available.
    details: Optional[dict] = None
class ActiveCall(BaseModel):
    """In-memory state for an active call."""

    # --- Identity and routing -------------------------------------------
    id: str
    direction: str = "outbound"
    remote_number: str
    status: CallStatus = CallStatus.INITIATING
    mode: CallMode = CallMode.DIRECT
    intent: Optional[str] = None
    call_flow_id: Optional[str] = None
    device: Optional[str] = None

    # --- Timing marks ---------------------------------------------------
    # ``started_at`` defaults to a naive local timestamp taken when the
    # instance is created; the other two stay None until a caller sets them.
    started_at: datetime = Field(default_factory=datetime.now)
    connected_at: Optional[datetime] = None
    hold_started_at: Optional[datetime] = None

    # --- Audio analysis and transcript ----------------------------------
    current_classification: AudioClassification = AudioClassification.UNKNOWN
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    transcript_chunks: list[str] = Field(default_factory=list)
    # Current position in the call flow.
    current_step_id: Optional[str] = None
    # Active services on this call.
    services: list[str] = Field(default_factory=list)

    @property
    def duration(self) -> int:
        """Whole seconds elapsed since ``connected_at``.

        Returns:
            0 when ``connected_at`` is unset, otherwise the elapsed time
            truncated to an integer number of seconds.
        """
        # NOTE(review): no end timestamp is stored on this model, so the
        # value keeps growing for as long as ``connected_at`` is set.
        if self.connected_at is None:
            return 0
        # Take "now" in the same timezone flavour as the stored value:
        # tzinfo is None for naive datetimes (same result as a bare
        # ``datetime.now()``), while an aware ``connected_at`` would make a
        # naive "now" raise TypeError on subtraction.
        now = datetime.now(self.connected_at.tzinfo)
        return int((now - self.connected_at).total_seconds())

    @property
    def hold_time(self) -> int:
        """Whole seconds spent in the current hold.

        Returns:
            0 unless ``hold_started_at`` is set and the status is
            ``CallStatus.ON_HOLD``; otherwise the seconds elapsed since
            ``hold_started_at``, truncated to an integer.
        """
        if self.hold_started_at is None or self.status != CallStatus.ON_HOLD:
            return 0
        # Same naive/aware handling as in ``duration``.
        now = datetime.now(self.hold_started_at.tzinfo)
        return int((now - self.hold_started_at).total_seconds())

    @property
    def transcript(self) -> str:
        """All transcript chunks so far, joined with newlines."""
        return "\n".join(self.transcript_chunks)

    def summary(self) -> dict:
        """Build a compact dict for list views.

        Returns:
            A dict with the call id, remote number, the string values of
            status, mode and current audio classification, the computed
            ``duration`` and ``hold_time``, and the intent.
        """
        return {
            "call_id": self.id,
            "remote_number": self.remote_number,
            "status": self.status.value,
            "mode": self.mode.value,
            "duration": self.duration,
            "hold_time": self.hold_time,
            "audio_type": self.current_classification.value,
            "intent": self.intent,
        }
# ============================================================
# API Request/Response Models
# ============================================================
class CallRequest(BaseModel):
    """Request to place an outbound call."""

    # Number to dial, in E.164 format.
    number: str
    mode: CallMode = CallMode.DIRECT
    # What you need (used for hold_slayer IVR navigation).
    intent: Optional[str] = None
    # Target device to ring / transfer to.
    device: Optional[str] = None
    # Use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # A factory is used so every request gets its own fresh list.
    services: list[str] = Field(default_factory=lambda: ["recording", "transcription"])
class HoldSlayerRequest(BaseModel):
    """Request to launch the Hold Slayer."""

    # Number to dial, in E.164 format.
    number: str
    # Free-text goal, e.g. "dispute a charge on my December statement".
    intent: str
    # Optional: use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # Device to ring when a human is detected.
    transfer_to: Optional[str] = None
    # Notification channels; a factory gives each request its own list.
    notify: list[str] = Field(default_factory=lambda: ["push"])
class CallResponse(BaseModel):
    """Response after initiating a call."""

    # Everything is carried as plain strings; ``message`` is the only
    # optional field and defaults to None.
    call_id: str
    status: str
    number: str
    mode: str
    message: Optional[str] = None
class CallStatusResponse(BaseModel):
    """Full status of an active or completed call."""

    # --- Required fields ------------------------------------------------
    call_id: str
    status: str
    direction: str
    remote_number: str
    mode: str
    duration: int
    hold_time: int
    audio_type: str

    # --- Optional details -----------------------------------------------
    intent: Optional[str] = None
    # Last N chars of the transcript.
    transcript_excerpt: Optional[str] = None
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    current_step: Optional[str] = None
    services: list[str] = Field(default_factory=list)
class TransferRequest(BaseModel):
    """Request to transfer a call to a device."""

    # Device ID or type.
    device: str