feat: add initial Hold Slayer AI telephony gateway implementation
Complete project scaffolding and core implementation of an AI-powered telephony system that calls companies, navigates IVR menus, waits on hold, and transfers the call to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
This commit is contained in:
1
models/__init__.py
Normal file
1
models/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Pydantic models — call flows, calls, contacts, devices, events."""
|
||||
169
models/call.py
Normal file
169
models/call.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Call models — Active call state, requests, and responses.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class CallStatus(str, Enum):
    """Lifecycle states of a call.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    INITIATING = "initiating"
    RINGING = "ringing"
    CONNECTED = "connected"
    NAVIGATING_IVR = "navigating_ivr"
    ON_HOLD = "on_hold"
    HUMAN_DETECTED = "human_detected"
    TRANSFERRING = "transferring"
    BRIDGED = "bridged"  # the user is connected to the remote party
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class CallMode(str, Enum):
    """Handling strategy for a call.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    # Call and connect immediately.
    DIRECT = "direct"
    # Navigate the IVR, wait on hold, transfer once a human is detected.
    HOLD_SLAYER = "hold_slayer"
    # Connect with transcription, recording and noise cancellation.
    AI_ASSISTED = "ai_assisted"
|
||||
|
||||
|
||||
class AudioClassification(str, Enum):
    """Category of the audio currently heard on a call.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    SILENCE = "silence"
    MUSIC = "music"  # hold music
    IVR_PROMPT = "ivr_prompt"  # automated voice (TTS or a recording)
    LIVE_HUMAN = "live_human"  # a real person talking
    RINGING = "ringing"  # ring-back tone
    DTMF = "dtmf"  # touch tones
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class ClassificationResult(BaseModel):
    """One audio classification sample taken at a specific moment."""

    # Unix timestamp of the sample.
    timestamp: float
    # Category assigned to the audio.
    audio_type: AudioClassification
    # Classifier confidence, documented as 0.0 - 1.0 (not validated here).
    confidence: float
    # Optional extra analysis data.
    details: Optional[dict] = None
|
||||
|
||||
|
||||
class ActiveCall(BaseModel):
    """In-memory state for an active call.

    Besides the stored fields, the model exposes three derived read-only
    values (``duration``, ``hold_time``, ``transcript``) and a compact
    ``summary()`` dict for list views.
    """

    id: str
    direction: str = "outbound"
    remote_number: str
    status: CallStatus = CallStatus.INITIATING
    mode: CallMode = CallMode.DIRECT
    intent: Optional[str] = None
    call_flow_id: Optional[str] = None
    device: Optional[str] = None
    started_at: datetime = Field(default_factory=datetime.now)
    connected_at: Optional[datetime] = None
    hold_started_at: Optional[datetime] = None
    current_classification: AudioClassification = AudioClassification.UNKNOWN
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    transcript_chunks: list[str] = Field(default_factory=list)
    current_step_id: Optional[str] = None  # Current position in call flow
    services: list[str] = Field(default_factory=list)  # Active services on this call

    @property
    def duration(self) -> int:
        """Whole seconds elapsed since the call connected.

        Returns 0 while ``connected_at`` is unset. The time before the call
        connected (from ``started_at``) is not included.
        """
        connected = self.connected_at
        if connected is None:
            return 0
        # Take "now" in the same awareness as the stored timestamp: mixing a
        # naive now() with a timezone-aware value raises TypeError.
        zone = connected.tzinfo if connected.utcoffset() is not None else None
        return int((datetime.now(zone) - connected).total_seconds())

    @property
    def hold_time(self) -> int:
        """Whole seconds spent in the current hold.

        Returns 0 unless ``hold_started_at`` is set and the call status is
        ``CallStatus.ON_HOLD``.
        """
        hold_start = self.hold_started_at
        if hold_start is None or self.status != CallStatus.ON_HOLD:
            return 0
        # Same naive/aware matching as in ``duration``.
        zone = hold_start.tzinfo if hold_start.utcoffset() is not None else None
        return int((datetime.now(zone) - hold_start).total_seconds())

    @property
    def transcript(self) -> str:
        """All transcript chunks so far, joined with newlines."""
        return "\n".join(self.transcript_chunks)

    def summary(self) -> dict:
        """Return a compact dict of the call's key values for list views."""
        return {
            "call_id": self.id,
            "remote_number": self.remote_number,
            "status": self.status.value,
            "mode": self.mode.value,
            "duration": self.duration,
            "hold_time": self.hold_time,
            "audio_type": self.current_classification.value,
            "intent": self.intent,
        }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# API Request/Response Models
|
||||
# ============================================================
|
||||
|
||||
|
||||
class CallRequest(BaseModel):
    """Payload for placing an outbound call."""

    # Number to dial, in E.164 format.
    number: str
    mode: CallMode = CallMode.DIRECT
    # What the caller needs (used for hold_slayer IVR navigation).
    intent: Optional[str] = None
    # Target device to ring / transfer to.
    device: Optional[str] = None
    # Use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # Services for the call; a fresh list is built for every instance.
    services: list[str] = Field(default_factory=lambda: ["recording", "transcription"])
|
||||
|
||||
|
||||
class HoldSlayerRequest(BaseModel):
    """Payload for launching the Hold Slayer."""

    # Number to dial, in E.164 format.
    number: str
    # Goal of the call, e.g. "dispute a charge on my December statement".
    intent: str
    # Optional: use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # Device to ring when a human is detected.
    transfer_to: Optional[str] = None
    # Notification channels; a fresh list is built for every instance.
    notify: list[str] = Field(default_factory=lambda: ["push"])
|
||||
|
||||
|
||||
class CallResponse(BaseModel):
    """Reply returned after a call has been initiated."""

    call_id: str
    status: str
    number: str
    mode: str
    # Optional accompanying message.
    message: Optional[str] = None
|
||||
|
||||
|
||||
class CallStatusResponse(BaseModel):
    """Complete status report for an active or completed call."""

    call_id: str
    status: str
    direction: str
    remote_number: str
    mode: str
    duration: int
    hold_time: int
    audio_type: str
    intent: Optional[str] = None
    # Last N characters of the transcript.
    transcript_excerpt: Optional[str] = None
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    current_step: Optional[str] = None
    services: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class TransferRequest(BaseModel):
    """Payload asking for a call to be transferred to a device."""

    # Device ID or device type.
    device: str
|
||||
108
models/call_flow.py
Normal file
108
models/call_flow.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""
|
||||
Call Flow models — IVR navigation trees.
|
||||
|
||||
Store known IVR structures for phone numbers you call regularly.
|
||||
The Hold Slayer follows the map instead of exploring blind.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ActionType(str, Enum):
    """Actions available to the Hold Slayer at an IVR step.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    DTMF = "dtmf"  # press a button
    SPEAK = "speak"  # say something (for speech-recognition IVRs)
    WAIT = "wait"  # wait for a prompt
    LISTEN = "listen"  # listen and let the LLM decide
    HOLD = "hold"  # on hold: activate hold detection
    TRANSFER = "transfer"  # transfer to the user's device
|
||||
|
||||
|
||||
class CallFlowStep(BaseModel):
    """One node of an IVR navigation tree."""

    id: str
    # Human-readable label, e.g. "Main menu".
    description: str
    # What we expect to hear (regex or keywords).
    expect: Optional[str] = None
    action: ActionType
    # DTMF digit(s), speech text, or device target.
    action_value: Optional[str] = None
    # Seconds to wait before retry/fallback.
    timeout: int = 30
    # ID of the next step on success.
    next_step: Optional[str] = None
    # ID of the step to use on an unexpected response.
    fallback_step: Optional[str] = None
    # Free-form remarks, e.g. "They changed this menu in Jan 2025".
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class CallFlow(BaseModel):
    """Full IVR navigation tree stored for one phone number."""

    id: str
    name: str  # e.g. "Chase Bank - Main Line"
    phone_number: str  # e.g. "+18005551234"
    description: str = ""
    last_verified: Optional[datetime] = None
    steps: list[CallFlowStep]
    tags: list[str] = Field(default_factory=list)
    notes: Optional[str] = None

    # Statistics gathered from previous runs.
    avg_hold_time: Optional[int] = None  # seconds
    success_rate: Optional[float] = None  # 0.0 - 1.0
    last_used: Optional[datetime] = None
    times_used: int = 0

    def get_step(self, step_id: str) -> Optional[CallFlowStep]:
        """Return the first step whose ID equals ``step_id``, or ``None``."""
        return next(
            (candidate for candidate in self.steps if candidate.id == step_id),
            None,
        )

    def first_step(self) -> Optional[CallFlowStep]:
        """Return the flow's opening step, or ``None`` when there are no steps."""
        if not self.steps:
            return None
        return self.steps[0]

    def steps_by_id(self) -> dict[str, CallFlowStep]:
        """Build a step-ID -> step mapping for fast lookups.

        If several steps share an ID, the last one in ``steps`` is kept.
        """
        index: dict[str, CallFlowStep] = {}
        for entry in self.steps:
            index[entry.id] = entry
        return index
|
||||
|
||||
|
||||
class CallFlowCreate(BaseModel):
    """Payload for creating a new call flow."""

    name: str
    phone_number: str
    description: str = ""
    steps: list[CallFlowStep]
    tags: list[str] = Field(default_factory=list)
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class CallFlowUpdate(BaseModel):
    """Payload for updating an existing call flow.

    Every field is optional and defaults to ``None``.
    """

    name: Optional[str] = None
    description: Optional[str] = None
    steps: Optional[list[CallFlowStep]] = None
    tags: Optional[list[str]] = None
    notes: Optional[str] = None
    last_verified: Optional[datetime] = None
|
||||
|
||||
|
||||
class CallFlowSummary(BaseModel):
    """Lightweight call-flow summary for list views.

    Carries a ``step_count`` field rather than the steps themselves.
    """

    id: str
    name: str
    phone_number: str
    description: str = ""
    step_count: int
    avg_hold_time: Optional[int] = None
    success_rate: Optional[float] = None
    last_used: Optional[datetime] = None
    times_used: int = 0
    tags: list[str] = Field(default_factory=list)
|
||||
60
models/contact.py
Normal file
60
models/contact.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
Contact models — People and organizations you call.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class PhoneNumber(BaseModel):
    """One phone number belonging to a contact."""

    # The number itself, in E.164 format.
    number: str
    # Kind of number: main, mobile, work, home, fax, etc.
    label: str = "main"
    # Marks this entry as the contact's primary number.
    primary: bool = False
|
||||
|
||||
|
||||
class ContactBase(BaseModel):
    """Fields common to the contact models."""

    name: str
    phone_numbers: list[PhoneNumber]
    # personal / business / service
    category: Optional[str] = None
    # How to handle their calls.
    routing_preference: Optional[str] = None
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class Contact(ContactBase):
    """Complete contact record: the shared fields plus ID and bookkeeping."""

    id: str
    call_count: int = 0
    last_call: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    @property
    def primary_number(self) -> Optional[str]:
        """Return the contact's preferred number.

        This is the first entry flagged ``primary``. When none is flagged the
        first listed number is used, and ``None`` is returned for an empty
        list.
        """
        entries = self.phone_numbers
        if not entries:
            return None
        flagged = next((entry for entry in entries if entry.primary), None)
        chosen = flagged if flagged is not None else entries[0]
        return chosen.number
|
||||
|
||||
|
||||
class ContactCreate(ContactBase):
    """Payload for creating a contact; adds nothing to ``ContactBase``."""
|
||||
|
||||
|
||||
class ContactUpdate(BaseModel):
    """Payload for updating a contact.

    Every field is optional and defaults to ``None``.
    """

    name: Optional[str] = None
    phone_numbers: Optional[list[PhoneNumber]] = None
    category: Optional[str] = None
    routing_preference: Optional[str] = None
    notes: Optional[str] = None
|
||||
81
models/device.py
Normal file
81
models/device.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
Device models — SIP phones, softphones, cell phones.
|
||||
|
||||
Devices register with the gateway and can receive transferred calls.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class DeviceType(str, Enum):
    """Kinds of devices that can connect to the gateway.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    SIP_PHONE = "sip_phone"  # hardware SIP phone
    SOFTPHONE = "softphone"  # software SIP client
    CELL = "cell"  # cell phone (reached via PSTN trunk)
    TABLET = "tablet"  # tablet with SIP client
    WEBRTC = "webrtc"  # browser-based WebRTC client
|
||||
|
||||
|
||||
class DeviceBase(BaseModel):
    """Fields common to the device models."""

    # Display name, e.g. "Office SIP Phone".
    name: str
    type: DeviceType
    # Extension 221-299, auto-assigned if omitted (range not validated here).
    extension: Optional[int] = None
    # SIP address, e.g. sip:robert@gateway.helu.ca
    sip_uri: Optional[str] = None
    # For PSTN devices (E.164).
    phone_number: Optional[str] = None
    # Routing priority (lower = higher priority).
    priority: int = 10
    # Capabilities; a fresh ["voice"] list is built for every instance.
    capabilities: list[str] = Field(default_factory=lambda: ["voice"])
|
||||
|
||||
|
||||
class Device(DeviceBase):
    """Complete device record: the shared fields plus ID and presence data."""

    id: str
    is_online: bool = False
    last_seen: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    @property
    def can_receive_call(self) -> bool:
        """Report whether this device can receive a call right now.

        SIP-based devices (hardware phone, softphone, tablet, WebRTC client)
        must be online and have a ``sip_uri``. A cell phone only needs a
        ``phone_number``. Any other type yields ``False``.
        """
        # TABLET is a tablet running a SIP client, so it belongs with the
        # other SIP endpoints; leaving it out made tablets permanently
        # unreachable.
        sip_based = (
            DeviceType.SIP_PHONE,
            DeviceType.SOFTPHONE,
            DeviceType.TABLET,
            DeviceType.WEBRTC,
        )
        if self.type in sip_based:
            return self.is_online and self.sip_uri is not None
        if self.type == DeviceType.CELL:
            return self.phone_number is not None
        return False
|
||||
|
||||
|
||||
class DeviceCreate(DeviceBase):
    """Payload for registering a new device; adds nothing to ``DeviceBase``."""
|
||||
|
||||
|
||||
class DeviceUpdate(BaseModel):
    """Payload for updating a device.

    Every field is optional and defaults to ``None``.
    """

    name: Optional[str] = None
    type: Optional[DeviceType] = None
    extension: Optional[int] = None
    sip_uri: Optional[str] = None
    phone_number: Optional[str] = None
    priority: Optional[int] = None
    capabilities: Optional[list[str]] = None
|
||||
|
||||
|
||||
class DeviceStatus(BaseModel):
    """Lightweight device status for list views.

    ``can_receive_call`` is a plain stored field on this model.
    """

    id: str
    name: str
    type: DeviceType
    is_online: bool
    last_seen: Optional[datetime] = None
    can_receive_call: bool
|
||||
69
models/events.py
Normal file
69
models/events.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Event models — Real-time events published via WebSocket and event bus.
|
||||
|
||||
These events drive the dashboard, notifications, and MCP updates.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class EventType(str, Enum):
    """Kinds of events the gateway can emit.

    Values are dotted names grouped by prefix. Members mix in ``str``, so
    each one compares equal to its string value.
    """

    # --- Call lifecycle ---
    CALL_INITIATED = "call.initiated"
    CALL_RINGING = "call.ringing"
    CALL_CONNECTED = "call.connected"
    CALL_ENDED = "call.ended"
    CALL_FAILED = "call.failed"

    # --- Hold Slayer ---
    IVR_STEP = "holdslayer.ivr_step"
    IVR_DTMF_SENT = "holdslayer.dtmf_sent"
    HOLD_DETECTED = "holdslayer.hold_detected"
    HUMAN_DETECTED = "holdslayer.human_detected"
    TRANSFER_STARTED = "holdslayer.transfer_started"
    TRANSFER_COMPLETE = "holdslayer.transfer_complete"

    # --- Audio ---
    AUDIO_CLASSIFIED = "audio.classified"
    TRANSCRIPT_CHUNK = "audio.transcript_chunk"

    # --- Device ---
    DEVICE_REGISTERED = "device.registered"
    DEVICE_ONLINE = "device.online"
    DEVICE_OFFLINE = "device.offline"

    # --- System ---
    GATEWAY_STARTED = "system.gateway_started"
    GATEWAY_STOPPING = "system.gateway_stopping"
    ERROR = "system.error"

    # --- SIP trunk ---
    SIP_TRUNK_REGISTERED = "sip.trunk.registered"
    SIP_TRUNK_REGISTRATION_FAILED = "sip.trunk.registration_failed"
    SIP_TRUNK_UNREGISTERED = "sip.trunk.unregistered"
|
||||
|
||||
|
||||
class GatewayEvent(BaseModel):
    """One real-time event emitted by the gateway."""

    type: EventType
    call_id: Optional[str] = None
    # Defaults to the moment the event object is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    data: dict[str, Any] = Field(default_factory=dict)
    # Human-readable description.
    message: Optional[str] = None

    def to_ws_message(self) -> dict:
        """Build the dict sent over WebSocket.

        The event type is flattened to its string value and the timestamp to
        an ISO-8601 string; the other fields are passed through unchanged.
        """
        event_name = self.type.value
        iso_timestamp = self.timestamp.isoformat()
        return {
            "type": event_name,
            "call_id": self.call_id,
            "timestamp": iso_timestamp,
            "data": self.data,
            "message": self.message,
        }
|
||||
Reference in New Issue
Block a user