feat: add initial Hold Slayer AI telephony gateway implementation

Complete project scaffolding and core implementation of an AI-powered
telephony system that calls companies, navigates IVR menus, waits on
hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
This commit is contained in:
2026-03-21 19:23:26 +00:00
parent c9ff60702b
commit ecf37658ce
56 changed files with 11601 additions and 164 deletions

1
models/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Pydantic models — call flows, calls, contacts, devices, events."""

169
models/call.py Normal file
View File

@@ -0,0 +1,169 @@
"""
Call models — Active call state, requests, and responses.
"""
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class CallStatus(str, Enum):
    """States a call moves through during its lifecycle.

    Members are also plain strings, so they compare equal to their
    string values.
    """

    INITIATING = "initiating"
    RINGING = "ringing"
    CONNECTED = "connected"
    NAVIGATING_IVR = "navigating_ivr"
    ON_HOLD = "on_hold"
    HUMAN_DETECTED = "human_detected"
    TRANSFERRING = "transferring"
    # The user is connected to the remote party.
    BRIDGED = "bridged"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class CallMode(str, Enum):
    """Strategy used to handle a call."""

    # Place the call and connect immediately.
    DIRECT = "direct"
    # Navigate the IVR, wait on hold, transfer once a human answers.
    HOLD_SLAYER = "hold_slayer"
    # Connect with transcription, recording and noise cancellation.
    AI_ASSISTED = "ai_assisted"
class AudioClassification(str, Enum):
    """Category of the audio currently heard on a call."""

    SILENCE = "silence"
    # Hold music.
    MUSIC = "music"
    # Automated voice (TTS or a recording).
    IVR_PROMPT = "ivr_prompt"
    # A real person talking.
    LIVE_HUMAN = "live_human"
    # Ring-back tone.
    RINGING = "ringing"
    # Touch tones.
    DTMF = "dtmf"
    UNKNOWN = "unknown"
class ClassificationResult(BaseModel):
    """One audio classification captured at a single moment."""

    # Unix timestamp of the observation.
    timestamp: float
    audio_type: AudioClassification
    # Classifier confidence, 0.0 - 1.0.
    confidence: float
    # Optional extra analysis data.
    details: Optional[dict] = None
class ActiveCall(BaseModel):
    """In-memory state for an active call.

    Holds the call's identity, lifecycle status, timing markers,
    audio-classification history and transcript chunks. Also exposes
    derived values (``duration``, ``hold_time``, ``transcript``) and a
    compact ``summary()`` dict for list views.
    """

    id: str
    direction: str = "outbound"
    remote_number: str
    status: CallStatus = CallStatus.INITIATING
    mode: CallMode = CallMode.DIRECT
    intent: Optional[str] = None
    call_flow_id: Optional[str] = None
    device: Optional[str] = None
    started_at: datetime = Field(default_factory=datetime.now)
    connected_at: Optional[datetime] = None
    hold_started_at: Optional[datetime] = None
    current_classification: AudioClassification = AudioClassification.UNKNOWN
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    transcript_chunks: list[str] = Field(default_factory=list)
    current_step_id: Optional[str] = None  # Current position in call flow
    services: list[str] = Field(default_factory=list)  # Active services on this call

    @property
    def duration(self) -> int:
        """Whole seconds elapsed since ``connected_at``.

        Returns:
            0 when the call has not connected yet, otherwise the elapsed
            time truncated to an integer number of seconds.
        """
        if self.connected_at is None:
            return 0
        # Read the clock in the timestamp's own timezone. Subtracting an
        # aware datetime from a naive now() would raise TypeError; for a
        # naive timestamp tzinfo is None and this is plain datetime.now().
        now = datetime.now(self.connected_at.tzinfo)
        return int((now - self.connected_at).total_seconds())

    @property
    def hold_time(self) -> int:
        """Whole seconds spent on the current hold.

        Returns:
            0 unless ``hold_started_at`` is set and the status is
            ``CallStatus.ON_HOLD``; otherwise the elapsed time since
            ``hold_started_at`` truncated to an integer.
        """
        if self.hold_started_at is None or self.status != CallStatus.ON_HOLD:
            return 0
        # Same timezone-matching as in ``duration`` to avoid mixing naive
        # and aware datetimes.
        now = datetime.now(self.hold_started_at.tzinfo)
        return int((now - self.hold_started_at).total_seconds())

    @property
    def transcript(self) -> str:
        """All transcript chunks so far, joined with newlines."""
        return "\n".join(self.transcript_chunks)

    def summary(self) -> dict:
        """Return a compact dict describing the call for list views."""
        return {
            "call_id": self.id,
            "remote_number": self.remote_number,
            "status": self.status.value,
            "mode": self.mode.value,
            "duration": self.duration,
            "hold_time": self.hold_time,
            "audio_type": self.current_classification.value,
            "intent": self.intent,
        }
# ============================================================
# API Request/Response Models
# ============================================================
class CallRequest(BaseModel):
    """Payload asking the gateway to place an outbound call."""

    # Number to dial, E.164 format.
    number: str
    mode: CallMode = CallMode.DIRECT
    # What the caller needs (used for hold_slayer IVR navigation).
    intent: Optional[str] = None
    # Target device to ring / transfer to.
    device: Optional[str] = None
    # Use a stored IVR tree.
    call_flow_id: Optional[str] = None
    services: list[str] = Field(default_factory=lambda: ["recording", "transcription"])
class HoldSlayerRequest(BaseModel):
    """Payload that launches the Hold Slayer against a number."""

    # Number to dial, E.164 format.
    number: str
    # Goal of the call, e.g. "dispute a charge on my December statement".
    intent: str
    # Optional: use a stored IVR tree.
    call_flow_id: Optional[str] = None
    # Device to ring when a human is detected.
    transfer_to: Optional[str] = None
    # Notification channels.
    notify: list[str] = Field(default_factory=lambda: ["push"])
class CallResponse(BaseModel):
    """Payload returned once a call has been initiated."""

    call_id: str
    status: str
    number: str
    mode: str
    # Optional free-form message.
    message: Optional[str] = None
class CallStatusResponse(BaseModel):
    """Complete status report for an active or completed call."""

    call_id: str
    status: str
    direction: str
    remote_number: str
    mode: str
    duration: int
    hold_time: int
    audio_type: str
    intent: Optional[str] = None
    # Last N chars of the transcript.
    transcript_excerpt: Optional[str] = None
    classification_history: list[ClassificationResult] = Field(default_factory=list)
    current_step: Optional[str] = None
    services: list[str] = Field(default_factory=list)
class TransferRequest(BaseModel):
    """Payload asking for a call to be transferred to a device."""

    # Device ID or type.
    device: str

108
models/call_flow.py Normal file
View File

@@ -0,0 +1,108 @@
"""
Call Flow models — IVR navigation trees.
Store known IVR structures for phone numbers you call regularly.
The Hold Slayer follows the map instead of exploring blind.
"""
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class ActionType(str, Enum):
    """Action the Hold Slayer can perform at an IVR step."""

    # Press a button.
    DTMF = "dtmf"
    # Say something (for speech-recognition IVRs).
    SPEAK = "speak"
    # Wait for a prompt.
    WAIT = "wait"
    # Listen and let the LLM decide.
    LISTEN = "listen"
    # On hold: activate hold detection.
    HOLD = "hold"
    # Transfer to the user's device.
    TRANSFER = "transfer"
class CallFlowStep(BaseModel):
    """One node of an IVR navigation tree."""

    id: str
    # Human-readable label, e.g. "Main menu".
    description: str
    # What we expect to hear (regex or keywords).
    expect: Optional[str] = None
    action: ActionType
    # DTMF digit(s), speech text, or device target.
    action_value: Optional[str] = None
    # Seconds to wait before retry/fallback.
    timeout: int = 30
    # ID of the next step on success.
    next_step: Optional[str] = None
    # ID of the step to use on an unexpected response.
    fallback_step: Optional[str] = None
    # Free-form notes, e.g. "They changed this menu in Jan 2025".
    notes: Optional[str] = None
class CallFlow(BaseModel):
    """Full IVR navigation tree stored for one phone number."""

    id: str
    # Display name, e.g. "Chase Bank - Main Line".
    name: str
    # Number the flow applies to, e.g. "+18005551234".
    phone_number: str
    description: str = ""
    last_verified: Optional[datetime] = None
    steps: list[CallFlowStep]
    tags: list[str] = Field(default_factory=list)
    notes: Optional[str] = None

    # Stats from previous runs
    # Average hold time in seconds.
    avg_hold_time: Optional[int] = None
    # Success rate, 0.0 - 1.0.
    success_rate: Optional[float] = None
    last_used: Optional[datetime] = None
    times_used: int = 0

    def get_step(self, step_id: str) -> Optional[CallFlowStep]:
        """Return the first step whose ID equals *step_id*, else None."""
        matches = (candidate for candidate in self.steps if candidate.id == step_id)
        return next(matches, None)

    def first_step(self) -> Optional[CallFlowStep]:
        """Return the opening step of the flow, or None when it has no steps."""
        if not self.steps:
            return None
        return self.steps[0]

    def steps_by_id(self) -> dict[str, CallFlowStep]:
        """Build a step-ID -> step mapping for fast lookups."""
        index: dict[str, CallFlowStep] = {}
        for entry in self.steps:
            index[entry.id] = entry
        return index
class CallFlowCreate(BaseModel):
    """Request body used to create a new call flow."""

    name: str
    phone_number: str
    description: str = ""
    steps: list[CallFlowStep]
    tags: list[str] = Field(default_factory=list)
    notes: Optional[str] = None
class CallFlowUpdate(BaseModel):
    """Request body used to update an existing call flow.

    Every field is optional and defaults to None.
    """

    name: Optional[str] = None
    description: Optional[str] = None
    steps: Optional[list[CallFlowStep]] = None
    tags: Optional[list[str]] = None
    notes: Optional[str] = None
    last_verified: Optional[datetime] = None
class CallFlowSummary(BaseModel):
    """Slimmed-down view of a call flow for list views."""

    id: str
    name: str
    phone_number: str
    description: str = ""
    step_count: int
    avg_hold_time: Optional[int] = None
    success_rate: Optional[float] = None
    last_used: Optional[datetime] = None
    times_used: int = 0
    tags: list[str] = Field(default_factory=list)

60
models/contact.py Normal file
View File

@@ -0,0 +1,60 @@
"""
Contact models — People and organizations you call.
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, Field
class PhoneNumber(BaseModel):
    """One phone number attached to a contact."""

    # The number itself, E.164 format.
    number: str
    # Kind of number: main, mobile, work, home, fax, etc.
    label: str = "main"
    primary: bool = False
class ContactBase(BaseModel):
    """Fields common to the contact models."""

    name: str
    phone_numbers: list[PhoneNumber]
    # personal / business / service
    category: Optional[str] = None
    # How to handle their calls.
    routing_preference: Optional[str] = None
    notes: Optional[str] = None
class Contact(ContactBase):
    """Complete contact record, including call statistics and timestamps."""

    id: str
    call_count: int = 0
    last_call: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    @property
    def primary_number(self) -> Optional[str]:
        """Number of the first entry flagged primary.

        Falls back to the first listed number when none is flagged, and
        to None when the contact has no numbers at all.
        """
        flagged = next((entry for entry in self.phone_numbers if entry.primary), None)
        if flagged is not None:
            return flagged.number
        if self.phone_numbers:
            return self.phone_numbers[0].number
        return None
class ContactCreate(ContactBase):
    """Request body used to create a contact; adds nothing to ContactBase."""
class ContactUpdate(BaseModel):
    """Request body used to update a contact.

    Every field is optional and defaults to None.
    """

    name: Optional[str] = None
    phone_numbers: Optional[list[PhoneNumber]] = None
    category: Optional[str] = None
    routing_preference: Optional[str] = None
    notes: Optional[str] = None

81
models/device.py Normal file
View File

@@ -0,0 +1,81 @@
"""
Device models — SIP phones, softphones, cell phones.
Devices register with the gateway and can receive transferred calls.
"""
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
class DeviceType(str, Enum):
    """Kind of device that can connect to the gateway."""

    # Hardware SIP phone.
    SIP_PHONE = "sip_phone"
    # Software SIP client.
    SOFTPHONE = "softphone"
    # Cell phone (reached via PSTN trunk).
    CELL = "cell"
    # Tablet with SIP client.
    TABLET = "tablet"
    # Browser-based WebRTC client.
    WEBRTC = "webrtc"
class DeviceBase(BaseModel):
    """Fields common to the device models."""

    # Display name, e.g. "Office SIP Phone".
    name: str
    type: DeviceType
    # 221-299, auto-assigned if omitted.
    extension: Optional[int] = None
    # e.g. sip:robert@gateway.helu.ca
    sip_uri: Optional[str] = None
    # For PSTN devices (E.164).
    phone_number: Optional[str] = None
    # Routing priority (lower = higher priority).
    priority: int = 10
    capabilities: list[str] = Field(default_factory=lambda: ["voice"])
class Device(DeviceBase):
    """Full device model: shared device fields plus ID, presence and timestamps."""

    id: str
    is_online: bool = False
    last_seen: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    @property
    def can_receive_call(self) -> bool:
        """Whether this device can receive a call right now.

        Returns:
            For SIP-registered device types (SIP phone, softphone, tablet,
            WebRTC): True when the device is online and has a SIP URI.
            For cell phones: True when a phone number is set.
            False for any other type.
        """
        # TABLET is a tablet running a SIP client, so it is reachable the
        # same way as the other SIP endpoints; leaving it out made tablets
        # permanently unable to receive calls.
        sip_registered = (
            DeviceType.SIP_PHONE,
            DeviceType.SOFTPHONE,
            DeviceType.TABLET,
            DeviceType.WEBRTC,
        )
        if self.type in sip_registered:
            return self.is_online and self.sip_uri is not None
        if self.type == DeviceType.CELL:
            return self.phone_number is not None
        return False
class DeviceCreate(DeviceBase):
    """Request body used to register a new device; adds nothing to DeviceBase."""
class DeviceUpdate(BaseModel):
    """Request body used to update a device.

    Every field is optional and defaults to None.
    """

    name: Optional[str] = None
    type: Optional[DeviceType] = None
    extension: Optional[int] = None
    sip_uri: Optional[str] = None
    phone_number: Optional[str] = None
    priority: Optional[int] = None
    capabilities: Optional[list[str]] = None
class DeviceStatus(BaseModel):
    """Slimmed-down device status for list views."""

    id: str
    name: str
    type: DeviceType
    is_online: bool
    last_seen: Optional[datetime] = None
    can_receive_call: bool

69
models/events.py Normal file
View File

@@ -0,0 +1,69 @@
"""
Event models — Real-time events published via WebSocket and event bus.
These events drive the dashboard, notifications, and MCP updates.
"""
from datetime import datetime
from enum import Enum
from typing import Any, Optional
from pydantic import BaseModel, Field
class EventType(str, Enum):
    """Kinds of events the gateway can emit.

    Values are dotted names grouped by prefix (call, holdslayer, audio,
    device, system, sip.trunk).
    """

    # --- Call lifecycle ---
    CALL_INITIATED = "call.initiated"
    CALL_RINGING = "call.ringing"
    CALL_CONNECTED = "call.connected"
    CALL_ENDED = "call.ended"
    CALL_FAILED = "call.failed"

    # --- Hold Slayer ---
    IVR_STEP = "holdslayer.ivr_step"
    IVR_DTMF_SENT = "holdslayer.dtmf_sent"
    HOLD_DETECTED = "holdslayer.hold_detected"
    HUMAN_DETECTED = "holdslayer.human_detected"
    TRANSFER_STARTED = "holdslayer.transfer_started"
    TRANSFER_COMPLETE = "holdslayer.transfer_complete"

    # --- Audio ---
    AUDIO_CLASSIFIED = "audio.classified"
    TRANSCRIPT_CHUNK = "audio.transcript_chunk"

    # --- Device ---
    DEVICE_REGISTERED = "device.registered"
    DEVICE_ONLINE = "device.online"
    DEVICE_OFFLINE = "device.offline"

    # --- System ---
    GATEWAY_STARTED = "system.gateway_started"
    GATEWAY_STOPPING = "system.gateway_stopping"
    ERROR = "system.error"

    # --- SIP Trunk ---
    SIP_TRUNK_REGISTERED = "sip.trunk.registered"
    SIP_TRUNK_REGISTRATION_FAILED = "sip.trunk.registration_failed"
    SIP_TRUNK_UNREGISTERED = "sip.trunk.unregistered"
class GatewayEvent(BaseModel):
    """A single real-time event emitted by the gateway."""

    type: EventType
    call_id: Optional[str] = None
    timestamp: datetime = Field(default_factory=datetime.now)
    data: dict[str, Any] = Field(default_factory=dict)
    # Human-readable description.
    message: Optional[str] = None

    def to_ws_message(self) -> dict:
        """Build the dict sent over the WebSocket for this event.

        The event type is reduced to its string value and the timestamp
        to an ISO 8601 string; the other fields are passed through as-is.
        """
        payload: dict = {"type": self.type.value, "call_id": self.call_id}
        payload["timestamp"] = self.timestamp.isoformat()
        payload["data"] = self.data
        payload["message"] = self.message
        return payload