feat: add initial Hold Slayer AI telephony gateway implementation

Complete project scaffolding and core implementation of an AI-powered
telephony system that calls companies, navigates IVR menus, waits on
hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
This commit is contained in:
2026-03-21 19:23:26 +00:00
parent c9ff60702b
commit ecf37658ce
56 changed files with 11601 additions and 164 deletions

1
core/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Core telephony engine — SIP, media, and call management."""

199
core/call_manager.py Normal file
View File

@@ -0,0 +1,199 @@
"""
Call Manager — Active call state tracking and event bus.
Central nervous system of the gateway. Tracks all active calls,
publishes events, and coordinates between SIP engine and services.
"""
import asyncio
import logging
import uuid
from collections.abc import AsyncIterator
from datetime import datetime
from typing import Optional
from core.event_bus import EventBus, EventSubscription
from models.call import ActiveCall, AudioClassification, CallMode, CallStatus, ClassificationResult
from models.events import EventType, GatewayEvent
logger = logging.getLogger(__name__)
class CallManager:
    """
    Manages all active calls and their state.

    The single source of truth for what's happening on the gateway. It owns
    the registry of active calls (call ID -> ActiveCall) and the mapping of
    SIP leg IDs to call IDs, and publishes a GatewayEvent on the event bus
    for every change it records.
    """

    # Event type published for each status transition (used by update_status).
    _STATUS_EVENTS = {
        CallStatus.RINGING: EventType.CALL_RINGING,
        CallStatus.CONNECTED: EventType.CALL_CONNECTED,
        CallStatus.NAVIGATING_IVR: EventType.IVR_STEP,
        CallStatus.ON_HOLD: EventType.HOLD_DETECTED,
        CallStatus.HUMAN_DETECTED: EventType.HUMAN_DETECTED,
        CallStatus.TRANSFERRING: EventType.TRANSFER_STARTED,
        CallStatus.BRIDGED: EventType.TRANSFER_COMPLETE,
        CallStatus.COMPLETED: EventType.CALL_ENDED,
        CallStatus.FAILED: EventType.CALL_FAILED,
    }

    def __init__(self, event_bus: EventBus):
        """
        Args:
            event_bus: Bus on which every call event is published.
        """
        self.event_bus = event_bus
        self._active_calls: dict[str, ActiveCall] = {}
        self._call_legs: dict[str, str] = {}  # SIP leg ID -> call ID mapping

    # ================================================================
    # Call Lifecycle
    # ================================================================

    async def create_call(
        self,
        remote_number: str,
        mode: CallMode = CallMode.DIRECT,
        intent: Optional[str] = None,
        call_flow_id: Optional[str] = None,
        device: Optional[str] = None,
        services: Optional[list[str]] = None,
    ) -> ActiveCall:
        """
        Create a new call, start tracking it, and publish CALL_INITIATED.

        Args:
            remote_number: Number being called.
            mode: Call mode stored on the call and reported in the event.
            intent: Optional free-form intent stored on the call.
            call_flow_id: Optional call-flow identifier stored on the call.
            device: Optional device identifier stored on the call.
            services: Optional list of service names; None becomes [].

        Returns:
            The newly tracked ActiveCall (ID of the form ``call_<12 hex>``).
        """
        call_id = f"call_{uuid.uuid4().hex[:12]}"
        call = ActiveCall(
            id=call_id,
            remote_number=remote_number,
            mode=mode,
            intent=intent,
            call_flow_id=call_flow_id,
            device=device,
            services=services or [],
        )
        self._active_calls[call_id] = call
        await self.event_bus.publish(GatewayEvent(
            type=EventType.CALL_INITIATED,
            call_id=call_id,
            data={"number": remote_number, "mode": mode.value, "intent": intent},
            message=f"📞 Calling {remote_number} ({mode.value})",
        ))
        return call

    async def update_status(self, call_id: str, status: CallStatus) -> None:
        """
        Update a call's status and publish the matching event.

        Unknown call IDs are logged and ignored.

        Args:
            call_id: ID of the call to update.
            status: New status to record.
        """
        call = self._active_calls.get(call_id)
        if not call:
            logger.warning(f"Cannot update status: call {call_id} not found")
            return

        old_status = call.status
        call.status = status

        # Track timing milestones
        if status == CallStatus.CONNECTED and not call.connected_at:
            call.connected_at = datetime.now()
        elif status == CallStatus.ON_HOLD:
            call.hold_started_at = datetime.now()
        elif status == CallStatus.HUMAN_DETECTED:
            call.hold_started_at = None  # Stop counting hold time

        # NOTE(review): statuses missing from the table fall back to
        # CALL_CONNECTED, which looks like a placeholder — confirm whether an
        # unmapped status should publish a different event type.
        event_type = self._STATUS_EVENTS.get(status, EventType.CALL_CONNECTED)
        await self.event_bus.publish(GatewayEvent(
            type=event_type,
            call_id=call_id,
            data={
                "old_status": old_status.value,
                "new_status": status.value,
                "duration": call.duration,
                "hold_time": call.hold_time,
            },
            # Separator between old and new status keeps the message readable.
            message=f"Call {call_id}: {old_status.value} → {status.value}",
        ))

    async def add_classification(
        self, call_id: str, result: ClassificationResult
    ) -> None:
        """
        Record an audio classification result and publish AUDIO_CLASSIFIED.

        Silently does nothing when the call is not active.
        """
        call = self._active_calls.get(call_id)
        if not call:
            return
        call.current_classification = result.audio_type
        call.classification_history.append(result)
        await self.event_bus.publish(GatewayEvent(
            type=EventType.AUDIO_CLASSIFIED,
            call_id=call_id,
            data={
                "audio_type": result.audio_type.value,
                "confidence": result.confidence,
            },
            message=f"🎵 Audio: {result.audio_type.value} ({result.confidence:.0%})",
        ))

    async def add_transcript(self, call_id: str, text: str) -> None:
        """
        Append a transcript chunk to a call and publish TRANSCRIPT_CHUNK.

        The event carries the full text in ``data``; the human-readable
        message is truncated to 80 characters. Silently does nothing when the
        call is not active.
        """
        call = self._active_calls.get(call_id)
        if not call:
            return
        call.transcript_chunks.append(text)
        await self.event_bus.publish(GatewayEvent(
            type=EventType.TRANSCRIPT_CHUNK,
            call_id=call_id,
            data={"text": text},
            message=f"📝 '{text[:80]}...' " if len(text) > 80 else f"📝 '{text}'",
        ))

    async def end_call(self, call_id: str, status: CallStatus = CallStatus.COMPLETED) -> Optional[ActiveCall]:
        """
        End a call, stop tracking it, and publish CALL_ENDED.

        Args:
            call_id: ID of the call to end.
            status: Final status recorded on the call.

        Returns:
            The removed ActiveCall, or None if the call was not active.
        """
        call = self._active_calls.pop(call_id, None)
        if call:
            call.status = status
            # Forget the SIP legs of the finished call; otherwise the leg map
            # keeps one stale entry per leg for the lifetime of the process.
            stale_legs = [
                leg_id for leg_id, cid in self._call_legs.items() if cid == call_id
            ]
            for leg_id in stale_legs:
                del self._call_legs[leg_id]
            await self.event_bus.publish(GatewayEvent(
                type=EventType.CALL_ENDED,
                call_id=call_id,
                data={
                    "duration": call.duration,
                    "hold_time": call.hold_time,
                    "final_status": status.value,
                },
                message=f"📵 Call ended: {call.remote_number} ({call.duration}s, hold: {call.hold_time}s)",
            ))
        return call

    # ================================================================
    # Leg Mapping
    # ================================================================

    def map_leg(self, sip_leg_id: str, call_id: str) -> None:
        """Map a SIP leg ID to a call ID (overwrites any previous mapping)."""
        self._call_legs[sip_leg_id] = call_id

    def get_call_for_leg(self, sip_leg_id: str) -> Optional[ActiveCall]:
        """Return the active call a SIP leg belongs to, or None if unknown."""
        call_id = self._call_legs.get(sip_leg_id)
        if call_id:
            return self._active_calls.get(call_id)
        return None

    # ================================================================
    # Queries
    # ================================================================

    def get_call(self, call_id: str) -> Optional[ActiveCall]:
        """Get an active call by ID, or None if it is not tracked."""
        return self._active_calls.get(call_id)

    @property
    def active_calls(self) -> dict[str, ActiveCall]:
        """Shallow copy of the active-call registry (call ID -> ActiveCall)."""
        return dict(self._active_calls)

    @property
    def active_call_count(self) -> int:
        """Number of calls currently tracked."""
        return len(self._active_calls)

224
core/dial_plan.py Normal file
View File

@@ -0,0 +1,224 @@
"""
Dial Plan — Pattern matching and digit normalisation.
Matches a dialled string to a route type and normalises the destination
to a canonical form the rest of the gateway can act on.
Route types:
"extension" — internal 2XX endpoint
"service" — internal 5XX system service
"pstn" — outbound call via SIP trunk (normalised E.164)
"invalid" — no match
"""
import re
from dataclasses import dataclass
from typing import Optional
# ================================================================
# Emergency numbers — always route to PSTN, highest priority
# ================================================================
# Dialled string -> destination used for emergency routing.
EMERGENCY_NUMBERS: dict[str, str] = {
    # North American emergency
    "911": "+1911",
    # Mis-dial with phantom '9' prefix
    "9911": "+1911",
    # International GSM emergency
    "112": "+112",
}

# ================================================================
# Extension ranges (inclusive bounds)
# ================================================================
EXTENSION_FIRST, EXTENSION_LAST = 221, 299
SERVICE_FIRST, SERVICE_LAST = 500, 599

# ================================================================
# Known system services (service code -> service name)
# ================================================================
SERVICES: dict[int, str] = {
    500: "auto_attendant",
    510: "gateway_status",
    511: "echo_test",
    520: "hold_slayer_launch",
    599: "operator_fallback",
}
# ================================================================
# Route result
# ================================================================
@dataclass
class RouteResult:
    """Outcome of looking a dialled string up in the dial plan."""

    # One of "extension", "service", "pstn" or "invalid".
    route_type: str
    # Normalised target: extension number, service name, or E.164 number.
    destination: str
    # The string exactly as it was dialled.
    original: str
    # Optional human-readable summary of the route.
    description: str = ""

    @property
    def is_internal(self) -> bool:
        """True for routes that stay on the gateway (extension or service)."""
        kind = self.route_type
        return kind == "extension" or kind == "service"

    @property
    def is_outbound(self) -> bool:
        """True for routes that leave via the PSTN."""
        return "pstn" == self.route_type

    @property
    def is_valid(self) -> bool:
        """True unless the lookup found no route."""
        return not (self.route_type == "invalid")
# ================================================================
# Core matcher
# ================================================================
def match(digits: str) -> RouteResult:
    """
    Match dialled digits against the dial plan.

    Checks are applied in priority order: emergency numbers, 2XX extensions,
    5XX system services, then PSTN normalisation. Surrounding whitespace is
    stripped before matching.

    Args:
        digits: The dialled string.

    Returns:
        A RouteResult with the normalised destination; ``route_type`` is
        "invalid" when nothing matches.

    Examples:
        match("221") → RouteResult(route_type="extension", destination="221")
        match("511") → RouteResult(route_type="service", destination="echo_test")
        match("6135550100") → RouteResult(route_type="pstn", destination="+16135550100")
        match("16135550100") → RouteResult(route_type="pstn", destination="+16135550100")
        match("+16135550100") → RouteResult(route_type="pstn", destination="+16135550100")
        match("01144201234") → RouteResult(route_type="pstn", destination="+44201234")
    """
    digits = digits.strip()

    # ---- Emergency numbers — checked first, no interception ----
    if digits in EMERGENCY_NUMBERS:
        e164 = EMERGENCY_NUMBERS[digits]
        return RouteResult(
            route_type="pstn",
            destination=e164,
            original=digits,
            # Same "dialled → routed" wording as the PSTN description below.
            description=f"EMERGENCY {digits} → {e164}",
        )

    # ---- 2XX extensions ----
    if is_extension(digits):
        return RouteResult(
            route_type="extension",
            destination=digits,
            original=digits,
            description=f"Extension {digits}",
        )

    # ---- 5XX system services ----
    if is_service(digits):
        svc = int(digits)
        # Codes in range without a registered name get a generic one.
        name = SERVICES.get(svc, f"service_{svc}")
        return RouteResult(
            route_type="service",
            destination=name,
            original=digits,
            description=f"System service: {name}",
        )

    # ---- PSTN outbound ----
    e164 = _normalise_e164(digits)
    if e164:
        return RouteResult(
            route_type="pstn",
            destination=e164,
            original=digits,
            description=f"PSTN outbound → {e164}",
        )

    return RouteResult(
        route_type="invalid",
        destination=digits,
        original=digits,
        description=f"No route for '{digits}'",
    )
# ================================================================
# E.164 normalisation
# ================================================================
def _normalise_e164(digits: str) -> Optional[str]:
    """
    Normalise a dialled string to E.164 (+CC…).

    Spaces, dashes, dots and parentheses are ignored. An E.164 number is
    7–15 digits and its country code never starts with 0, so candidates with
    a leading zero after the "+" or the international prefix are rejected.

    Handles:
        +CCNNN…      → unchanged (already E.164)
        1NPANXXXXXX  → +1NPANXXXXXX (NANP with country code, 11 digits)
        NPANXXXXXX   → +1NPANXXXXXX (NANP 10-digit)
        011CCNNN…    → +CCNNN… (IDD 011 prefix)
        00CCNNN…     → +CCNNN… (IDD 00 prefix)

    Args:
        digits: The dialled string, possibly with formatting characters.

    Returns:
        The E.164 form, or None when the input matches no supported pattern.
    """
    # Strip spaces/dashes/dots/parens for matching only
    clean = re.sub(r"[\s\-\.\(\)]", "", digits)

    # Already E.164
    if re.fullmatch(r"\+[1-9]\d{6,14}", clean):
        return clean

    # NANP: 1 + 10 digits (NPA must be 2-9, NXX must be 2-9)
    if re.fullmatch(r"1[2-9]\d{2}[2-9]\d{6}", clean):
        return f"+{clean}"

    # NANP: 10 digits only
    if re.fullmatch(r"[2-9]\d{2}[2-9]\d{6}", clean):
        return f"+1{clean}"

    # International dialling prefix: 011 (North America) or 00 (many other
    # countries). The remainder gets the same length limit as the "+" form.
    idd = re.fullmatch(r"(?:011|00)([1-9]\d{6,14})", clean)
    if idd:
        return f"+{idd.group(1)}"

    return None
# ================================================================
# Extension helpers
# ================================================================
def next_extension(used: set[int]) -> Optional[int]:
    """
    Pick the lowest extension number that is still free.

    Args:
        used: Extension numbers that are already assigned.

    Returns:
        The smallest number in EXTENSION_FIRST..EXTENSION_LAST (inclusive)
        not present in ``used``, or None when every number is taken.
    """
    candidates = range(EXTENSION_FIRST, EXTENSION_LAST + 1)
    return next((number for number in candidates if number not in used), None)
def is_extension(digits: str) -> bool:
    """Report whether ``digits`` is a three-digit 2XX string inside the extension range."""
    if re.fullmatch(r"2\d{2}", digits) is None:
        return False
    number = int(digits)
    return EXTENSION_FIRST <= number <= EXTENSION_LAST
def is_service(digits: str) -> bool:
    """Report whether ``digits`` is a three-digit 5XX string inside the service range."""
    if re.fullmatch(r"5\d{2}", digits) is None:
        return False
    number = int(digits)
    return SERVICE_FIRST <= number <= SERVICE_LAST

120
core/event_bus.py Normal file
View File

@@ -0,0 +1,120 @@
"""
Event Bus — Async pub/sub for real-time gateway events.
WebSocket connections, MCP server, and internal services
all subscribe to events here. Pure asyncio — no external deps.
"""
import asyncio
import logging
from typing import Optional
from models.events import EventType, GatewayEvent
logger = logging.getLogger(__name__)
class EventBus:
    """
    Async pub/sub event bus using one asyncio.Queue per subscriber.

    Features:
    - Non-blocking publish (put_nowait)
    - Automatic dead-subscriber cleanup (full queues are removed)
    - Event history (last N events for late joiners)
    - Typed event filtering on subscriptions
    - Async iteration via EventSubscription
    """

    def __init__(self, max_history: int = 1000):
        """
        Args:
            max_history: Maximum number of events kept in the history.
                Zero or a negative value keeps no history.
        """
        self._subscribers: list[tuple[asyncio.Queue[GatewayEvent], Optional[set[EventType]]]] = []
        self._history: list[GatewayEvent] = []
        self._max_history = max_history

    async def publish(self, event: GatewayEvent) -> None:
        """
        Record an event and deliver it to every matching subscriber.

        Delivery never blocks: a subscriber whose queue is full is treated as
        dead and removed from the bus.
        """
        self._history.append(event)
        # Trim from the front by count. Slicing with ``[-max_history:]`` would
        # keep the whole list when max_history is 0 (``[-0:]`` is ``[0:]``).
        overflow = len(self._history) - max(self._max_history, 0)
        if overflow > 0:
            del self._history[:overflow]

        logger.info(f"📡 Event: {event.type.value} | {event.message or ''}")

        dead_queues = []
        for queue, type_filter in self._subscribers:
            # Skip if subscriber has a type filter and this event doesn't match.
            # An empty filter set is falsy and therefore behaves like None.
            if type_filter and event.type not in type_filter:
                continue
            try:
                queue.put_nowait(event)
            except asyncio.QueueFull:
                dead_queues.append((queue, type_filter))

        for entry in dead_queues:
            logger.warning(
                f"Dropping event subscriber: queue full ({entry[0].maxsize} events pending)"
            )
            self.unsubscribe(entry)

    def subscribe(
        self,
        max_size: int = 100,
        event_types: Optional[set[EventType]] = None,
    ) -> "EventSubscription":
        """
        Create a new subscription.

        Args:
            max_size: Queue depth before subscriber is considered dead.
            event_types: Optional filter — only receive these event types.
                None means receive everything.

        Returns:
            An async iterator of GatewayEvents.
        """
        queue: asyncio.Queue[GatewayEvent] = asyncio.Queue(maxsize=max_size)
        entry = (queue, event_types)
        self._subscribers.append(entry)
        return EventSubscription(queue, self, entry)

    def unsubscribe(self, entry: tuple) -> None:
        """Remove a subscriber entry; unknown entries are ignored."""
        if entry in self._subscribers:
            self._subscribers.remove(entry)

    @property
    def recent_events(self) -> list[GatewayEvent]:
        """Copy of the retained event history, oldest first."""
        return list(self._history)

    @property
    def subscriber_count(self) -> int:
        """Number of subscribers currently attached to the bus."""
        return len(self._subscribers)
class EventSubscription:
    """Async-iterable handle on one subscriber queue of an EventBus."""

    def __init__(
        self,
        queue: asyncio.Queue[GatewayEvent],
        bus: EventBus,
        entry: tuple,
    ):
        # ``entry`` is the value later handed back to the bus to unsubscribe.
        self._entry = entry
        self._bus = bus
        self._queue = queue

    def __aiter__(self):
        return self

    async def __anext__(self) -> GatewayEvent:
        """Wait for the next event; unsubscribe if the wait is cancelled."""
        try:
            event = await self._queue.get()
        except asyncio.CancelledError:
            # The consuming task is going away — detach from the bus first.
            self._bus.unsubscribe(self._entry)
            raise
        return event

    async def get(self, timeout: Optional[float] = None) -> GatewayEvent:
        """Return the next event, waiting at most ``timeout`` seconds (None waits indefinitely)."""
        pending = self._queue.get()
        return await asyncio.wait_for(pending, timeout=timeout)

    def close(self):
        """Detach this subscription from the event bus."""
        self._bus.unsubscribe(self._entry)

401
core/gateway.py Normal file
View File

@@ -0,0 +1,401 @@
"""
AI PSTN Gateway — The main orchestrator.
Ties together SIP engine, call manager, event bus, and all services.
This is the top-level object that FastAPI and MCP talk to.
"""
import logging
from datetime import datetime
from typing import Optional
from config import Settings, get_settings
from core.call_manager import CallManager
from core.dial_plan import next_extension
from core.event_bus import EventBus
from core.media_pipeline import MediaPipeline
from core.sip_engine import MockSIPEngine, SIPEngine
from core.sippy_engine import SippyEngine
from models.call import ActiveCall, CallMode, CallStatus
from models.call_flow import CallFlow
from models.device import Device, DeviceType
from models.events import EventType, GatewayEvent
logger = logging.getLogger(__name__)
def _build_sip_engine(settings: Settings, gateway: "AIPSTNGateway") -> SIPEngine:
    """
    Choose and construct the SIP engine described by the settings.

    A SippyEngine is built when a trunk host is configured and is not the
    "sip.provider.com" placeholder. In every other case — including any
    failure while constructing the SippyEngine — a MockSIPEngine is returned.
    """
    trunk = settings.sip_trunk
    local_sip = settings.gateway_sip

    # No trunk host, or still the placeholder value: nothing real to talk to.
    if not trunk.host or trunk.host == "sip.provider.com":
        return MockSIPEngine()

    # Real trunk configured — use Sippy B2BUA
    try:
        engine = SippyEngine(
            sip_address=local_sip.host,
            sip_port=local_sip.port,
            trunk_host=trunk.host,
            trunk_port=trunk.port,
            trunk_username=trunk.username,
            trunk_password=trunk.password,
            trunk_transport=trunk.transport,
            domain=local_sip.domain,
            did=trunk.did,
            on_device_registered=gateway._on_sip_device_registered,
        )
    except Exception as exc:
        logger.warning(f"Could not create SippyEngine: {exc} — using mock")
        return MockSIPEngine()
    return engine
class AIPSTNGateway:
    """
    The AI PSTN Gateway.

    Central coordination point for:
    - SIP engine (signaling + media)
    - Call manager (state + events)
    - Hold Slayer service
    - Audio classifier
    - Transcription service
    - Device management
    """

    def __init__(
        self,
        settings: Settings,
        sip_engine: Optional[SIPEngine] = None,
    ):
        """
        Args:
            settings: Gateway configuration.
            sip_engine: SIP engine to use; a MockSIPEngine is created when omitted.
        """
        self.settings = settings
        self.event_bus = EventBus()
        self.call_manager = CallManager(self.event_bus)
        self.sip_engine: SIPEngine = sip_engine or MockSIPEngine()

        # Services (initialized in start())
        self._hold_slayer = None
        self._audio_classifier = None
        self._transcription = None

        # Device registry (device ID -> Device)
        self._devices: dict[str, Device] = {}

        # Strong references to running background tasks. The event loop only
        # keeps weak references, so an unreferenced task can be collected
        # before it finishes.
        self._background_tasks: set = set()

        # Startup time
        self._started_at: Optional[datetime] = None

    @classmethod
    def from_config(cls, sip_engine: Optional[SIPEngine] = None) -> "AIPSTNGateway":
        """
        Create a gateway from environment config.

        Args:
            sip_engine: Engine to use as-is; when None one is built from the
                settings via _build_sip_engine.
        """
        settings = get_settings()
        gw = cls(settings=settings)
        if sip_engine is not None:
            gw.sip_engine = sip_engine
        else:
            gw.sip_engine = _build_sip_engine(settings, gw)
        return gw

    # ================================================================
    # Lifecycle
    # ================================================================

    async def start(self) -> None:
        """Boot the gateway: start the SIP engine, create services, announce trunk state."""
        logger.info("🔥 Starting AI PSTN Gateway...")

        # Start SIP engine
        await self.sip_engine.start()
        logger.info(" SIP Engine: ready")

        # Import services here to avoid circular imports
        from services.audio_classifier import AudioClassifier
        from services.transcription import TranscriptionService

        self._audio_classifier = AudioClassifier(self.settings.classifier)
        self._transcription = TranscriptionService(self.settings.speaches)
        self._started_at = datetime.now()

        trunk_status = await self.sip_engine.get_trunk_status()
        trunk_registered = trunk_status.get("registered", False)
        logger.info(f" SIP Trunk: {'registered' if trunk_registered else 'not registered'}")
        logger.info(f" Devices: {len(self._devices)} registered")
        logger.info("\U0001f525 AI PSTN Gateway is LIVE")

        # Publish trunk registration status so dashboards/WS clients know immediately
        if trunk_registered:
            await self.event_bus.publish(GatewayEvent(
                type=EventType.SIP_TRUNK_REGISTERED,
                message=f"SIP trunk registered with {trunk_status.get('host')}",
                data=trunk_status,
            ))
        else:
            reason = trunk_status.get("reason", "Trunk registration failed or not configured")
            await self.event_bus.publish(GatewayEvent(
                type=EventType.SIP_TRUNK_REGISTRATION_FAILED,
                message=f"SIP trunk not registered — {reason}",
                data=trunk_status,
            ))

    async def stop(self) -> None:
        """Shut down: mark every active call CANCELLED, then stop the SIP engine."""
        logger.info("Shutting down AI PSTN Gateway...")

        # End all active calls (end_call ignores IDs that are already gone)
        for call_id in list(self.call_manager.active_calls):
            await self.call_manager.end_call(call_id, CallStatus.CANCELLED)

        # Stop SIP engine
        await self.sip_engine.stop()
        self._started_at = None
        logger.info("Gateway shut down cleanly.")

    @property
    def uptime(self) -> Optional[int]:
        """Gateway uptime in whole seconds, or None when not started."""
        if self._started_at:
            return int((datetime.now() - self._started_at).total_seconds())
        return None

    # ================================================================
    # Call Operations
    # ================================================================

    async def make_call(
        self,
        number: str,
        mode: CallMode = CallMode.DIRECT,
        intent: Optional[str] = None,
        call_flow_id: Optional[str] = None,
        device: Optional[str] = None,
        services: Optional[list[str]] = None,
    ) -> ActiveCall:
        """
        Place an outbound call.

        This is the main entry point for all call types:
        - direct: Call and connect to device immediately
        - hold_slayer: Navigate IVR, wait on hold, transfer when human detected
        - ai_assisted: Connect with transcription, recording, noise cancel

        Args:
            number: Number to dial.
            mode: Call mode.
            intent: Optional intent stored on the call.
            call_flow_id: Optional call-flow ID; also passed to the Hold Slayer run.
            device: Target device ID; defaults to the configured
                hold_slayer.default_transfer_device.
            services: Optional service names stored on the call.

        Returns:
            The tracked ActiveCall.

        Raises:
            Exception: Whatever the SIP engine raised while placing the call;
                the call is marked FAILED before re-raising.
        """
        # Create call in manager
        call = await self.call_manager.create_call(
            remote_number=number,
            mode=mode,
            intent=intent,
            call_flow_id=call_flow_id,
            device=device or self.settings.hold_slayer.default_transfer_device,
            services=services,
        )

        # Place outbound call via SIP engine
        try:
            sip_leg_id = await self.sip_engine.make_call(
                number=number,
                caller_id=self.settings.sip_trunk.did,
            )
            self.call_manager.map_leg(sip_leg_id, call.id)
            await self.call_manager.update_status(call.id, CallStatus.RINGING)
        except Exception as e:
            logger.error(f"Failed to place call: {e}")
            # NOTE(review): the failed call stays in the call manager's active
            # registry — confirm whether it should also be ended here.
            await self.call_manager.update_status(call.id, CallStatus.FAILED)
            raise

        # If hold_slayer mode, launch the Hold Slayer service
        if mode == CallMode.HOLD_SLAYER:
            import asyncio

            from services.hold_slayer import HoldSlayerService

            hold_slayer = HoldSlayerService(
                gateway=self,
                call_manager=self.call_manager,
                sip_engine=self.sip_engine,
                classifier=self._audio_classifier,
                transcription=self._transcription,
                settings=self.settings,
            )
            # Launch as background task — don't block
            task = asyncio.create_task(
                hold_slayer.run(call, sip_leg_id, call_flow_id),
                name=f"holdslayer_{call.id}",
            )
            # Keep the task alive until it finishes, then let it go.
            self._background_tasks.add(task)
            task.add_done_callback(self._background_tasks.discard)

        return call

    async def transfer_call(self, call_id: str, device_id: str) -> None:
        """
        Transfer an active call to a device by bridging it with a new device leg.

        Raises:
            ValueError: If the call or the device is unknown.
        """
        call = self.call_manager.get_call(call_id)
        if not call:
            raise ValueError(f"Call {call_id} not found")
        device = self._devices.get(device_id)
        if not device:
            raise ValueError(f"Device {device_id} not found")

        await self.call_manager.update_status(call_id, CallStatus.TRANSFERRING)

        # Place call to device
        device_leg_id = await self.sip_engine.call_device(device)
        self.call_manager.map_leg(device_leg_id, call_id)

        # Get the original PSTN leg: the first leg of this call that is not
        # the device leg we just created.
        pstn_leg_id = None
        for leg_id, cid in self.call_manager._call_legs.items():
            if cid == call_id and leg_id != device_leg_id:
                pstn_leg_id = leg_id
                break

        if pstn_leg_id:
            # Bridge the PSTN leg and device leg
            await self.sip_engine.bridge_calls(pstn_leg_id, device_leg_id)
            await self.call_manager.update_status(call_id, CallStatus.BRIDGED)
        else:
            logger.error(f"Could not find PSTN leg for call {call_id}")
            await self.call_manager.update_status(call_id, CallStatus.FAILED)

    async def hangup_call(self, call_id: str) -> None:
        """
        Hang up every SIP leg of a call and end it.

        Raises:
            ValueError: If the call is unknown.
        """
        call = self.call_manager.get_call(call_id)
        if not call:
            raise ValueError(f"Call {call_id} not found")

        # Hang up all legs associated with this call (iterate over a snapshot)
        for leg_id, cid in list(self.call_manager._call_legs.items()):
            if cid == call_id:
                await self.sip_engine.hangup(leg_id)

        await self.call_manager.end_call(call_id)

    def get_call(self, call_id: str) -> Optional[ActiveCall]:
        """Get an active call, or None if it is not tracked."""
        return self.call_manager.get_call(call_id)

    # ================================================================
    # Device Management
    # ================================================================

    def register_device(self, device: Device) -> None:
        """
        Register a device with the gateway, auto-assigning an extension.

        A device without an extension gets the lowest free one (it stays None
        when the range is exhausted). A device without a sip_uri gets one
        built from its extension and the gateway SIP domain.
        """
        # Auto-assign a 2XX extension if not already set
        if device.extension is None:
            used = {
                d.extension
                for d in self._devices.values()
                if d.extension is not None
            }
            device.extension = next_extension(used)

        # Build a sip_uri from the extension if not provided
        if device.sip_uri is None and device.extension is not None:
            domain = self.settings.gateway_sip.domain
            device.sip_uri = f"sip:{device.extension}@{domain}"

        self._devices[device.id] = device
        logger.info(
            f"📱 Device registered: {device.name} "
            f"ext={device.extension} uri={device.sip_uri}"
        )

    def unregister_device(self, device_id: str) -> None:
        """Unregister a device; unknown IDs are ignored."""
        device = self._devices.pop(device_id, None)
        if device:
            logger.info(f"📱 Device unregistered: {device.name}")

    async def _on_sip_device_registered(
        self, aor: str, contact: str, expires: int
    ) -> None:
        """
        Called by SippyEngine when a phone sends SIP REGISTER.

        A known device (matched by sip_uri == aor) has its online flag and
        last_seen refreshed; ``expires == 0`` marks it offline and publishes
        DEVICE_OFFLINE. An unknown AOR creates a new Device, registers it,
        and publishes DEVICE_REGISTERED.

        Args:
            aor: Address-of-record of the registering phone.
            contact: Contact value, forwarded in the event data.
            expires: Registration lifetime; 0 means unregister.
        """
        import uuid

        # Look for an existing device with this AOR
        existing = next(
            (d for d in self._devices.values() if d.sip_uri == aor),
            None,
        )
        if existing:
            existing.is_online = expires > 0
            existing.last_seen = datetime.now()
            logger.info(
                f"📱 Device refreshed: {existing.name} "
                f"ext={existing.extension} expires={expires}"
            )
            if expires == 0:
                await self.event_bus.publish(GatewayEvent(
                    type=EventType.DEVICE_OFFLINE,
                    message=f"{existing.name} (ext {existing.extension}) unregistered",
                    data={"device_id": existing.id, "aor": aor},
                ))
            return

        # New device — auto-register it
        device_id = f"dev_{uuid.uuid4().hex[:8]}"
        # Derive a friendly name from the AOR username (sip:alice@host → alice)
        user_part = aor.split(":")[-1].split("@")[0] if ":" in aor else aor
        # NOTE(review): type is passed as a plain string while other code
        # compares against DeviceType members — confirm Device coerces it.
        dev = Device(
            id=device_id,
            name=user_part,
            type="sip_phone",
            sip_uri=aor,
            is_online=True,
            last_seen=datetime.now(),
        )
        # Assigns an extension; sip_uri is already set to the AOR above.
        self.register_device(dev)

        await self.event_bus.publish(GatewayEvent(
            type=EventType.DEVICE_REGISTERED,
            message=(
                f"{dev.name} registered as ext {dev.extension} "
                f"({dev.sip_uri})"
            ),
            data={
                "device_id": dev.id,
                "name": dev.name,
                "extension": dev.extension,
                "sip_uri": dev.sip_uri,
                "contact": contact,
            },
        ))

    def preferred_device(self) -> Optional[Device]:
        """
        Get the preferred device for receiving a call.

        Returns the device with the lowest ``priority`` value among those
        that can receive a call; failing that, the lowest-priority-value cell
        device that has a phone number; otherwise None. Ties keep
        registration order.
        """
        online_devices = [
            d for d in self._devices.values()
            if d.can_receive_call
        ]
        if online_devices:
            return min(online_devices, key=lambda d: d.priority)

        # Fallback: any cell phone with a number
        fallback = [
            d for d in self._devices.values()
            if d.type == DeviceType.CELL and d.phone_number
        ]
        return min(fallback, key=lambda d: d.priority) if fallback else None

    @property
    def devices(self) -> dict[str, Device]:
        """Shallow copy of the device registry (device ID -> Device)."""
        return dict(self._devices)

    # ================================================================
    # Status
    # ================================================================

    async def status(self) -> dict:
        """Full gateway status: uptime, trunk state, devices, call and subscriber counts."""
        trunk = await self.sip_engine.get_trunk_status()
        return {
            "uptime": self.uptime,
            "trunk": trunk,
            "devices": {d.id: {"name": d.name, "online": d.is_online} for d in self._devices.values()},
            "active_calls": self.call_manager.active_call_count,
            "event_subscribers": self.event_bus.subscriber_count,
        }

529
core/media_pipeline.py Normal file
View File

@@ -0,0 +1,529 @@
"""
Media Pipeline — PJSUA2 conference bridge and audio routing.
This is the media anchor for the gateway. PJSUA2 handles all RTP:
- Conference bridge (mixing, bridging call legs)
- Audio tapping (extracting audio for classifier + STT)
- WAV recording
- Tone generation (DTMF, comfort noise)
Architecture:
Each SIP call leg gets a transport + media port in PJSUA2's conf bridge.
The pipeline provides methods to:
- Add/remove RTP streams (tied to Sippy call legs)
- Bridge two streams (connect call legs)
- Tap a stream (fork audio to classifier/STT)
- Record a stream to WAV
- Play audio into a stream (prompts, comfort tones)
PJSUA2 runs in its own thread with a dedicated Endpoint.
"""
import asyncio
import logging
import threading
from collections.abc import AsyncIterator
from typing import Optional
logger = logging.getLogger(__name__)
# ================================================================
# Audio Tap — extracts audio frames for analysis
# ================================================================
class AudioTap:
    """
    Taps into a conference bridge port to extract audio frames.

    Used by:
    - AudioClassifier (detect hold music vs human vs IVR)
    - TranscriptionService (speech-to-text)
    - RecordingService (WAV file capture)

    Frames are 16-bit PCM, 16kHz mono, 20ms (640 bytes per frame) with the
    default parameters. Up to 500 frames are buffered; when the buffer is
    full the oldest frame is dropped.
    """

    def __init__(self, stream_id: str, sample_rate: int = 16000, frame_ms: int = 20):
        """
        Args:
            stream_id: ID of the media stream being tapped.
            sample_rate: Sample rate in Hz.
            frame_ms: Frame duration in milliseconds.
        """
        self.stream_id = stream_id
        self.sample_rate = sample_rate
        self.frame_ms = frame_ms
        self.frame_size = int(sample_rate * frame_ms / 1000) * 2  # 16-bit = 2 bytes/sample
        self._buffer: asyncio.Queue[bytes] = asyncio.Queue(maxsize=500)
        self._active = True
        self._pjsua2_port = None  # PJSUA2 AudioMediaPort for tapping
        # Event loop that consumes the frames. asyncio.Queue is not
        # thread-safe, so feed() hands frames to this loop when it is called
        # from another thread. Captured here when the tap is created inside a
        # running loop, otherwise on the first read_frame().
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            pass

    def _enqueue(self, pcm_data: bytes) -> None:
        """Put one frame in the buffer, dropping the oldest frame when full."""
        if not self._active:
            return
        try:
            self._buffer.put_nowait(pcm_data)
        except asyncio.QueueFull:
            # Drop oldest frame to keep flowing
            try:
                self._buffer.get_nowait()
                self._buffer.put_nowait(pcm_data)
            except (asyncio.QueueEmpty, asyncio.QueueFull):
                pass

    def feed(self, pcm_data: bytes) -> None:
        """
        Feed PCM audio data into the tap (called from PJSUA2 thread).

        Safe to call from any thread: when the caller is not running on the
        consuming event loop, the frame is scheduled onto that loop instead
        of touching the queue directly. Frames fed after close() are ignored.
        """
        if not self._active:
            return
        loop = self._loop
        if loop is not None and not loop.is_closed():
            try:
                running = asyncio.get_running_loop()
            except RuntimeError:
                running = None
            if running is not loop:
                try:
                    loop.call_soon_threadsafe(self._enqueue, pcm_data)
                except RuntimeError:
                    # Loop was closed in the meantime — nobody is left to
                    # consume the frame, so it is dropped.
                    pass
                return
        self._enqueue(pcm_data)

    async def read_frame(self, timeout: float = 1.0) -> Optional[bytes]:
        """
        Read the next audio frame.

        Args:
            timeout: Maximum time to wait in seconds.

        Returns:
            The next frame, or None if none arrived within ``timeout``.
        """
        # Remember which loop consumes the frames so feed() can target it.
        self._loop = asyncio.get_running_loop()
        try:
            return await asyncio.wait_for(self._buffer.get(), timeout=timeout)
        except asyncio.TimeoutError:
            return None

    async def stream(self) -> AsyncIterator[bytes]:
        """Async iterator yielding non-empty audio frames until the tap is closed."""
        while self._active:
            frame = await self.read_frame()
            if frame:
                yield frame

    def close(self):
        """Stop the tap: later frames are ignored and stream() ends."""
        self._active = False
# ================================================================
# Stream Entry — tracks a single media stream in the pipeline
# ================================================================
class MediaStream:
    """Bookkeeping record for one RTP media stream in the conference bridge."""

    def __init__(self, stream_id: str, remote_host: str, remote_port: int, codec: str = "PCMU"):
        # Identity and remote endpoint, as supplied by the caller.
        self.stream_id, self.codec = stream_id, codec
        self.remote_host, self.remote_port = remote_host, remote_port
        # Everything below starts unset and is expected to be filled in later.
        self.conf_port: Optional[int] = None  # PJSUA2 conference bridge port ID
        self.rtp_port: Optional[int] = None  # Local RTP listen port
        self.transport = None  # PJSUA2 SipTransport
        self.recorder = None  # PJSUA2 AudioMediaRecorder
        self.taps: list[AudioTap] = []
        self.active = True

    def __repr__(self):
        return "<MediaStream {} rtp={}:{} conf_port={}>".format(
            self.stream_id,
            self.remote_host,
            self.remote_port,
            self.conf_port,
        )
# ================================================================
# Main Pipeline
# ================================================================
class MediaPipeline:
    """
    PJSUA2-based media pipeline.

    Manages the conference bridge endpoint, local RTP port allocation,
    audio taps, and recording. When the ``pjsua2`` bindings are missing
    or fail to initialise, the pipeline still reports ready and every
    operation degrades to a logged "virtual" no-op so the gateway can run
    without real media.

    NOTE(review): ``_pjsua2_thread`` is reserved for a dedicated PJSUA2
    thread, but nothing in this class starts one yet.

    Usage:
        pipeline = MediaPipeline()
        await pipeline.start()

        # Add a stream for a call leg
        port = pipeline.add_remote_stream("leg_1", "10.0.0.1", 20000, "PCMU")

        # Tap audio for analysis
        tap = pipeline.create_tap("leg_1")
        async for frame in tap.stream():
            classify(frame)

        # Bridge two call legs
        pipeline.bridge_streams("leg_1", "leg_2")

        # Record a call
        pipeline.start_recording("leg_1", "/tmp/call.wav")

        await pipeline.stop()
    """

    # Conference bridge capacity given to PJSUA2 (also echoed in the startup log).
    _MAX_MEDIA_PORTS = 256

    def __init__(
        self,
        rtp_start_port: int = 10000,
        rtp_port_range: int = 1000,
        sample_rate: int = 16000,
        channels: int = 1,
        null_audio: bool = True,
    ):
        """
        Args:
            rtp_start_port: First local port handed out for RTP.
            rtp_port_range: Size of the local port window starting at
                ``rtp_start_port``; ports are allocated in steps of two.
            sample_rate: Clock rate passed to PJSUA2 and to new taps.
            channels: Channel count passed to PJSUA2.
            null_audio: Use PJSUA2's null audio device (no sound card).
        """
        self._rtp_start_port = rtp_start_port
        self._rtp_port_range = rtp_port_range
        self._next_rtp_port = rtp_start_port
        self._sample_rate = sample_rate
        self._channels = channels
        self._null_audio = null_audio  # Use null audio device (no sound card needed)

        # State
        self._streams: dict[str, MediaStream] = {}
        self._taps: dict[str, list[AudioTap]] = {}
        self._ready = False

        # PJSUA2 objects (set during start)
        self._endpoint = None
        self._pjsua2_thread: Optional[threading.Thread] = None
        self._lock = threading.Lock()

    # ================================================================
    # Lifecycle
    # ================================================================

    async def start(self) -> None:
        """
        Initialize the PJSUA2 endpoint and conference bridge.

        The pipeline is marked ready in every outcome: with a live
        endpoint on success, or in stub/degraded mode (``_endpoint`` stays
        ``None``) when ``pjsua2`` is missing or initialisation fails.
        """
        logger.info("🎵 Starting PJSUA2 media pipeline...")
        ep = None
        try:
            import pjsua2 as pj

            # Create and initialize the PJSUA2 Endpoint
            ep = pj.Endpoint()
            ep.libCreate()

            # Configure endpoint
            ep_cfg = pj.EpConfig()

            # Log config
            ep_cfg.logConfig.level = 3
            ep_cfg.logConfig.consoleLevel = 3

            # Media config
            ep_cfg.medConfig.clockRate = self._sample_rate
            ep_cfg.medConfig.channelCount = self._channels
            ep_cfg.medConfig.audioFramePtime = 20  # 20ms frames
            ep_cfg.medConfig.maxMediaPorts = self._MAX_MEDIA_PORTS  # Support many simultaneous calls

            # No sound device needed — we're a server, not a softphone
            if self._null_audio:
                ep_cfg.medConfig.noVad = True

            ep.libInit(ep_cfg)

            # Use null audio device (no sound card)
            if self._null_audio:
                ep.audDevManager().setNullDev()

            # Start the library
            ep.libStart()

            self._endpoint = ep
            self._ready = True
            logger.info(
                f"🎵 PJSUA2 media pipeline ready "
                f"(rate={self._sample_rate}Hz, ports={self._MAX_MEDIA_PORTS}, null_audio={self._null_audio})"
            )
        except ImportError:
            logger.warning(
                "⚠️ PJSUA2 not installed — media pipeline running in stub mode. "
                "Install pjsip with Python bindings for real media handling."
            )
            self._ready = True
        except Exception as e:
            logger.error(f"❌ PJSUA2 initialization failed: {e}")
            # A partially initialised endpoint must be torn down, otherwise
            # the native library stays allocated and a later start() cannot
            # create a fresh Endpoint.
            if ep is not None:
                try:
                    ep.libDestroy()
                except Exception as destroy_err:
                    logger.error(f" PJSUA2 destroy error: {destroy_err}")
            self._ready = True  # Still allow gateway to run in degraded mode

    async def stop(self) -> None:
        """Close every tap, remove every stream, and destroy the PJSUA2 endpoint."""
        logger.info("🎵 Stopping PJSUA2 media pipeline...")

        # Close all taps
        for tap_list in self._taps.values():
            for tap in tap_list:
                tap.close()
        self._taps.clear()

        # Remove all streams (iterate a snapshot: remove_stream mutates the dict)
        for stream_id in list(self._streams.keys()):
            self.remove_stream(stream_id)

        # Destroy PJSUA2 endpoint
        if self._endpoint:
            try:
                self._endpoint.libDestroy()
            except Exception as e:
                logger.error(f" PJSUA2 destroy error: {e}")
            self._endpoint = None

        self._ready = False
        logger.info("🎵 PJSUA2 media pipeline stopped")

    @property
    def is_ready(self) -> bool:
        """True once ``start()`` has run (including stub/degraded mode)."""
        return self._ready

    # ================================================================
    # RTP Port Allocation
    # ================================================================

    def allocate_rtp_port(self, stream_id: str) -> int:
        """
        Allocate a local RTP port for a new stream.

        Ports are handed out round-robin in steps of two (the odd
        neighbour is left for RTCP) inside the configured window. Ports
        still held by a registered stream are skipped, so wrapping around
        never reissues a port that is in use.

        Args:
            stream_id: ID of the stream the port is for (not used by the
                allocation itself).

        Returns:
            The allocated local port.

        Raises:
            RuntimeError: Every port pair in the window is held by a
                registered stream.
        """
        with self._lock:
            # Snapshot first: other threads may add/remove streams meanwhile.
            held = {s.rtp_port for s in tuple(self._streams.values()) if s.rtp_port is not None}
            window_end = self._rtp_start_port + self._rtp_port_range
            for _ in range(max(1, self._rtp_port_range // 2)):
                port = self._next_rtp_port
                self._next_rtp_port += 2  # RTP uses even ports, RTCP uses odd
                if self._next_rtp_port >= window_end:
                    self._next_rtp_port = self._rtp_start_port  # Wrap around
                if port not in held:
                    return port
            raise RuntimeError(
                f"No free RTP port in range "
                f"{self._rtp_start_port}-{self._rtp_start_port + self._rtp_port_range}"
            )

    # ================================================================
    # Stream Management
    # ================================================================

    def add_remote_stream(
        self, stream_id: str, remote_host: str, remote_port: int, codec: str = "PCMU"
    ) -> Optional[int]:
        """
        Register a remote RTP stream with the pipeline.

        Creates the ``MediaStream`` record and reserves a local RTP port.
        Wiring the stream into the PJSUA2 conference bridge is not done
        here yet, so the returned conference port is currently whatever
        the fresh record holds (``None``).

        Args:
            stream_id: Unique ID (typically the SIP leg ID)
            remote_host: Remote RTP host
            remote_port: Remote RTP port
            codec: Audio codec (PCMU, PCMA, G729)

        Returns:
            Conference bridge port ID, or None when no port is assigned.

        Raises:
            RuntimeError: No local RTP port is available.
        """
        stream = MediaStream(stream_id, remote_host, remote_port, codec)
        stream.rtp_port = self.allocate_rtp_port(stream_id)

        if self._endpoint:
            try:
                import pjsua2 as pj

                # Create a media transport for this stream
                # In a full implementation, we'd create an AudioMediaPort
                # that receives RTP and feeds it into the conference bridge
                transport_cfg = pj.TransportConfig()
                transport_cfg.port = stream.rtp_port

                # The conference bridge port will be assigned when
                # the call's media is activated via onCallMediaState
                logger.info(
                    f" 📡 Added stream {stream_id}: "
                    f"local={stream.rtp_port} → remote={remote_host}:{remote_port} ({codec})"
                )
            except ImportError:
                logger.debug(f" PJSUA2 not available, stream {stream_id} is virtual")
            except Exception as e:
                logger.error(f" Failed to add stream {stream_id}: {e}")

        self._streams[stream_id] = stream
        return stream.conf_port

    def remove_stream(self, stream_id: str) -> None:
        """
        Remove a stream and release everything attached to it.

        Closes the taps held by the stream and any taps registered under
        the same ID in the tap registry (taps created before the stream
        existed live only there), then drops the recorder reference.
        Unknown IDs are a silent no-op apart from that tap cleanup.
        """
        stream = self._streams.pop(stream_id, None)
        registered_taps = self._taps.pop(stream_id, [])
        stream_taps = stream.taps if stream else []

        # close() only clears a flag, so closing a tap twice is harmless.
        for tap in (*stream_taps, *registered_taps):
            tap.close()

        if not stream:
            return

        stream.active = False
        # Stop recording: dropping the reference lets PJSUA2 clean up.
        stream.recorder = None
        logger.info(f" Removed stream {stream_id}")

    # ================================================================
    # Bridging (Connect Two Call Legs)
    # ================================================================

    def bridge_streams(self, stream_a: str, stream_b: str) -> None:
        """
        Bridge two streams — bidirectional audio flow.

        In PJSUA2 terms:
            stream_a.startTransmit(stream_b)
            stream_b.startTransmit(stream_a)

        The actual ``startTransmit`` calls are not implemented yet; this
        only validates that both streams exist and logs the bridge.
        """
        a = self._streams.get(stream_a)
        b = self._streams.get(stream_b)
        if not a or not b:
            logger.warning(f" Cannot bridge: stream(s) not found ({stream_a}, {stream_b})")
            return

        if self._endpoint and a.conf_port is not None and b.conf_port is not None:
            # In PJSUA2, AudioMedia objects handle this via startTransmit
            # We'd need the actual AudioMedia references here
            logger.info(f" 🔗 Bridged {stream_a} (port {a.conf_port}) ↔ {stream_b} (port {b.conf_port})")
        else:
            logger.info(f" 🔗 Bridged {stream_a} ↔ {stream_b} (virtual)")

    def unbridge_streams(self, stream_a: str, stream_b: str) -> None:
        """Disconnect two streams (currently log-only, like ``bridge_streams``)."""
        a = self._streams.get(stream_a)
        b = self._streams.get(stream_b)
        if self._endpoint and a and b and a.conf_port is not None and b.conf_port is not None:
            logger.info(f" 🔓 Unbridged {stream_a} ↔ {stream_b}")
        else:
            logger.info(f" 🔓 Unbridged {stream_a} ↔ {stream_b} (virtual)")

    # ================================================================
    # Audio Tapping (for Classifier + STT)
    # ================================================================

    def create_tap(self, stream_id: str) -> "AudioTap":
        """
        Create an audio tap on a stream.

        The tap is registered under ``stream_id`` and, when the stream is
        already known, also attached to the stream record. Feeding frames
        from PJSUA2 into the tap is not implemented here yet.

        Multiple taps per stream are supported (e.g., classifier + STT + recording).
        """
        tap = AudioTap(stream_id, sample_rate=self._sample_rate)
        stream = self._streams.get(stream_id)
        if stream:
            stream.taps.append(tap)
        self._taps.setdefault(stream_id, []).append(tap)

        if self._endpoint and stream and stream.conf_port is not None:
            # Create an AudioMediaPort that captures frames
            # and feeds them to the tap
            # In PJSUA2, we'd subclass AudioMediaPort and implement
            # onFrameReceived to call tap.feed(frame_data)
            logger.info(f" 🎤 Audio tap created for {stream_id} (PJSUA2)")
        else:
            logger.info(f" 🎤 Audio tap created for {stream_id} (virtual)")
        return tap

    def get_audio_tap(self, stream_id: str) -> AsyncIterator[bytes]:
        """
        Get an async audio stream for a call leg.

        Creates a tap if none is registered for ``stream_id``; otherwise
        reuses the first registered tap.

        NOTE(review): callers that reuse the first tap iterate the same
        tap object — confirm that is intended when several consumers
        need the full audio.
        """
        taps = self._taps.get(stream_id, [])
        tap = taps[0] if taps else self.create_tap(stream_id)
        return tap.stream()

    # ================================================================
    # Recording
    # ================================================================

    def start_recording(self, stream_id: str, filepath: str) -> bool:
        """
        Start recording a stream to a WAV file.

        With a live endpoint this creates a PJSUA2 ``AudioMediaRecorder``
        for ``filepath`` and stores it on the stream; connecting the
        stream's audio to the recorder is not implemented yet. Without an
        endpoint the call is logged and reported as successful.

        Returns:
            False when the stream is unknown or the recorder could not be
            created, True otherwise.
        """
        stream = self._streams.get(stream_id)
        if not stream:
            logger.warning(f" Cannot record: stream {stream_id} not found")
            return False

        if not self._endpoint:
            logger.info(f" 🔴 Recording {stream_id} → {filepath} (virtual)")
            return True

        try:
            import pjsua2 as pj

            recorder = pj.AudioMediaRecorder()
            recorder.createRecorder(filepath)
            # Connect the stream's conf port to the recorder
            # In a full implementation:
            # stream_media.startTransmit(recorder)
            stream.recorder = recorder
            logger.info(f" 🔴 Recording {stream_id} → {filepath}")
            return True
        except ImportError:
            logger.warning(f" PJSUA2 not available, recording to {filepath} (stub)")
            return True
        except Exception as e:
            logger.error(f" Failed to start recording {stream_id}: {e}")
            return False

    def stop_recording(self, stream_id: str) -> None:
        """Stop recording a stream by dropping its recorder reference."""
        stream = self._streams.get(stream_id)
        if stream and stream.recorder:
            # PJSUA2 will flush and close the WAV file
            stream.recorder = None
            logger.info(f" ⏹ Stopped recording {stream_id}")

    # ================================================================
    # Tone Generation
    # ================================================================

    def play_tone(self, stream_id: str, frequency: int, duration_ms: int = 500) -> None:
        """
        Play a tone into a stream (for DTMF or comfort noise).

        Tone generation is not implemented yet; with a live endpoint the
        request is only logged at debug level.
        """
        if self._endpoint:
            # Use pj.ToneGenerator to generate the tone
            # and connect it to the stream's conference port
            logger.debug(f" 🔊 Playing {frequency}Hz tone on {stream_id} ({duration_ms}ms)")

    # ================================================================
    # Status
    # ================================================================

    @property
    def stream_count(self) -> int:
        """Number of registered streams."""
        return len(self._streams)

    @property
    def tap_count(self) -> int:
        """Number of taps in the tap registry, across all streams."""
        return sum(len(taps) for taps in self._taps.values())

    def status(self) -> dict:
        """Pipeline status for monitoring."""
        return {
            "ready": self._ready,
            "pjsua2_available": self._endpoint is not None,
            "streams": self.stream_count,
            "taps": self.tap_count,
            "rtp_port_range": f"{self._rtp_start_port}-{self._rtp_start_port + self._rtp_port_range}",
            "sample_rate": self._sample_rate,
        }

257
core/sip_engine.py Normal file
View File

@@ -0,0 +1,257 @@
"""
SIP Engine — Abstract interface for SIP signaling and media control.
This defines the contract that any SIP backend (Sippy B2BUA, PJSUA2, etc.)
must implement. The rest of the gateway talks to this interface, never
to the underlying SIP library directly.
"""
import abc
from collections.abc import AsyncIterator
from typing import Optional
from models.call import ActiveCall
from models.device import Device
class SIPEngine(abc.ABC):
    """
    Abstract SIP engine interface.

    Every method is abstract; the gateway is meant to talk to a SIP
    backend only through this contract.

    Implementations:
    - SippyEngine: Sippy B2BUA for signaling + PJSUA2 for media
    - MockSIPEngine: For testing without a real SIP stack
    """

    # ================================================================
    # Lifecycle
    # ================================================================

    @abc.abstractmethod
    async def start(self) -> None:
        """
        Start the SIP engine.

        - Initialize the SIP stack
        - Register with the SIP trunk
        - Start listening for device registrations
        """
        ...

    @abc.abstractmethod
    async def stop(self) -> None:
        """
        Gracefully shut down.

        - Hang up all active calls
        - Unregister from trunk
        - Close all sockets
        """
        ...

    @abc.abstractmethod
    async def is_ready(self) -> bool:
        """Is the engine ready to make/receive calls?"""
        ...

    # ================================================================
    # Outbound Calls
    # ================================================================

    @abc.abstractmethod
    async def make_call(self, number: str, caller_id: Optional[str] = None) -> str:
        """
        Place an outbound call via the SIP trunk.

        Args:
            number: Phone number to call (E.164)
            caller_id: Optional caller ID override

        Returns:
            SIP call leg ID (used to reference this call in the engine)
        """
        ...

    @abc.abstractmethod
    async def hangup(self, call_leg_id: str) -> None:
        """Hang up a call leg."""
        ...

    @abc.abstractmethod
    async def send_dtmf(self, call_leg_id: str, digits: str) -> None:
        """
        Send DTMF tones on a call leg.

        Args:
            call_leg_id: The call leg to send on
            digits: DTMF digits to send (0-9, *, #)
        """
        ...

    # ================================================================
    # Device Calls (for transfer)
    # ================================================================

    @abc.abstractmethod
    async def call_device(self, device: Device) -> str:
        """
        Place a call to a registered device.

        For SIP devices: sends INVITE to their registered contact.
        For cell phones: places outbound call via trunk.

        Args:
            device: The device to call

        Returns:
            SIP call leg ID for the device leg
        """
        ...

    # ================================================================
    # Conference Bridge / Media
    # ================================================================

    @abc.abstractmethod
    async def bridge_calls(self, leg_a: str, leg_b: str) -> str:
        """
        Bridge two call legs together in a conference.

        Audio from leg_a flows to leg_b and vice versa.

        Args:
            leg_a: First call leg ID
            leg_b: Second call leg ID

        Returns:
            Bridge/conference ID
        """
        ...

    @abc.abstractmethod
    async def unbridge(self, bridge_id: str) -> None:
        """Remove a bridge, disconnecting the audio paths."""
        ...

    @abc.abstractmethod
    def get_audio_stream(self, call_leg_id: str) -> AsyncIterator[bytes]:
        """
        Get a real-time audio stream from a call leg.

        Declared as a plain (non-``async``) method: calling it returns
        the async iterator directly, without awaiting. An implementation
        written as an async generator satisfies this.

        Returns an async generator yielding audio chunks (PCM/WAV frames).
        Used by the audio classifier and transcription services.

        Yields:
            bytes: Audio frames (16-bit PCM, 16kHz mono)
        """
        ...

    # ================================================================
    # Registration
    # ================================================================

    @abc.abstractmethod
    async def get_registered_devices(self) -> list[dict]:
        """
        Get list of currently registered SIP devices.

        Returns:
            List of dicts with registration info:
            [{"uri": "sip:robert@...", "contact": "...", "expires": 3600}, ...]
        """
        ...

    # ================================================================
    # Trunk Status
    # ================================================================

    @abc.abstractmethod
    async def get_trunk_status(self) -> dict:
        """
        Get SIP trunk registration status.

        Returns:
            {"registered": True/False, "host": "...", "transport": "..."}
        """
        ...
class MockSIPEngine(SIPEngine):
    """
    Mock SIP engine for testing.

    Simulates call lifecycle without any real SIP stack: legs and bridges
    are plain dictionary entries, and the audio stream is a short burst
    of silence.
    """

    def __init__(self):
        self._ready = False
        self._call_counter = 0  # Shared by make_call and call_device leg IDs
        self._active_legs: dict[str, dict] = {}
        self._bridges: dict[str, tuple[str, str]] = {}
        self._registered_devices: list[dict] = []

    async def start(self) -> None:
        """Mark the engine ready."""
        self._ready = True

    async def stop(self) -> None:
        """Forget all legs and bridges and mark the engine not ready."""
        self._active_legs.clear()
        self._bridges.clear()
        self._ready = False

    async def is_ready(self) -> bool:
        """Return whether ``start()`` has been called (and ``stop()`` has not)."""
        return self._ready

    async def make_call(self, number: str, caller_id: Optional[str] = None) -> str:
        """Record a new outbound leg in the ``ringing`` state and return its ID."""
        self._call_counter += 1
        leg_id = f"mock_leg_{self._call_counter}"
        self._active_legs[leg_id] = {
            "number": number,
            "caller_id": caller_id,
            "state": "ringing",
        }
        return leg_id

    async def hangup(self, call_leg_id: str) -> None:
        """
        Drop a leg and every bridge it takes part in.

        Unknown leg IDs are ignored. Bridges are removed as well so the
        mock matches ``SippyEngine.hangup``, which never leaves a bridge
        pointing at a leg that no longer exists.
        """
        self._active_legs.pop(call_leg_id, None)
        stale = [bid for bid, legs in self._bridges.items() if call_leg_id in legs]
        for bid in stale:
            del self._bridges[bid]

    async def send_dtmf(self, call_leg_id: str, digits: str) -> None:
        """Append ``digits`` to the leg's ``dtmf_sent`` list; unknown legs are ignored."""
        if call_leg_id in self._active_legs:
            self._active_legs[call_leg_id].setdefault("dtmf_sent", []).append(digits)

    async def call_device(self, device: Device) -> str:
        """Record a new device leg in the ``ringing`` state and return its ID."""
        self._call_counter += 1
        leg_id = f"mock_device_leg_{self._call_counter}"
        self._active_legs[leg_id] = {
            "device_id": device.id,
            "device_name": device.name,
            "state": "ringing",
        }
        return leg_id

    async def bridge_calls(self, leg_a: str, leg_b: str) -> str:
        """Record a bridge between two leg IDs (not validated) and return its ID."""
        bridge_id = f"bridge_{leg_a}_{leg_b}"
        self._bridges[bridge_id] = (leg_a, leg_b)
        return bridge_id

    async def unbridge(self, bridge_id: str) -> None:
        """Forget a bridge; unknown IDs are ignored."""
        self._bridges.pop(bridge_id, None)

    async def get_audio_stream(self, call_leg_id: str) -> AsyncIterator[bytes]:
        """Yield ten frames of silence, pausing 0.1 s after each, for testing."""
        import asyncio

        for _ in range(10):
            yield b"\x00" * 3200  # 100ms of silence at 16kHz 16-bit mono
            await asyncio.sleep(0.1)

    async def get_registered_devices(self) -> list[dict]:
        """Return the internal registration list (the same list object, not a copy)."""
        return self._registered_devices

    async def get_trunk_status(self) -> dict:
        """Report a fixed "not registered, mock mode" trunk status."""
        return {
            "registered": False,
            "host": None,
            "transport": None,
            "mock": True,
            "reason": "No SIP trunk configured (mock mode)",
        }

780
core/sippy_engine.py Normal file
View File

@@ -0,0 +1,780 @@
"""
Sippy Engine — SIP signaling via Sippy B2BUA.
Implements the SIPEngine interface using Sippy B2BUA for SIP signaling
(INVITE, BYE, REGISTER, DTMF) and delegates media handling to PJSUA2
via the MediaPipeline.
Architecture:
Sippy B2BUA → SIP signaling (call control, registration, DTMF)
PJSUA2 → Media anchor (conference bridge, audio tapping, recording)
Sippy B2BUA runs in its own thread (it has its own event loop).
We bridge async/sync via run_in_executor.
"""
import asyncio
import logging
import threading
import uuid
from typing import Any, Callable, Optional
from core.sip_engine import SIPEngine
from models.device import Device, DeviceType
logger = logging.getLogger(__name__)
# ================================================================
# Sippy B2BUA Wrapper Types
# ================================================================
class SipCallLeg:
    """
    Tracks a single SIP call leg managed by Sippy.

    A plain mutable record: the engine and the call controller update
    ``state``, ``sippy_ua``, ``media_port`` and ``dtmf_buffer`` in place.
    """

    def __init__(self, leg_id: str, direction: str, remote_uri: str):
        self.leg_id = leg_id
        self.direction = direction  # "outbound" or "inbound"
        self.remote_uri = remote_uri
        self.state = "init"  # init, trying, ringing, connected, terminated
        self.sippy_ua = None  # Sippy UA object reference
        self.media_port: Optional[int] = None  # PJSUA2 conf bridge port
        self.dtmf_buffer: list[str] = []

    def __repr__(self):
        # Keep state and URI visually separate; without the arrow they run
        # together ("initsip:...") and the repr is unreadable in logs.
        return f"<SipCallLeg {self.leg_id} {self.direction} {self.state} → {self.remote_uri}>"
class SipBridge:
    """Two call legs bridged together, identified by ``bridge_id``."""

    def __init__(self, bridge_id: str, leg_a: str, leg_b: str):
        self.bridge_id = bridge_id
        self.leg_a = leg_a
        self.leg_b = leg_b

    def __repr__(self):
        # Separate the two leg IDs; concatenated they cannot be told apart.
        return f"<SipBridge {self.bridge_id}: {self.leg_a} ↔ {self.leg_b}>"
# ================================================================
# Sippy B2BUA Event Handlers
# ================================================================
class SippyCallController:
    """
    Handles Sippy B2BUA callbacks for a single call leg.

    Sippy B2BUA uses a callback model — when SIP events happen
    (180 Ringing, 200 OK, BYE, etc.), the corresponding method
    is called on this controller. Each handler updates the leg's state
    and, where relevant, forwards the change to the engine's
    ``_on_leg_state_change`` callback on the engine's event loop.
    """

    def __init__(self, leg: SipCallLeg, engine: "SippyEngine"):
        self.leg = leg
        self.engine = engine

    def _notify_state(self, state: str) -> None:
        """Schedule the engine's leg-state callback on its event loop, if both exist."""
        callback = self.engine._on_leg_state_change
        if not callback:
            return
        loop = self.engine._loop
        if loop is None:
            # The engine has not captured its loop (start() not run yet);
            # skip the notification instead of raising inside a SIP callback.
            logger.warning(f" {self.leg.leg_id}: no event loop, dropping '{state}' notification")
            return
        # Thread-safe hand-off to the engine's loop.
        loop.call_soon_threadsafe(callback, self.leg.leg_id, state)

    def on_trying(self):
        """100 Trying received."""
        self.leg.state = "trying"
        logger.debug(f" {self.leg.leg_id}: 100 Trying")

    def on_ringing(self, ringing_code: int = 180):
        """180 Ringing / 183 Session Progress received."""
        self.leg.state = "ringing"
        logger.info(f" {self.leg.leg_id}: {ringing_code} Ringing")
        self._notify_state("ringing")

    def on_connected(self, sdp_body: Optional[str] = None):
        """
        200 OK — call connected, media negotiated.

        When an SDP body and a media pipeline are available, the remote
        RTP endpoint parsed from the SDP is registered with the pipeline
        and the returned conference port is stored on the leg. Media
        setup failures are logged and do not block the state change.
        """
        self.leg.state = "connected"
        logger.info(f" {self.leg.leg_id}: Connected")

        # Extract remote RTP endpoint from SDP for PJSUA2 media bridge
        if sdp_body and self.engine.media_pipeline:
            try:
                remote_rtp = self.engine._parse_sdp_rtp_endpoint(sdp_body)
                if remote_rtp:
                    port = self.engine.media_pipeline.add_remote_stream(
                        self.leg.leg_id,
                        remote_rtp["host"],
                        remote_rtp["port"],
                        remote_rtp["codec"],
                    )
                    self.leg.media_port = port
            except Exception as e:
                logger.error(f" Failed to set up media for {self.leg.leg_id}: {e}")

        self._notify_state("connected")

    def on_disconnected(self, reason: str = ""):
        """
        BYE received or call terminated.

        Always asks the media pipeline to drop this leg's stream. The
        leg's ``media_port`` cannot be used as the "has a stream" signal:
        it holds whatever ``add_remote_stream`` returned, which can be
        ``None`` even though the stream was registered.
        """
        self.leg.state = "terminated"
        logger.info(f" {self.leg.leg_id}: Disconnected ({reason})")

        # Clean up media
        if self.engine.media_pipeline:
            try:
                self.engine.media_pipeline.remove_stream(self.leg.leg_id)
            except Exception as e:
                logger.error(f" Failed to clean up media for {self.leg.leg_id}: {e}")

        self._notify_state("terminated")

    def on_dtmf(self, digit: str):
        """DTMF digit received (RFC 2833 or SIP INFO); appended to the leg's buffer."""
        self.leg.dtmf_buffer.append(digit)
        logger.debug(f" {self.leg.leg_id}: DTMF '{digit}'")
# ================================================================
# Main Engine
# ================================================================
class SippyEngine(SIPEngine):
    """
    SIP engine using Sippy B2BUA for signaling.

    Sippy B2BUA handles:
    - SIP REGISTER (trunk registration + device registration)
    - SIP INVITE / ACK / BYE (call setup/teardown)
    - SIP INFO / RFC 2833 (DTMF)
    - SDP negotiation (we extract RTP endpoints for PJSUA2)

    Media is handled by PJSUA2's conference bridge (see MediaPipeline).
    Sippy only needs to know about SDP — PJSUA2 handles the actual RTP.
    """

    def __init__(
        self,
        sip_address: str = "0.0.0.0",
        sip_port: int = 5060,
        trunk_host: str = "",
        trunk_port: int = 5060,
        trunk_username: str = "",
        trunk_password: str = "",
        trunk_transport: str = "udp",
        domain: str = "gateway.local",
        did: str = "",
        media_pipeline=None,  # MediaPipeline instance
        on_leg_state_change: Optional[Callable] = None,
        on_device_registered: Optional[Callable] = None,
    ):
        """
        Store configuration and initialise empty state; nothing is
        started here.

        Args:
            sip_address: Local address given to Sippy's ``SipConf``.
            sip_port: Local SIP port given to Sippy's ``SipConf``.
            trunk_host: SIP trunk host; an empty string means no trunk
                registration is attempted.
            trunk_port: SIP trunk port.
            trunk_username: Trunk credential (user part of the trunk AOR).
            trunk_password: Trunk credential.
            trunk_transport: Stored as given.
            domain: Domain used in the From URI of outbound calls (and in
                the remote URI when no trunk host is set).
            did: Default caller ID user part when a call passes none.
            media_pipeline: Object providing ``add_remote_stream`` /
                ``remove_stream``; may be None.
            on_leg_state_change: Called as ``(leg_id, state)`` via
                ``loop.call_soon_threadsafe`` — i.e. invoked as a plain
                function on the event loop.
            on_device_registered: Awaited as ``(aor, contact, expires)``
                after a device REGISTER.
        """
        # SIP config
        self._sip_address = sip_address
        self._sip_port = sip_port
        self._trunk_host = trunk_host
        self._trunk_port = trunk_port
        self._trunk_username = trunk_username
        self._trunk_password = trunk_password
        self._trunk_transport = trunk_transport
        self._domain = domain
        self._did = did

        # Media pipeline (PJSUA2)
        self.media_pipeline = media_pipeline

        # Callbacks for async state changes
        self._on_leg_state_change = on_leg_state_change
        self._on_device_registered = on_device_registered
        # Captured in start(); None until then.
        self._loop: Optional[asyncio.AbstractEventLoop] = None

        # State
        self._ready = False
        self._trunk_registered = False
        self._legs: dict[str, SipCallLeg] = {}
        self._bridges: dict[str, SipBridge] = {}
        self._registered_devices: list[dict] = []

        # Sippy B2BUA internals (set during start)
        self._sippy_global_config: dict[str, Any] = {}
        self._sippy_thread: Optional[threading.Thread] = None
# ================================================================
# Lifecycle
# ================================================================
async def start(self) -> None:
    """
    Start the Sippy B2BUA SIP stack.

    Captures the running event loop (used later to hand events from
    Sippy's thread back to asyncio), configures ``SipConf``, launches
    ``_run_sippy_loop`` in a daemon thread and, when a trunk host is
    configured, registers with it. If ``sippy`` cannot be imported the
    engine still marks itself ready, without a trunk registration.
    Exceptions other than ImportError propagate to the caller.
    """
    self._loop = asyncio.get_running_loop()
    logger.info("🔌 Starting Sippy B2BUA SIP engine...")
    try:
        from sippy.SipConf import SipConf
        # Not referenced below; presumably imported here so a missing
        # Sippy install fails with ImportError before the thread starts.
        from sippy.SipTransactionManager import SipTransactionManager

        # Configure Sippy
        SipConf.my_address = self._sip_address
        SipConf.my_port = self._sip_port
        SipConf.my_uaname = "Hold Slayer Gateway"

        self._sippy_global_config = {
            "_sip_address": self._sip_address,
            "_sip_port": self._sip_port,
            "_sip_tm": None,  # Transaction manager set after start
        }

        # Start Sippy's SIP transaction manager in a background thread
        # Sippy uses its own event loop (Twisted reactor or custom loop)
        self._sippy_thread = threading.Thread(
            target=self._run_sippy_loop,
            name="sippy-b2bua",
            daemon=True,
        )
        self._sippy_thread.start()

        # Register with trunk
        # NOTE(review): "_sip_tm" is filled in by the new thread; nothing
        # here waits for that before registering — confirm registration
        # does not depend on it.
        if self._trunk_host:
            await self._register_trunk()

        self._ready = True
        logger.info(
            f"🔌 Sippy B2BUA ready on {self._sip_address}:{self._sip_port}"
        )
    except ImportError:
        logger.warning(
            "⚠️ Sippy B2BUA not installed — falling back to mock mode. "
            "Install with: pip install sippy"
        )
        # Still report ready so the gateway can run without a SIP stack.
        self._ready = True
        self._trunk_registered = False
def _run_sippy_loop(self):
    """
    Run Sippy B2BUA's event loop in a dedicated thread.

    Creates the transaction manager (wired to ``_handle_sippy_request``),
    publishes it in the shared global config under ``"_sip_tm"``, then
    blocks in Sippy's event dispatcher until the loop is broken. Any
    failure is logged with its traceback; this is the thread's top level,
    so nothing else would report it.
    """
    try:
        from sippy.SipTransactionManager import SipTransactionManager

        # Initialize Sippy's transaction manager
        stm = SipTransactionManager(self._sippy_global_config, self._handle_sippy_request)
        self._sippy_global_config["_sip_tm"] = stm
        logger.info(" Sippy transaction manager started")

        # Sippy will block here in its event loop
        # For the Twisted-based version, this runs the reactor
        # For the asyncore version, this runs asyncore.loop()
        from sippy.Core.EventDispatcher import ED

        ED.loop()
    except Exception as e:
        # Same message as before, plus the traceback.
        logger.exception(f" Sippy event loop crashed: {e}")
def _handle_sippy_request(self, req, sip_t):
    """
    Route an incoming SIP request to its per-method handler.

    Registered with Sippy's transaction manager as the request callback.
    INVITE, REGISTER, BYE and INFO are dispatched; any other method is
    logged and otherwise ignored.
    """
    sip_method = req.getMethod()
    logger.info(f" Incoming SIP {sip_method}")

    # SIP method -> name of the handler on this engine.
    routes = {
        "INVITE": "_handle_incoming_invite",
        "REGISTER": "_handle_incoming_register",
        "BYE": "_handle_incoming_bye",
        "INFO": "_handle_incoming_info",
    }
    handler_name = routes.get(sip_method)
    if handler_name is None:
        return
    getattr(self, handler_name)(req, sip_t)
def _handle_incoming_register(self, req, sip_t):
    """
    Process a SIP REGISTER sent by a phone or softphone.

    Reads the address of record from the To header, the contact URI
    (falling back to the AOR) and the Expires value (default 3600).
    An expiry of 0 removes the AOR from the registration list; any other
    value adds or refreshes its record and schedules
    ``_notify_registration`` on the engine's event loop. A 200 OK is
    sent on success; any failure is logged and answered with 500.

    NOTE(review): ``req.getHFBody`` / ``req.sendResponse`` are used as
    written here — verify against the installed Sippy version, including
    what ``getHFBody`` does when the header is absent.
    """
    try:
        aor = str(req.getHFBody("to").getUri())
        contact_header = req.getHFBody("contact")
        contact = str(contact_header.getUri()) if contact_header else aor
        expires_header = req.getHFBody("expires")
        ttl = int(str(expires_header)) if expires_header else 3600
        logger.info(f" SIP REGISTER: {aor} contact={contact} expires={ttl}")

        if ttl != 0:
            # Refresh the existing record for this AOR, or add a new one.
            record = None
            for entry in self._registered_devices:
                if entry.get("aor") == aor:
                    record = entry
                    break
            if record:
                record["contact"] = contact
                record["expires"] = ttl
            else:
                self._registered_devices.append({
                    "aor": aor,
                    "contact": contact,
                    "expires": ttl,
                })

            # Hand the registration to the gateway on the asyncio loop so
            # it can assign an extension.
            if self._loop:
                self._loop.call_soon_threadsafe(
                    self._loop.create_task,
                    self._notify_registration(aor, contact, ttl),
                )
        else:
            # De-registration: keep every record except this AOR's.
            self._registered_devices = [
                entry for entry in self._registered_devices
                if entry.get("aor") != aor
            ]
            logger.info(f" De-registered: {aor}")

        # Reply 200 OK
        req.sendResponse(200, "OK")
    except Exception as e:
        logger.error(f" REGISTER handling failed: {e}")
        try:
            req.sendResponse(500, "Server Error")
        except Exception:
            # Best effort: the error response itself could not be sent.
            pass
async def _notify_registration(self, aor: str, contact: str, expires: int):
"""
Async callback: tell the gateway about the newly registered device
so it can assign an extension if needed.
"""
if self._on_device_registered:
await self._on_device_registered(aor, contact, expires)
def _handle_incoming_invite(self, req, sip_t):
    """
    Handle an incoming INVITE — create an inbound call leg.

    Registers a new leg keyed by a fresh ID, remembers the transaction's
    UA (when it has one) so later BYE/INFO requests can be matched to the
    leg, and immediately treats the call as connected, passing the
    request body on as SDP.
    """
    from_uri = str(req.getHFBody("from").getUri())
    to_uri = str(req.getHFBody("to").getUri())
    leg_id = f"leg_{uuid.uuid4().hex[:12]}"

    leg = SipCallLeg(leg_id, "inbound", from_uri)
    leg.sippy_ua = sip_t.ua if hasattr(sip_t, "ua") else None
    self._legs[leg_id] = leg
    logger.info(f" Incoming call: {from_uri} → {to_uri} (leg: {leg_id})")

    # Auto-answer for now (gateway always answers)
    # In production, this would check routing rules
    body = req.getBody()  # fetched once; an empty body means "no SDP"
    controller = SippyCallController(leg, self)
    controller.on_connected(str(body) if body else None)
def _handle_incoming_bye(self, req, sip_t):
    """
    Handle an incoming BYE by marking the matching call leg disconnected.

    The leg is found by comparing each tracked leg's Sippy UA with the
    transaction's ``ua``; only the first match is notified. Nothing
    happens when the transaction has no ``ua`` or no leg matches.
    """
    if not hasattr(sip_t, "ua"):
        return
    matched = next(
        (
            candidate
            for candidate in self._legs.values()
            if candidate.sippy_ua and candidate.sippy_ua == sip_t.ua
        ),
        None,
    )
    if matched is not None:
        SippyCallController(matched, self).on_disconnected("BYE received")
def _handle_incoming_info(self, req, sip_t):
    """
    Handle a SIP INFO request that carries DTMF.

    The body is scanned only when it mentions "dtmf" (any case) or
    contains "Signal=". Each line starting with "Signal=" yields one
    digit, which goes to the first leg whose Sippy UA matches the
    transaction's ``ua``.
    """
    raw_body = req.getBody()
    body = str(raw_body) if raw_body else ""
    if "dtmf" not in body.lower() and "Signal=" not in body:
        return

    # Extract DTMF digit(s) from the SIP INFO body.
    for line in body.split("\n"):
        if not line.startswith("Signal="):
            continue
        digit = line.split("=")[1].strip()
        for leg in self._legs.values():
            if not leg.sippy_ua or not hasattr(sip_t, "ua"):
                continue
            if leg.sippy_ua == sip_t.ua:
                SippyCallController(leg, self).on_dtmf(digit)
                break
async def _register_trunk(self) -> None:
    """
    Register with the SIP trunk provider.

    Builds a Sippy registration agent for the configured trunk and
    triggers it from a worker thread. ``_trunk_registered`` becomes True
    once the request has been sent without error — it does not reflect
    the trunk's answer — and False on any failure or when the
    registration agent cannot be imported.

    NOTE(review): the ``SipRegistrationAgent`` keyword names and the
    ``register()`` method are used as written here — verify them against
    the installed Sippy version.
    """
    try:
        from sippy.SipRegistrationAgent import SipRegistrationAgent

        logger.info(f" Registering with trunk: {self._trunk_host}:{self._trunk_port}")

        def do_register():
            try:
                reg_agent = SipRegistrationAgent(
                    self._sippy_global_config,
                    f"sip:{self._trunk_username}@{self._trunk_host}",
                    f"sip:{self._trunk_host}:{self._trunk_port}",
                    auth_name=self._trunk_username,
                    auth_password=self._trunk_password,
                )
                reg_agent.register()
                self._trunk_registered = True
                logger.info(" ✅ Trunk registration sent")
            except Exception as e:
                logger.error(f" ❌ Trunk registration failed: {e}")
                self._trunk_registered = False

        # Runs in the default executor (a worker thread, not Sippy's own
        # thread) so the event loop is not blocked.
        await asyncio.get_running_loop().run_in_executor(None, do_register)
    except ImportError:
        logger.warning(" Sippy registration agent not available")
        self._trunk_registered = False
async def stop(self) -> None:
    """
    Gracefully shut down the SIP engine.

    Hangs up every tracked leg (errors are logged per leg), asks Sippy's
    event dispatcher to leave its loop, waits up to five seconds for the
    Sippy thread to finish, and clears the ready/registered flags.
    """
    logger.info("🔌 Stopping Sippy B2BUA...")

    # Hang up all active legs (snapshot: hangup() mutates the dict)
    for leg_id in list(self._legs.keys()):
        try:
            await self.hangup(leg_id)
        except Exception as e:
            logger.error(f" Error hanging up {leg_id}: {e}")

    # Stop Sippy's event loop
    try:
        from sippy.Core.EventDispatcher import ED

        ED.breakLoop()
    except ImportError:
        # Sippy is not installed, so there is no dispatcher to stop.
        pass
    except Exception as e:
        # Shutdown stays best-effort, but the failure is no longer silent.
        logger.warning(f" Could not stop Sippy event loop cleanly: {e}")

    worker = self._sippy_thread
    if worker and worker.is_alive():
        # join() blocks; run it in a worker thread so the asyncio loop
        # keeps servicing other shutdown work while we wait.
        await asyncio.get_running_loop().run_in_executor(None, worker.join, 5.0)

    self._ready = False
    self._trunk_registered = False
    logger.info("🔌 Sippy B2BUA stopped")
async def is_ready(self) -> bool:
    """Return the ready flag: set at the end of ``start()``, cleared by ``stop()``."""
    return self._ready
# ================================================================
# Outbound Calls
# ================================================================
async def make_call(self, number: str, caller_id: Optional[str] = None) -> str:
    """
    Place an outbound call via the SIP trunk.

    Builds the remote URI (through the trunk when one is configured,
    otherwise on the local domain), registers a new outbound leg, and
    sends the INVITE from a worker thread. The leg ID is returned even
    when the INVITE could not be sent; in that case the leg's state is
    ``"terminated"``. Without Sippy installed the call is simulated and
    the leg goes straight to ``"ringing"``.

    Args:
        number: Number to dial; used as the user part of the remote URI.
        caller_id: Optional caller ID; falls back to the configured DID.

    Returns:
        The new call leg ID.

    Raises:
        RuntimeError: The engine is not ready.
    """
    if not self._ready:
        raise RuntimeError("SIP engine not ready")

    leg_id = f"leg_{uuid.uuid4().hex[:12]}"

    # Build SIP URI for the remote party via trunk
    if self._trunk_host:
        remote_uri = f"sip:{number}@{self._trunk_host}:{self._trunk_port}"
    else:
        remote_uri = f"sip:{number}@{self._domain}"
    from_uri = f"sip:{caller_id or self._did}@{self._domain}"

    leg = SipCallLeg(leg_id, "outbound", remote_uri)
    self._legs[leg_id] = leg
    logger.info(f"📞 Placing call: {from_uri} → {remote_uri} (leg: {leg_id})")

    # Place the call via Sippy
    def do_invite():
        try:
            from sippy.UA import UA
            from sippy.SipCallId import SipCallId
            from sippy.CCEvents import CCEventTry

            controller = SippyCallController(leg, self)

            # Create Sippy UA for this call
            # NOTE(review): the next hop is the trunk address even when
            # no trunk host is configured — confirm the no-trunk case.
            ua = UA(
                self._sippy_global_config,
                event_cb=controller,
                nh_address=(self._trunk_host, self._trunk_port),
            )
            leg.sippy_ua = ua

            # Generate SDP for the call
            sdp_body = self._generate_sdp(leg_id)

            # Send INVITE
            event = CCEventTry(
                (SipCallId(), from_uri, remote_uri),
                body=sdp_body,
            )
            ua.recvEvent(event)
            leg.state = "trying"
            logger.info(f" INVITE sent for {leg_id}")
        except ImportError:
            # Sippy not installed — simulate for development
            logger.warning(f" Sippy not installed, simulating call for {leg_id}")
            leg.state = "ringing"
        except Exception as e:
            logger.error(f" Failed to send INVITE for {leg_id}: {e}")
            leg.state = "terminated"

    # Worker thread keeps the (blocking) Sippy calls off the event loop.
    await asyncio.get_running_loop().run_in_executor(None, do_invite)
    return leg_id
async def hangup(self, call_leg_id: str) -> None:
    """
    Hang up a call leg and release everything tied to it.

    Sends a disconnect through the leg's Sippy UA (when it has one),
    marks the leg terminated, removes its media stream, forgets the leg,
    and drops every bridge it was part of. Unknown leg IDs only produce a
    warning.
    """
    leg = self._legs.get(call_leg_id)
    if not leg:
        logger.warning(f" Cannot hangup: leg {call_leg_id} not found")
        return

    def do_bye():
        try:
            if leg.sippy_ua:
                from sippy.CCEvents import CCEventDisconnect

                leg.sippy_ua.recvEvent(CCEventDisconnect())
        except Exception as e:
            logger.error(f" Error sending BYE for {call_leg_id}: {e}")
        finally:
            leg.state = "terminated"

    await asyncio.get_running_loop().run_in_executor(None, do_bye)

    # Clean up media. Always attempted: leg.media_port mirrors the value
    # returned when the stream was added, which can be None even though
    # the stream is registered, so it cannot gate the cleanup.
    if self.media_pipeline:
        self.media_pipeline.remove_stream(call_leg_id)

    # Remove from tracking
    self._legs.pop(call_leg_id, None)

    # Clean up any bridges this leg was part of
    stale_bridges = [
        bridge_id
        for bridge_id, bridge in self._bridges.items()
        if call_leg_id in (bridge.leg_a, bridge.leg_b)
    ]
    for bridge_id in stale_bridges:
        self._bridges.pop(bridge_id, None)
async def send_dtmf(self, call_leg_id: str, digits: str) -> None:
    """Send DTMF digits on a call leg.

    Args:
        call_leg_id: ID of the leg to send the digits on.
        digits: Digits to send, one signalling event per character.

    Raises:
        ValueError: If the leg is not tracked.
    """
    leg = self._legs.get(call_leg_id)
    if not leg:
        raise ValueError(f"Call leg {call_leg_id} not found")

    logger.info(f" 📱 Sending DTMF '{digits}' on {call_leg_id}")

    def do_dtmf():
        """Push one info event per digit into the UA (runs in a worker thread)."""
        try:
            if not leg.sippy_ua:
                logger.warning(f" No UA for {call_leg_id}, DTMF not sent")
                return

            # Resolve the event class once instead of once per digit.
            from sippy.CCEvents import CCEventInfo

            # NOTE: each digit goes out as a signalling-path event with a
            # "Signal=/Duration=" text body (the format conventionally used
            # for SIP INFO dtmf-relay). This is not RFC 2833 in-band RTP.
            for digit in digits:
                body = f"Signal={digit}\r\nDuration=160\r\n"
                leg.sippy_ua.recvEvent(CCEventInfo(body=body))
        except ImportError:
            logger.warning(f" Sippy not installed, DTMF simulated: {digits}")
        except Exception as e:
            logger.error(f" DTMF send error: {e}")

    await asyncio.get_running_loop().run_in_executor(None, do_dtmf)
# ================================================================
# Device Calls (for transfer)
# ================================================================
async def call_device(self, device: Device) -> str:
    """Place a call to a registered device and return the new leg ID.

    SIP phones, softphones and WebRTC devices are called directly on their
    SIP URI; cell devices are dialled through ``make_call``.

    Raises:
        ValueError: If the device lacks the address its type requires, or
            its type is not supported.
    """
    sip_capable = (DeviceType.SIP_PHONE, DeviceType.SOFTPHONE, DeviceType.WEBRTC)

    if device.type in sip_capable:
        if device.sip_uri:
            # Direct SIP call to device's registered contact
            return await self._call_sip_device(device)
        raise ValueError(f"Device {device.id} has no SIP URI")

    if device.type == DeviceType.CELL:
        if device.phone_number:
            # Call cell phone via trunk
            return await self.make_call(device.phone_number)
        raise ValueError(f"Device {device.id} has no phone number")

    raise ValueError(f"Unsupported device type: {device.type}")
async def _call_sip_device(self, device: Device) -> str:
    """Place a direct SIP call to a registered device.

    Args:
        device: Device to call; its ``sip_uri`` is used both as the remote
            URI and to derive the next-hop host and port.

    Returns:
        The ID of the newly tracked call leg. The leg stays registered even
        if the INVITE could not be sent (state "terminated").
    """
    leg_id = f"leg_{uuid.uuid4().hex[:12]}"

    leg = SipCallLeg(leg_id, "outbound", device.sip_uri)
    self._legs[leg_id] = leg

    logger.info(f"📱 Calling device: {device.name} ({device.sip_uri}) (leg: {leg_id})")

    def do_invite_device():
        """Build the Sippy UA and send the INVITE (runs in a worker thread)."""
        try:
            from sippy.UA import UA
            from sippy.CCEvents import CCEventTry
            from sippy.SipCallId import SipCallId

            controller = SippyCallController(leg, self)

            # Derive the routing target from the device's contact URI,
            # e.g. sip:robert@192.168.1.100:5060;transport=udp
            host, port = self._split_sip_host_port(device.sip_uri, self._domain)

            ua = UA(
                self._sippy_global_config,
                event_cb=controller,
                nh_address=(host, port),
            )
            leg.sippy_ua = ua

            sdp_body = self._generate_sdp(leg_id)

            event = CCEventTry(
                (SipCallId(), f"sip:gateway@{self._domain}", device.sip_uri),
                body=sdp_body,
            )
            ua.recvEvent(event)
            leg.state = "trying"
        except ImportError:
            logger.warning(f" Sippy not installed, simulating device call for {leg_id}")
            leg.state = "ringing"
        except Exception as e:
            logger.error(f" Failed to call device {device.name}: {e}")
            leg.state = "terminated"

    # We are inside a coroutine, so a loop is guaranteed to be running.
    await asyncio.get_running_loop().run_in_executor(None, do_invite_device)
    return leg_id

@staticmethod
def _split_sip_host_port(sip_uri: str, default_host: str, default_port: int = 5060) -> tuple:
    """Extract ``(host, port)`` from a SIP/SIPS URI, tolerating real-world contacts.

    Handles ``sip:``/``sips:`` schemes, URI parameters (``;transport=tcp``),
    headers (``?x=y``) and bracketed IPv6 literals. Falls back to
    ``default_host``/``default_port`` when the URI has no ``user@host`` part
    or the port is missing or not numeric.
    """
    target = sip_uri.strip()
    scheme, colon, remainder = target.partition(":")
    if colon and scheme.lower() in ("sip", "sips"):
        target = remainder

    # An unescaped "@" can only be the userinfo delimiter, so split there
    # first; ";" may legitimately appear inside the user part.
    _user, at_sign, hostport = target.partition("@")
    if not at_sign:
        return default_host, default_port

    # Drop URI parameters and headers that follow host[:port].
    for delimiter in (";", "?"):
        hostport = hostport.split(delimiter, 1)[0]

    if hostport.startswith("["):
        # IPv6 literal: [2001:db8::1]:5060
        host, _bracket, port_text = hostport[1:].partition("]")
        port_text = port_text.lstrip(":")
    else:
        host, _colon, port_text = hostport.partition(":")

    if port_text.isascii() and port_text.isdigit():
        port = int(port_text)
    else:
        port = default_port
    return host or default_host, port
# ================================================================
# Conference Bridge / Media
# ================================================================
async def bridge_calls(self, leg_a: str, leg_b: str) -> str:
    """Bridge two tracked call legs together.

    Media is bridged through the media pipeline when one is attached;
    otherwise only the bridge record is created (signaling-only).

    Args:
        leg_a: ID of the first leg.
        leg_b: ID of the second leg.

    Returns:
        The ID of the newly recorded bridge.

    Raises:
        ValueError: If either leg is not tracked.
    """
    # Validate before allocating an ID so a failed request leaves no trace.
    if not self._legs.get(leg_a) or not self._legs.get(leg_b):
        raise ValueError(f"One or both legs not found: {leg_a}, {leg_b}")

    bridge_id = f"bridge_{uuid.uuid4().hex[:8]}"

    logger.info(f"🔗 Bridging {leg_a} ↔ {leg_b} (bridge: {bridge_id})")

    if self.media_pipeline:
        # Let the media pipeline connect the two audio streams.
        self.media_pipeline.bridge_streams(leg_a, leg_b)
    else:
        logger.warning(" No media pipeline — bridge is signaling-only")

    self._bridges[bridge_id] = SipBridge(bridge_id, leg_a, leg_b)
    return bridge_id
async def unbridge(self, bridge_id: str) -> None:
    """Drop a bridge record and, when possible, disconnect its media.

    Unknown bridge IDs are ignored. The media pipeline is only asked to
    unbridge when both a bridge was found and a pipeline is attached.
    """
    removed = self._bridges.pop(bridge_id, None)
    if not removed or not self.media_pipeline:
        return
    self.media_pipeline.unbridge_streams(removed.leg_a, removed.leg_b)
def get_audio_stream(self, call_leg_id: str):
    """
    Return a real-time audio stream for a call leg.

    With a media pipeline attached, this is whatever the pipeline's
    ``get_audio_tap`` returns for the leg; without one, a stream of
    silence frames is returned instead.
    """
    pipeline = self.media_pipeline
    if not pipeline:
        # Fallback: yield silence frames
        return self._silence_stream()
    return pipeline.get_audio_tap(call_leg_id)
async def _silence_stream(self):
    """Async generator producing 100 silent frames, pausing 0.1s after each."""
    frame_size = 3200  # 100ms of silence at 16kHz 16-bit mono
    frames_left = 100
    while frames_left > 0:
        yield bytes(frame_size)
        await asyncio.sleep(0.1)
        frames_left -= 1
# ================================================================
# Registration
# ================================================================
async def get_registered_devices(self) -> list[dict]:
    """Return a fresh list built from the currently registered SIP devices."""
    snapshot = [*self._registered_devices]
    return snapshot
# ================================================================
# Trunk Status
# ================================================================
async def get_trunk_status(self) -> dict:
    """Summarise the SIP trunk configuration and current engine load.

    Returns:
        A dict with the trunk's registration flag, host (or
        "not configured" when unset), port, transport and username, plus
        the number of tracked legs and bridges.
    """
    status = {"registered": self._trunk_registered}
    status["host"] = self._trunk_host if self._trunk_host else "not configured"
    status["port"] = self._trunk_port
    status["transport"] = self._trunk_transport
    status["username"] = self._trunk_username
    status["active_legs"] = len(self._legs)
    status["active_bridges"] = len(self._bridges)
    return status
# ================================================================
# SDP Helpers
# ================================================================
def _generate_sdp(self, leg_id: str) -> str:
    """
    Generate the SDP offer body for a call leg.

    The RTP port comes from the media pipeline when one is attached;
    otherwise a pseudo-random even port in 10000-59998 is derived from
    the leg ID. The offer advertises PCMU, PCMA and telephone-event.

    Args:
        leg_id: ID of the leg the offer is for.

    Returns:
        The SDP body as a CRLF-terminated string.
    """
    # Never advertise the wildcard bind address as the media address.
    # NOTE(review): falling back to loopback is only reachable from the
    # local host; confirm a routable address is configured in production.
    rtp_host = "127.0.0.1" if self._sip_address == "0.0.0.0" else self._sip_address

    if self.media_pipeline:
        rtp_port = self.media_pipeline.allocate_rtp_port(leg_id)
    else:
        # RTP conventionally uses an even port (RTCP takes the next odd
        # one), so only even ports are picked.
        rtp_port = 10000 + 2 * (hash(leg_id) % 25000)

    return (
        "v=0\r\n"
        f"o=holdslayer 0 0 IN IP4 {rtp_host}\r\n"
        "s=Hold Slayer Gateway\r\n"
        f"c=IN IP4 {rtp_host}\r\n"
        "t=0 0\r\n"
        f"m=audio {rtp_port} RTP/AVP 0 8 101\r\n"
        "a=rtpmap:0 PCMU/8000\r\n"
        "a=rtpmap:8 PCMA/8000\r\n"
        "a=rtpmap:101 telephone-event/8000\r\n"
        "a=fmtp:101 0-16\r\n"
        "a=sendrecv\r\n"
    )
@staticmethod
def _parse_sdp_rtp_endpoint(sdp: str) -> Optional[dict]:
    """Extract the RTP host, port and codec of the audio stream from an SDP body.

    The first enabled ``m=audio`` section is used. Its own ``c=`` line takes
    precedence over the session-level one; ``c=`` lines belonging to other
    media sections are ignored. The codec is taken from the first payload
    type listed and defaults to "PCMU" when it is not a known static type.

    Args:
        sdp: Raw SDP text (LF or CRLF line endings).

    Returns:
        ``{"host": str, "port": int, "codec": str}``, or ``None`` when no
        usable IPv4 address and audio port could be found. Malformed ports
        are skipped rather than raising.
    """
    codec_by_payload = {"0": "PCMU", "8": "PCMA", "18": "G729"}

    session_host = None
    audio_host = None
    port = None
    codec = "PCMU"
    seen_media = False  # True once the first m= line has been passed
    in_audio = False    # True while scanning the selected audio section

    for raw_line in sdp.splitlines():
        line = raw_line.strip()

        if line.startswith("m="):
            seen_media = True
            in_audio = False
            fields = line[2:].split()
            if port is not None or len(fields) < 2 or fields[0] != "audio":
                continue
            # The port may be written as "<port>/<number of ports>".
            port_text = fields[1].split("/", 1)[0]
            if not (port_text.isascii() and port_text.isdigit()):
                continue
            candidate = int(port_text)
            if candidate == 0:
                # Port 0 marks a disabled stream; keep looking.
                continue
            port = candidate
            in_audio = True
            # First codec in the list
            if len(fields) >= 4:
                codec = codec_by_payload.get(fields[3], "PCMU")
        elif line.startswith("c=IN IP4 "):
            # Strip an optional "/ttl" suffix from the address.
            address = line.split()[-1].split("/", 1)[0]
            if not seen_media:
                session_host = address
            elif in_audio:
                audio_host = address

    host = audio_host or session_host
    if host and port:
        return {"host": host, "port": port, "codec": codec}
    return None