feat: add initial Hold Slayer AI telephony gateway implementation

Complete project scaffolding and core implementation of an AI-powered telephony system that calls companies, navigates IVR menus, waits on hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
2026-03-21 19:23:26 +00:00
parent c9ff60702b
commit ecf37658ce
56 changed files with 11601 additions and 164 deletions
--- a/models/call.py
+++ b/models/call.py
@@ -0,0 +1,169 @@
+"""
+Call models — Active call state, requests, and responses.
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class CallStatus(str, Enum):
+    """Lifecycle states of a call; str-valued, so members compare equal to their raw strings."""
+
+    INITIATING = "initiating"  # Default status of a newly created ActiveCall
+    RINGING = "ringing"
+    CONNECTED = "connected"
+    NAVIGATING_IVR = "navigating_ivr"
+    ON_HOLD = "on_hold"  # The only status in which ActiveCall.hold_time is non-zero
+    HUMAN_DETECTED = "human_detected"
+    TRANSFERRING = "transferring"
+    BRIDGED = "bridged"  # User is connected to the remote party
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+
+class CallMode(str, Enum):
+    """How a call should be handled; DIRECT is the default on ActiveCall and CallRequest."""
+
+    DIRECT = "direct"  # Call and connect immediately
+    HOLD_SLAYER = "hold_slayer"  # Navigate IVR, wait on hold, transfer once a human answers
+    AI_ASSISTED = "ai_assisted"  # Connect with transcription, recording, noise cancellation
+
+
+class AudioClassification(str, Enum):
+    """Kind of audio currently playing on a call (str-valued enum)."""
+
+    SILENCE = "silence"
+    MUSIC = "music"  # Hold music
+    IVR_PROMPT = "ivr_prompt"  # Automated voice (TTS or a recording)
+    LIVE_HUMAN = "live_human"  # Real person talking
+    RINGING = "ringing"  # Ring-back tone
+    DTMF = "dtmf"  # Touch tones
+    UNKNOWN = "unknown"  # Default for ActiveCall.current_classification
+
+
+class ClassificationResult(BaseModel):
+    """One audio classification sample: what was heard, when, and with what confidence."""
+
+    timestamp: float  # Unix timestamp (presumably seconds — confirm with the producer)
+    audio_type: AudioClassification
+    confidence: float  # Intended range 0.0 - 1.0; the range is not enforced by this model
+    details: Optional[dict] = None  # Extra analysis data; free-form and optional
+
+
+class ActiveCall(BaseModel):
+    """Live, in-memory record of one call, with derived timing and transcript views."""
+
+    id: str
+    direction: str = "outbound"
+    remote_number: str
+    status: CallStatus = CallStatus.INITIATING
+    mode: CallMode = CallMode.DIRECT
+    intent: Optional[str] = None
+    call_flow_id: Optional[str] = None
+    device: Optional[str] = None
+    started_at: datetime = Field(default_factory=datetime.now)  # naive local time
+    connected_at: Optional[datetime] = None  # read by `duration`; None means 0 seconds
+    hold_started_at: Optional[datetime] = None  # read by `hold_time`
+    current_classification: AudioClassification = AudioClassification.UNKNOWN
+    classification_history: list[ClassificationResult] = Field(default_factory=list)
+    transcript_chunks: list[str] = Field(default_factory=list)  # joined by `transcript`
+    current_step_id: Optional[str] = None  # Current position in call flow
+    services: list[str] = Field(default_factory=list)  # Active services on this call
+
+    @property
+    def duration(self) -> int:
+        """Whole seconds elapsed since `connected_at`, or 0 if it is not set."""
+        if not self.connected_at:
+            return 0
+        return int((datetime.now() - self.connected_at).total_seconds())
+
+    @property
+    def hold_time(self) -> int:
+        """Whole seconds in the current hold; 0 unless ON_HOLD with a recorded start."""
+        if self.status != CallStatus.ON_HOLD or not self.hold_started_at:
+            return 0
+        return int((datetime.now() - self.hold_started_at).total_seconds())
+
+    @property
+    def transcript(self) -> str:
+        """Every transcript chunk received so far, joined with newlines."""
+        return "\n".join(self.transcript_chunks)
+
+    def summary(self) -> dict:
+        """Return a small dict snapshot for list views, with enum fields as raw values."""
+        return {
+            "call_id": self.id,
+            "remote_number": self.remote_number,
+            "status": self.status.value,
+            "mode": self.mode.value,
+            "duration": self.duration,
+            "hold_time": self.hold_time,
+            "audio_type": self.current_classification.value,
+            "intent": self.intent,
+        }
+
+
+# ============================================================
+# API Request/Response Models
+# ============================================================
+
+
+class CallRequest(BaseModel):
+    """Request body for placing an outbound call; only `number` is required."""
+
+    number: str  # Expected in E.164 format; the format is not validated by this model
+    mode: CallMode = CallMode.DIRECT
+    intent: Optional[str] = None  # What you need (for hold_slayer IVR navigation)
+    device: Optional[str] = None  # Target device to ring / transfer to
+    call_flow_id: Optional[str] = None  # Use a stored IVR tree
+    services: list[str] = Field(  # factory builds a fresh default list per instance
+        default_factory=lambda: ["recording", "transcription"]
+    )
+
+
+class HoldSlayerRequest(BaseModel):
+    """Request to launch the Hold Slayer; unlike CallRequest, `intent` is required here."""
+
+    number: str  # Expected in E.164 format; the format is not validated by this model
+    intent: str  # Required goal, e.g. "dispute a charge on my December statement"
+    call_flow_id: Optional[str] = None  # Optional: use stored IVR tree
+    transfer_to: Optional[str] = None  # Device to ring when human detected
+    notify: list[str] = Field(default_factory=lambda: ["push"])  # Notification channels
+
+
+class CallResponse(BaseModel):
+    """Response returned after a call has been initiated."""
+
+    call_id: str
+    status: str  # Plain str; presumably a CallStatus value — confirm with the handler
+    number: str
+    mode: str  # Plain str; presumably a CallMode value — confirm with the handler
+    message: Optional[str] = None  # Optional; None when omitted
+
+
+class CallStatusResponse(BaseModel):
+    """Full status of an active or completed call; includes every ActiveCall.summary() key."""
+
+    call_id: str
+    status: str  # Plain str; ActiveCall.summary() emits CallStatus.value under this key
+    direction: str
+    remote_number: str
+    mode: str  # Plain str; ActiveCall.summary() emits CallMode.value under this key
+    duration: int  # Seconds, if filled from ActiveCall.duration — confirm with the handler
+    hold_time: int  # Seconds, if filled from ActiveCall.hold_time — confirm with the handler
+    audio_type: str
+    intent: Optional[str] = None
+    transcript_excerpt: Optional[str] = None  # Last N chars; N is not defined in this model
+    classification_history: list[ClassificationResult] = Field(default_factory=list)
+    current_step: Optional[str] = None
+    services: list[str] = Field(default_factory=list)
+
+
+class TransferRequest(BaseModel):
+    """Request body naming the device a call should be transferred to."""
+
+    device: str  # Device ID or type; required