feat: add initial Hold Slayer AI telephony gateway implementation

Complete project scaffolding and core implementation of an AI-powered
telephony system that calls companies, navigates IVR menus, waits on
hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
This commit is contained in:
2026-03-21 19:23:26 +00:00
parent c9ff60702b
commit ecf37658ce
56 changed files with 11601 additions and 164 deletions

1
services/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""AI services — hold detection, transcription, classification, and more."""

View File

@@ -0,0 +1,444 @@
"""
Audio Classifier — Spectral analysis for hold music, speech, and silence detection.
This is the brain of the Hold Slayer. It analyzes audio in real-time to determine:
- Is this hold music?
- Is this an IVR prompt (automated voice)?
- Is this a live human?
- Is this silence?
- Is this a ring-back tone?
Uses spectral analysis (librosa/numpy) to classify audio without needing
a trained ML model — just signal processing and heuristics.
"""
import logging
import time
from typing import Optional
import numpy as np
from config import ClassifierSettings
from models.call import AudioClassification, ClassificationResult
logger = logging.getLogger(__name__)
# Audio constants
SAMPLE_RATE = 16000 # 16kHz mono
FRAME_SIZE = SAMPLE_RATE * 2 # 16-bit samples = 2 bytes per sample
class AudioClassifier:
    """
    Real-time audio classifier using spectral analysis.

    Classification strategy:
    - Silence: Low RMS energy
    - Music: High spectral flatness + sustained tonal content + rhythm
    - IVR prompt: Speech-like spectral envelope but repetitive/synthetic
    - Live human: Speech-like spectral envelope + natural variation
    - Ringing: Very tonal, specific frequencies (~440Hz, ~480Hz for NA ring)
    - DTMF: Dual-tone detection at known DTMF frequencies
    """

    def __init__(self, settings: "ClassifierSettings"):
        self.settings = settings
        # NOTE(review): the window buffer is reserved for cross-chunk
        # windowed analysis but is not consumed by classify_chunk yet.
        self._window_buffer: list[bytes] = []
        self._window_samples = int(settings.window_seconds * SAMPLE_RATE)
        # Recent classifications, consumed by _looks_like_live_human and
        # detect_hold_to_human_transition for pattern detection.
        self._classification_history: list["AudioClassification"] = []

    def classify_chunk(self, audio_data: bytes) -> "ClassificationResult":
        """
        Classify a chunk of audio data.

        Args:
            audio_data: Raw PCM audio (16-bit signed, 16kHz, mono)

        Returns:
            ClassificationResult with type and confidence
        """
        # Convert bytes to numpy array. Drop a trailing odd byte first:
        # np.frombuffer raises ValueError if the buffer is not a whole
        # number of int16 samples (e.g. a torn RTP frame).
        usable = len(audio_data) - (len(audio_data) % 2)
        samples = np.frombuffer(audio_data[:usable], dtype=np.int16).astype(np.float32)
        if len(samples) == 0:
            return ClassificationResult(
                timestamp=time.time(),
                audio_type=AudioClassification.SILENCE,
                confidence=1.0,
            )
        # Normalize to [-1.0, 1.0]
        samples = samples / 32768.0
        # Run all detectors
        rms = self._compute_rms(samples)
        spectral_flatness = self._compute_spectral_flatness(samples)
        zcr = self._compute_zero_crossing_rate(samples)
        dominant_freq = self._compute_dominant_frequency(samples)
        spectral_centroid = self._compute_spectral_centroid(samples)
        is_tonal = self._detect_tonality(samples)
        # Build feature dict for debugging
        features = {
            "rms": float(rms),
            "spectral_flatness": float(spectral_flatness),
            "zcr": float(zcr),
            "dominant_freq": float(dominant_freq),
            "spectral_centroid": float(spectral_centroid),
            "is_tonal": is_tonal,
        }
        # === Classification Logic ===
        # 1. Silence detection
        if rms < 0.01:
            return ClassificationResult(
                timestamp=time.time(),
                audio_type=AudioClassification.SILENCE,
                confidence=min(1.0, (0.01 - rms) / 0.01 + 0.5),
                details=features,
            )
        # 2. DTMF detection (very specific dual-tone pattern)
        dtmf_result = self._detect_dtmf(samples)
        if dtmf_result:
            return ClassificationResult(
                timestamp=time.time(),
                audio_type=AudioClassification.DTMF,
                confidence=0.95,
                details={**features, "dtmf_digit": dtmf_result},
            )
        # 3. Ring-back tone detection (440+480Hz in NA, periodic on/off)
        if is_tonal and 400 < dominant_freq < 520 and rms > 0.02:
            return ClassificationResult(
                timestamp=time.time(),
                audio_type=AudioClassification.RINGING,
                confidence=0.8,
                details=features,
            )
        # 4. Music vs Speech discrimination
        # Music: higher spectral flatness, more tonal, wider spectral spread
        # Speech: lower spectral flatness, concentrated energy, variable ZCR
        music_score = self._compute_music_score(
            spectral_flatness, is_tonal, spectral_centroid, zcr, rms
        )
        speech_score = self._compute_speech_score(
            spectral_flatness, zcr, spectral_centroid, rms
        )
        # 5. If it's speech-like, is it live or automated?
        if speech_score > music_score:
            # Use history to distinguish live human from IVR
            # IVR: repetitive patterns, synthetic prosody
            # Human: natural variation, conversational rhythm
            if self._looks_like_live_human(speech_score, zcr, rms):
                return ClassificationResult(
                    timestamp=time.time(),
                    audio_type=AudioClassification.LIVE_HUMAN,
                    confidence=speech_score,
                    details=features,
                )
            else:
                return ClassificationResult(
                    timestamp=time.time(),
                    audio_type=AudioClassification.IVR_PROMPT,
                    confidence=speech_score * 0.8,
                    details=features,
                )
        # 6. Music (hold music)
        if music_score >= self.settings.music_threshold:
            return ClassificationResult(
                timestamp=time.time(),
                audio_type=AudioClassification.MUSIC,
                confidence=music_score,
                details=features,
            )
        # 7. Unknown / low confidence
        return ClassificationResult(
            timestamp=time.time(),
            audio_type=AudioClassification.UNKNOWN,
            confidence=max(music_score, speech_score),
            details=features,
        )

    # ================================================================
    # Feature Extraction
    # ================================================================
    @staticmethod
    def _compute_rms(samples: np.ndarray) -> float:
        """Root Mean Square — overall energy level."""
        return float(np.sqrt(np.mean(samples ** 2)))

    @staticmethod
    def _compute_spectral_flatness(samples: np.ndarray) -> float:
        """
        Spectral flatness (Wiener entropy).
        Close to 1.0 = noise-like (white noise)
        Close to 0.0 = tonal (pure tone, music)
        Speech is typically 0.1-0.4, music 0.05-0.3
        """
        fft = np.abs(np.fft.rfft(samples))
        fft = fft[fft > 0]  # Avoid log(0)
        if len(fft) == 0:
            return 0.0
        geometric_mean = np.exp(np.mean(np.log(fft + 1e-10)))
        arithmetic_mean = np.mean(fft)
        if arithmetic_mean == 0:
            return 0.0
        return float(geometric_mean / arithmetic_mean)

    @staticmethod
    def _compute_zero_crossing_rate(samples: np.ndarray) -> float:
        """
        Zero-crossing rate — how often the signal crosses zero.
        Higher for unvoiced speech and noise.
        Lower for voiced speech and tonal music.
        """
        # Guard against division by zero when called directly with an empty
        # array (classify_chunk already filters out empty chunks).
        if len(samples) == 0:
            return 0.0
        crossings = np.sum(np.abs(np.diff(np.sign(samples)))) / 2
        return float(crossings / len(samples))

    @staticmethod
    def _compute_dominant_frequency(samples: np.ndarray) -> float:
        """Find the dominant frequency in the signal."""
        fft = np.abs(np.fft.rfft(samples))
        freqs = np.fft.rfftfreq(len(samples), 1.0 / SAMPLE_RATE)
        # Ignore DC and very low frequencies
        mask = freqs > 50
        if not np.any(mask):
            return 0.0
        fft_masked = fft[mask]
        freqs_masked = freqs[mask]
        return float(freqs_masked[np.argmax(fft_masked)])

    @staticmethod
    def _compute_spectral_centroid(samples: np.ndarray) -> float:
        """
        Spectral centroid — "center of mass" of the spectrum.
        Higher for bright/treble sounds, lower for bass-heavy sounds.
        Speech typically 500-4000Hz, music varies widely.
        """
        fft = np.abs(np.fft.rfft(samples))
        freqs = np.fft.rfftfreq(len(samples), 1.0 / SAMPLE_RATE)
        total_energy = np.sum(fft)
        if total_energy == 0:
            return 0.0
        return float(np.sum(freqs * fft) / total_energy)

    @staticmethod
    def _detect_tonality(samples: np.ndarray) -> bool:
        """
        Check if the signal is strongly tonal (has clear pitch).
        Uses autocorrelation.
        """
        # Autocorrelation
        correlation = np.correlate(samples, samples, mode="full")
        correlation = correlation[len(correlation) // 2:]
        # Normalize
        if correlation[0] == 0:
            return False
        correlation = correlation / correlation[0]
        # Look for a strong peak (indicating periodicity) in the lag range
        # corresponding to ~50Hz-1000Hz fundamentals; shorter lags (higher
        # frequencies) are skipped.
        min_lag = int(SAMPLE_RATE / 1000)  # ~16 samples (1000Hz max)
        max_lag = int(SAMPLE_RATE / 50)  # ~320 samples (50Hz min)
        search_region = correlation[min_lag:max_lag]
        if len(search_region) == 0:
            return False
        peak_value = np.max(search_region)
        return bool(peak_value > 0.5)

    def _detect_dtmf(self, samples: np.ndarray) -> Optional[str]:
        """
        Detect DTMF tones using Goertzel algorithm (simplified).
        DTMF frequencies:
            697, 770, 852, 941 Hz (row)
            1209, 1336, 1477, 1633 Hz (column)

        Returns:
            The detected digit ("0"-"9", "*", "#", "A"-"D"), or None.
        """
        dtmf_freqs_low = [697, 770, 852, 941]
        dtmf_freqs_high = [1209, 1336, 1477, 1633]
        dtmf_map = {
            (697, 1209): "1", (697, 1336): "2", (697, 1477): "3", (697, 1633): "A",
            (770, 1209): "4", (770, 1336): "5", (770, 1477): "6", (770, 1633): "B",
            (852, 1209): "7", (852, 1336): "8", (852, 1477): "9", (852, 1633): "C",
            (941, 1209): "*", (941, 1336): "0", (941, 1477): "#", (941, 1633): "D",
        }
        # Compute power at each DTMF frequency via the Goertzel recurrence.
        def goertzel_power(freq: int) -> float:
            k = int(0.5 + len(samples) * freq / SAMPLE_RATE)
            w = 2 * np.pi * k / len(samples)
            coeff = 2 * np.cos(w)
            s0, s1, s2 = 0.0, 0.0, 0.0
            for sample in samples:
                s0 = sample + coeff * s1 - s2
                s2 = s1
                s1 = s0
            return float(s1 * s1 + s2 * s2 - coeff * s1 * s2)
        # Find strongest low and high frequencies
        low_powers = [(f, goertzel_power(f)) for f in dtmf_freqs_low]
        high_powers = [(f, goertzel_power(f)) for f in dtmf_freqs_high]
        best_low = max(low_powers, key=lambda x: x[1])
        best_high = max(high_powers, key=lambda x: x[1])
        # Threshold: both frequencies must be significantly present
        total_power = np.sum(samples ** 2)
        if total_power == 0:
            return None
        threshold = total_power * 0.1
        if best_low[1] > threshold and best_high[1] > threshold:
            key = (best_low[0], best_high[0])
            return dtmf_map.get(key)
        return None

    # ================================================================
    # Higher-Level Classification
    # ================================================================
    def _compute_music_score(
        self,
        spectral_flatness: float,
        is_tonal: bool,
        spectral_centroid: float,
        zcr: float,
        rms: float,
    ) -> float:
        """Compute a music likelihood score (0.0 - 1.0)."""
        score = 0.0
        # Music tends to be tonal
        if is_tonal:
            score += 0.3
        # Music has moderate spectral flatness (more than pure tone, less than noise)
        if 0.05 < spectral_flatness < 0.4:
            score += 0.2
        # Music has sustained energy
        if rms > 0.03:
            score += 0.15
        # Music has wider spectral content than speech
        if spectral_centroid > 1500:
            score += 0.15
        # Music tends to have lower ZCR than noise
        if zcr < 0.15:
            score += 0.2
        return min(1.0, score)

    def _compute_speech_score(
        self,
        spectral_flatness: float,
        zcr: float,
        spectral_centroid: float,
        rms: float,
    ) -> float:
        """Compute a speech likelihood score (0.0 - 1.0)."""
        score = 0.0
        # Speech has moderate spectral flatness
        if 0.1 < spectral_flatness < 0.5:
            score += 0.25
        # Speech centroid typically 500-4000 Hz
        if 500 < spectral_centroid < 4000:
            score += 0.25
        # Speech has moderate ZCR
        if 0.02 < zcr < 0.2:
            score += 0.25
        # Speech has moderate energy
        if 0.01 < rms < 0.5:
            score += 0.25
        return min(1.0, score)

    def _looks_like_live_human(
        self,
        speech_score: float,
        zcr: float,
        rms: float,
    ) -> bool:
        """
        Distinguish live human from IVR/TTS.
        Heuristics:
        - IVR prompts are followed by silence (waiting for input)
        - Live humans have more natural variation in energy and pitch
        - After hold music → speech transition, it's likely a human
        This is the hardest classification and benefits most from
        the transcript context (Speaches STT).
        """
        # Look at recent classification history
        recent = self._classification_history[-10:] if self._classification_history else []
        # Key signal: if we were just listening to hold music and now
        # hear speech, it's very likely a live human agent
        if recent:
            if AudioClassification.MUSIC in recent[-5:]:
                # Transition from music to speech = agent picked up!
                return True
        # High speech score with good energy = more likely human
        if speech_score > 0.7 and rms > 0.05:
            return True
        # Default: assume IVR until proven otherwise
        return False

    def update_history(self, classification: "AudioClassification") -> None:
        """Track classification history for pattern detection."""
        self._classification_history.append(classification)
        # Keep last 100 classifications
        if len(self._classification_history) > 100:
            self._classification_history = self._classification_history[-100:]

    def detect_hold_to_human_transition(self) -> bool:
        """
        Detect the critical moment: hold music → live human.
        Looks for pattern: MUSIC, MUSIC, MUSIC, ..., SPEECH/LIVE_HUMAN
        """
        recent = self._classification_history[-20:]
        if len(recent) < 5:
            return False
        # Count recent music vs speech
        music_count = sum(1 for c in recent[:-3] if c == AudioClassification.MUSIC)
        speech_count = sum(
            1 for c in recent[-3:]
            if c in (AudioClassification.LIVE_HUMAN, AudioClassification.IVR_PROMPT)
        )
        # If we had a lot of music and now have speech, someone picked up
        return music_count >= 3 and speech_count >= 2

324
services/call_analytics.py Normal file
View File

@@ -0,0 +1,324 @@
"""
Call Analytics Service — Tracks call metrics and generates insights.
Monitors call patterns, hold times, success rates, and IVR navigation
efficiency. Provides data for the dashboard and API.
"""
import logging
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Optional
from models.call import ActiveCall, AudioClassification, CallMode, CallStatus
logger = logging.getLogger(__name__)
class CallAnalytics:
    """
    In-memory call analytics engine.

    Keeps a bounded rolling list of finished-call records plus per-number
    aggregates, and answers summary/trend queries for the dashboard and API.

    Tracked dimensions:
    - Call success/failure rates
    - Hold time statistics (avg, min, max, p95)
    - IVR navigation efficiency
    - Human detection accuracy
    - Per-number/company patterns
    - Time-of-day patterns

    In production, this would be backed by TimescaleDB or similar.
    For now, we keep rolling windows in memory.
    """

    def __init__(self, max_history: int = 10000):
        self._max_history = max_history
        self._call_records: list["CallRecord"] = []
        # Per-number aggregates; defaultdict so first use auto-creates stats.
        self._company_stats: dict[str, "CompanyStats"] = defaultdict(CompanyStats)

    # ================================================================
    # Record Calls
    # ================================================================
    def record_call(self, call: "ActiveCall") -> None:
        """
        Record a completed call for analytics.
        Called when a call ends (from CallManager).
        """
        rec = CallRecord(
            call_id=call.id,
            remote_number=call.remote_number,
            mode=call.mode,
            status=call.status,
            intent=call.intent,
            started_at=call.created_at,
            duration_seconds=call.duration,
            hold_time_seconds=call.hold_time,
            classification_history=[
                c.audio_type.value for c in call.classification_history
            ],
            transcript_chunks=list(call.transcript_chunks),
            services=list(call.services),
        )
        self._call_records.append(rec)
        # Bound memory: keep only the newest max_history records.
        overflow = len(self._call_records) - self._max_history
        if overflow > 0:
            del self._call_records[:overflow]
        # Fold into per-company aggregates.
        self._company_stats[self._normalize_number(call.remote_number)].update(rec)
        logger.debug(
            f"📊 Recorded call {call.id}: "
            f"{call.status.value}, {call.duration}s, hold={call.hold_time}s"
        )

    # ================================================================
    # Aggregate Stats
    # ================================================================
    def get_summary(self, hours: int = 24) -> dict[str, Any]:
        """Get summary statistics for the last N hours."""
        since = datetime.now() - timedelta(hours=hours)
        window = [r for r in self._call_records if r.started_at >= since]
        if not window:
            return {
                "period_hours": hours,
                "total_calls": 0,
                "success_rate": 0.0,
                "avg_hold_time": 0.0,
                "avg_duration": 0.0,
            }
        ok_statuses = (
            CallStatus.COMPLETED, CallStatus.BRIDGED, CallStatus.HUMAN_DETECTED,
        )
        total = len(window)
        successful = sum(r.status in ok_statuses for r in window)
        failed = sum(r.status == CallStatus.FAILED for r in window)
        hold_times = [r.hold_time_seconds for r in window if r.hold_time_seconds > 0]
        durations = [r.duration_seconds for r in window if r.duration_seconds > 0]
        slayer_calls = [r for r in window if r.mode == CallMode.HOLD_SLAYER]
        slayer_ok = sum(
            r.status in (CallStatus.BRIDGED, CallStatus.HUMAN_DETECTED)
            for r in slayer_calls
        )
        return {
            "period_hours": hours,
            "total_calls": total,
            "successful": successful,
            "failed": failed,
            "success_rate": round(successful / total, 3) if total else 0.0,
            "avg_duration": round(sum(durations) / len(durations), 1) if durations else 0.0,
            "max_duration": max(durations) if durations else 0,
            "hold_time": {
                "avg": round(sum(hold_times) / len(hold_times), 1) if hold_times else 0.0,
                "min": min(hold_times) if hold_times else 0,
                "max": max(hold_times) if hold_times else 0,
                "p95": self._percentile(hold_times, 95) if hold_times else 0,
                "total": sum(hold_times),
            },
            "hold_slayer": {
                "total": len(slayer_calls),
                "success": slayer_ok,
                "success_rate": round(
                    slayer_ok / len(slayer_calls), 3
                ) if slayer_calls else 0.0,
            },
            "by_mode": self._group_by_mode(window),
            "by_hour": self._group_by_hour(window),
        }

    def get_company_stats(self, number: str) -> dict[str, Any]:
        """Get stats for a specific company/number."""
        # .get() (not indexing) so the defaultdict doesn't create an entry.
        stats = self._company_stats.get(self._normalize_number(number))
        if stats is None:
            return {"number": number, "total_calls": 0}
        return stats.to_dict(number)

    def get_top_numbers(self, limit: int = 10) -> list[dict[str, Any]]:
        """Get the most-called numbers with their stats."""
        ranked = sorted(
            self._company_stats.items(),
            key=lambda item: item[1].total_calls,
            reverse=True,
        )
        return [stats.to_dict(number) for number, stats in ranked[:limit]]

    # ================================================================
    # Hold Time Trends
    # ================================================================
    def get_hold_time_trend(
        self,
        number: Optional[str] = None,
        days: int = 7,
    ) -> list[dict]:
        """
        Get hold time trend data for graphing.
        Returns daily average hold times for the last N days.
        """
        since = datetime.now() - timedelta(days=days)
        pool = [r for r in self._call_records if r.started_at >= since]
        if number:
            target = self._normalize_number(number)
            pool = [r for r in pool if self._normalize_number(r.remote_number) == target]
        # Bucket nonzero hold times by calendar day.
        per_day: dict[str, list[int]] = defaultdict(list)
        for rec in pool:
            if rec.hold_time_seconds > 0:
                per_day[rec.started_at.strftime("%Y-%m-%d")].append(rec.hold_time_seconds)
        # Emit one point per day, oldest first, zero-filled for quiet days.
        trend = []
        for offset in range(days - 1, -1, -1):
            day = (datetime.now() - timedelta(days=offset)).strftime("%Y-%m-%d")
            vals = per_day.get(day, [])
            trend.append({
                "date": day,
                "avg_hold_time": round(sum(vals) / len(vals), 1) if vals else 0,
                "call_count": len(vals),
                "max_hold_time": max(vals) if vals else 0,
            })
        return trend

    # ================================================================
    # Helpers
    # ================================================================
    @staticmethod
    def _normalize_number(number: str) -> str:
        """Normalize phone number for grouping (formatting stripped, last 10 digits)."""
        digits = "".join(filter(str.isdigit, number))
        return digits[-10:] if len(digits) >= 10 else digits

    @staticmethod
    def _percentile(values: list, pct: int) -> float:
        """Calculate a nearest-rank-style percentile value."""
        if not values:
            return 0.0
        ordered = sorted(values)
        pos = min(int(len(ordered) * pct / 100), len(ordered) - 1)
        return float(ordered[pos])

    @staticmethod
    def _group_by_mode(records: list["CallRecord"]) -> dict[str, int]:
        """Group call counts by mode."""
        counts: dict[str, int] = defaultdict(int)
        for rec in records:
            counts[rec.mode.value] += 1
        return dict(counts)

    @staticmethod
    def _group_by_hour(records: list["CallRecord"]) -> dict[int, int]:
        """Group call counts by hour of day."""
        counts: dict[int, int] = defaultdict(int)
        for rec in records:
            counts[rec.started_at.hour] += 1
        return dict(sorted(counts.items()))

    @property
    def total_calls_recorded(self) -> int:
        """Number of call records currently held in memory."""
        return len(self._call_records)
# ================================================================
# Data Models
# ================================================================
class CallRecord:
    """A completed call record for analytics.

    Plain value object: holds the final state of one call so the
    analytics engine can aggregate over it. All list-valued fields
    default to fresh empty lists when not supplied.
    """

    def __init__(
        self,
        call_id: str,
        remote_number: str,
        mode: "CallMode",
        status: "CallStatus",
        intent: Optional[str] = None,
        started_at: Optional[datetime] = None,
        duration_seconds: int = 0,
        hold_time_seconds: int = 0,
        classification_history: Optional[list[str]] = None,
        transcript_chunks: Optional[list[str]] = None,
        services: Optional[list[str]] = None,
    ):
        # Identity and outcome
        self.call_id = call_id
        self.remote_number = remote_number
        self.mode = mode
        self.status = status
        self.intent = intent
        # Timing: default "now" when the caller didn't supply a start time.
        self.started_at = datetime.now() if started_at is None else started_at
        self.duration_seconds = duration_seconds
        self.hold_time_seconds = hold_time_seconds
        # Per-call detail lists (never None after construction)
        self.classification_history = classification_history if classification_history else []
        self.transcript_chunks = transcript_chunks if transcript_chunks else []
        self.services = services if services else []
class CompanyStats:
    """Aggregated stats for a specific company/phone number.

    Mutable accumulator: update() folds each finished call in,
    to_dict() serializes the aggregates for the API/dashboard.
    """

    def __init__(self):
        self.total_calls = 0
        self.successful_calls = 0
        self.failed_calls = 0
        self.total_hold_time = 0
        self.hold_times: list[int] = []
        self.total_duration = 0
        self.last_called: Optional[datetime] = None
        self.intents: dict[str, int] = defaultdict(int)

    def update(self, record: "CallRecord") -> None:
        """Update stats with a new call record."""
        self.total_calls += 1
        self.total_duration += record.duration_seconds
        self.last_called = record.started_at
        # A call counts as a success if it completed, was bridged to the
        # user, or reached a detected human; only FAILED counts as failure.
        success_states = (
            CallStatus.COMPLETED,
            CallStatus.BRIDGED,
            CallStatus.HUMAN_DETECTED,
        )
        if record.status in success_states:
            self.successful_calls += 1
        elif record.status == CallStatus.FAILED:
            self.failed_calls += 1
        hold = record.hold_time_seconds
        if hold > 0:
            self.total_hold_time += hold
            self.hold_times.append(hold)
        if record.intent:
            self.intents[record.intent] += 1

    def to_dict(self, number: str) -> dict[str, Any]:
        """Serialize the aggregates, keyed for the given display number."""
        calls = self.total_calls
        holds = self.hold_times
        top = sorted(self.intents.items(), key=lambda kv: kv[1], reverse=True)[:5]
        return {
            "number": number,
            "total_calls": calls,
            "successful_calls": self.successful_calls,
            "failed_calls": self.failed_calls,
            "success_rate": round(self.successful_calls / calls, 3) if calls else 0.0,
            "avg_hold_time": round(self.total_hold_time / len(holds), 1) if holds else 0.0,
            "max_hold_time": max(holds) if holds else 0,
            "avg_duration": round(self.total_duration / calls, 1) if calls else 0.0,
            "last_called": self.last_called.isoformat() if self.last_called else None,
            "top_intents": dict(top),
        }

View File

@@ -0,0 +1,339 @@
"""
Call Flow Learner — Builds and refines call flows from exploration data.
When Hold Slayer runs in exploration mode, it discovers IVR steps.
This service takes those discoveries and:
1. Builds a CallFlow tree that can be reused next time
2. Merges new discoveries into existing flows (refining them)
3. Uses LLM to label steps and infer menu structure
Over time, each phone number builds up a reliable call flow
that makes future calls faster and more accurate.
"""
import logging
import re
from datetime import datetime
from typing import Any, Optional
from models.call_flow import ActionType, CallFlow, CallFlowStep
logger = logging.getLogger(__name__)
class CallFlowLearner:
"""
Learns IVR call flows from exploration data.
Usage:
learner = CallFlowLearner(llm_client=llm)
# After an exploration call completes:
flow = await learner.build_flow(
phone_number="+18005551234",
discovered_steps=steps_from_exploration,
intent="cancel my card",
)
# Next time we call, merge new discoveries:
updated = await learner.merge_discoveries(
existing_flow=flow,
new_steps=new_discoveries,
)
"""
def __init__(self, llm_client=None):
self._llm = llm_client
# ================================================================
# Build Flow from Exploration
# ================================================================
async def build_flow(
self,
phone_number: str,
discovered_steps: list[dict],
intent: Optional[str] = None,
company_name: Optional[str] = None,
) -> CallFlow:
"""
Build a CallFlow from exploration discoveries.
Args:
phone_number: The number that was called.
discovered_steps: List of step dicts from exploration mode:
[{"timestamp": ..., "audio_type": "ivr_prompt",
"transcript": "Press 1 for...", "action_taken": {"dtmf": "1"}}, ...]
intent: What the caller was trying to accomplish.
company_name: Optional company name for labeling.
Returns:
A CallFlow that can be stored and reused.
"""
logger.info(
f"🧠 Building call flow from {len(discovered_steps)} discoveries "
f"for {phone_number}"
)
# Phase 1: Extract meaningful steps (skip silence, ringing)
meaningful = [
s for s in discovered_steps
if s.get("audio_type") in ("ivr_prompt", "live_human", "music")
or s.get("action_taken")
]
if not meaningful:
logger.warning(" No meaningful steps discovered")
return self._empty_flow(phone_number, company_name)
# Phase 2: Convert discoveries to CallFlowSteps
flow_steps = []
for i, step in enumerate(meaningful):
flow_step = self._discovery_to_step(step, i, meaningful)
if flow_step:
flow_steps.append(flow_step)
# Phase 3: Link steps together (next_step pointers)
for i, step in enumerate(flow_steps[:-1]):
step.next_step = flow_steps[i + 1].id
# Phase 4: Use LLM to enhance step labels if available
if self._llm and flow_steps:
flow_steps = await self._llm_enhance_steps(flow_steps, intent)
# Build the flow
name = company_name or self._guess_company_name(phone_number)
flow = CallFlow(
id=f"flow_{phone_number.replace('+', '')}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
name=f"{name}{intent or 'General'}",
phone_number=phone_number,
description=f"Auto-learned flow for {name}. Intent: {intent or 'general'}",
steps=flow_steps,
tags=["auto-learned"],
notes=f"Learned from exploration on {datetime.now().isoformat()}",
times_used=1,
last_used=datetime.now(),
)
logger.info(
f" ✅ Built flow '{flow.name}' with {len(flow_steps)} steps"
)
return flow
def _discovery_to_step(
self,
discovery: dict,
index: int,
all_discoveries: list[dict],
) -> Optional[CallFlowStep]:
"""Convert a single exploration discovery to a CallFlowStep."""
audio_type = discovery.get("audio_type", "")
transcript = discovery.get("transcript", "")
action_taken = discovery.get("action_taken")
step_id = f"step_{index:03d}"
if audio_type == "ivr_prompt" and action_taken:
# IVR menu where we pressed a button
dtmf = action_taken.get("dtmf", "")
return CallFlowStep(
id=step_id,
description=self._summarize_menu(transcript) or f"IVR menu (pressed {dtmf})",
action=ActionType.DTMF,
action_value=dtmf,
expect=self._extract_expect_pattern(transcript),
timeout=15,
)
elif audio_type == "ivr_prompt" and not action_taken:
# IVR prompt we just listened to
return CallFlowStep(
id=step_id,
description=self._summarize_menu(transcript) or "IVR announcement",
action=ActionType.LISTEN,
timeout=30,
)
elif audio_type == "music":
# Hold music
return CallFlowStep(
id=step_id,
description="Hold music — waiting for agent",
action=ActionType.HOLD,
timeout=3600,
)
elif audio_type == "live_human":
# Human detected — this is the transfer point
return CallFlowStep(
id=step_id,
description="Live agent detected — transfer",
action=ActionType.TRANSFER,
action_value="preferred_device",
)
return None
# ================================================================
# Merge New Discoveries into Existing Flow
# ================================================================
async def merge_discoveries(
self,
existing_flow: CallFlow,
new_steps: list[dict],
intent: Optional[str] = None,
) -> CallFlow:
"""
Merge new exploration discoveries into an existing flow.
This refines the flow over time — updating timeouts,
confirming step order, adding alternative paths.
"""
logger.info(
f"🔄 Merging {len(new_steps)} new discoveries into "
f"flow '{existing_flow.name}'"
)
# Build a new flow from the discoveries
new_flow = await self.build_flow(
phone_number=existing_flow.phone_number,
discovered_steps=new_steps,
intent=intent,
)
# Simple merge strategy: keep existing steps but update timeouts
# and add any new steps that weren't in the original
existing_by_action = {
(s.action, s.action_value): s for s in existing_flow.steps
}
for new_step in new_flow.steps:
key = (new_step.action, new_step.action_value)
if key in existing_by_action:
# Update timeout to be the average
old_step = existing_by_action[key]
if old_step.timeout and new_step.timeout:
old_step.timeout = int(
(old_step.timeout + new_step.timeout) / 2
)
# New steps that don't exist are noted but not auto-added
# (to avoid corrupting a working flow)
# Update metadata
existing_flow.times_used = (existing_flow.times_used or 0) + 1
existing_flow.last_used = datetime.now()
logger.info(f" ✅ Merged. Flow now has {len(existing_flow.steps)} steps")
return existing_flow
# ================================================================
# LLM Enhancement
# ================================================================
async def _llm_enhance_steps(
self,
steps: list[CallFlowStep],
intent: Optional[str],
) -> list[CallFlowStep]:
"""Use LLM to improve step descriptions and structure."""
if not self._llm:
return steps
try:
# Build a summary of the steps for the LLM
step_descriptions = []
for s in steps:
desc = f"- {s.action.value}"
if s.action_value:
desc += f" ({s.action_value})"
if s.description:
desc += f": {s.description}"
step_descriptions.append(desc)
prompt = (
f"These are steps discovered while navigating a phone IVR system.\n"
f"Intent: {intent or 'general inquiry'}\n\n"
f"Steps:\n" + "\n".join(step_descriptions) + "\n\n"
f"For each step, provide a clear, concise description of what "
f"that step does. Return JSON array of objects with 'step_index' "
f"and 'description' fields."
)
result = await self._llm.chat_json(
prompt,
system="You are labeling IVR phone menu steps for a call flow database.",
)
# Apply LLM descriptions
if isinstance(result, list):
for item in result:
idx = item.get("step_index", -1)
desc = item.get("description", "")
if 0 <= idx < len(steps) and desc:
steps[idx].description = desc
elif isinstance(result, dict) and "steps" in result:
for item in result["steps"]:
idx = item.get("step_index", -1)
desc = item.get("description", "")
if 0 <= idx < len(steps) and desc:
steps[idx].description = desc
except Exception as e:
logger.warning(f" LLM enhancement failed (non-fatal): {e}")
return steps
# ================================================================
# Helpers
# ================================================================
@staticmethod
def _summarize_menu(transcript: str) -> Optional[str]:
"""Create a short summary of an IVR menu transcript."""
if not transcript:
return None
# Count how many options
options = re.findall(r'press\s+\d+', transcript.lower())
if options:
return f"IVR menu with {len(options)} options"
# Truncate long transcripts
if len(transcript) > 80:
return transcript[:77] + "..."
return transcript
@staticmethod
def _extract_expect_pattern(transcript: str) -> Optional[str]:
"""Extract a regex pattern to match this prompt next time."""
if not transcript:
return None
# Find the most distinctive phrase (>4 words, not generic)
words = transcript.split()
if len(words) >= 4:
# Use first meaningful phrase
phrase = " ".join(words[:6])
# Escape for regex
return re.escape(phrase.lower())
return None
@staticmethod
def _guess_company_name(phone_number: str) -> str:
"""Guess company name from phone number (placeholder)."""
# In production, this would do a reverse lookup
return f"Company {phone_number[-4:]}"
@staticmethod
def _empty_flow(phone_number: str, company_name: Optional[str]) -> CallFlow:
"""Create an empty flow placeholder."""
return CallFlow(
id=f"flow_{phone_number.replace('+', '')}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
name=f"{company_name or phone_number} — Empty",
phone_number=phone_number,
description="Empty flow — no meaningful steps discovered",
steps=[],
tags=["auto-learned", "empty"],
)

717
services/hold_slayer.py Normal file
View File

@@ -0,0 +1,717 @@
"""
Hold Slayer Service — The main event.
Navigate IVR trees, wait on hold, detect when a human picks up,
and transfer you in. This is the state machine that orchestrates
the entire hold-slaying process.
Two modes:
1. run_with_flow(): Follow a stored call flow tree (fast, reliable)
2. run_exploration(): No stored flow — listen, transcribe, and figure it out
"""
import asyncio
import logging
import re
import time
from typing import Optional
from config import Settings
from core.call_manager import CallManager
from core.sip_engine import SIPEngine
from models.call import ActiveCall, AudioClassification, CallStatus, ClassificationResult
from models.call_flow import ActionType, CallFlow, CallFlowStep
from models.events import EventType, GatewayEvent
from services.audio_classifier import AudioClassifier
from services.transcription import TranscriptionService
logger = logging.getLogger(__name__)
# LLM client is optional — imported at use time
_llm_client = None
def _get_llm():
    """Lazily construct the shared LLM client; returns None when unavailable.

    Module-global cache semantics: ``None`` means "not tried yet",
    ``False`` means "tried and failed — don't retry", anything else is
    the live client instance.
    """
    global _llm_client
    if _llm_client is None:
        try:
            from config import get_settings
            from services.llm_client import LLMClient
            cfg = get_settings()
            _llm_client = LLMClient(
                base_url=cfg.llm.base_url,
                model=cfg.llm.model,
                api_key=cfg.llm.api_key,
                timeout=cfg.llm.timeout,
            )
        except Exception as e:
            logger.debug(f"LLM client not available: {e}")
            _llm_client = False  # Sentinel: don't retry
    return None if _llm_client is False else _llm_client
class HoldSlayerService:
    """
    The Hold Slayer.
    Navigates IVR menus, waits on hold, detects live humans,
    and transfers the call to your device.
    """
    def __init__(
        self,
        gateway,  # AIPSTNGateway (untyped to avoid a circular import)
        call_manager: CallManager,
        sip_engine: SIPEngine,
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
    ):
        """Wire up collaborators; no I/O happens until run() is called.

        Args:
            gateway: Owning gateway; used for transfer_call().
            call_manager: Call state/status tracking and event publishing.
            sip_engine: Audio streaming and DTMF for the PSTN leg.
            classifier: Audio classifier (music / speech / silence / ring).
            transcription: Speech-to-text service.
            settings: Global settings (hold_slayer.* values are read here).
        """
        self.gateway = gateway
        self.call_manager = call_manager
        self.sip_engine = sip_engine
        self.classifier = classifier
        self.transcription = transcription
        self.settings = settings
    async def run(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        call_flow_id: Optional[str] = None,
    ) -> bool:
        """
        Main entry point. Run the Hold Slayer on a call.
        Args:
            call: The active call to work on
            sip_leg_id: SIP leg ID for the PSTN call
            call_flow_id: Optional stored call flow to follow
        Returns:
            True if successfully transferred to user, False otherwise
            (cancellation and unexpected errors also return False).
        """
        logger.info(f"🗡️ Hold Slayer activated for {call.remote_number}")
        logger.info(f" Intent: {call.intent}")
        logger.info(f" Call Flow: {call_flow_id or 'exploration mode'}")
        try:
            # Wait for call to be connected
            await self._wait_for_connection(call, timeout=60)
            if call_flow_id:
                # Load the stored call flow from the database
                flow = await self._load_call_flow(call_flow_id)
                if flow:
                    return await self.run_with_flow(call, sip_leg_id, flow)
                else:
                    logger.warning(f"Call flow '{call_flow_id}' not found, switching to exploration")
            # No flow or flow not found — explore
            return await self.run_exploration(call, sip_leg_id)
        except asyncio.CancelledError:
            # Cancellation (user hangup / shutdown) is not a failure — just stop.
            logger.info(f"Hold Slayer cancelled for {call.id}")
            return False
        except Exception as e:
            # Any unexpected error marks the call FAILED so clients see it.
            logger.error(f"Hold Slayer error: {e}", exc_info=True)
            await self.call_manager.update_status(call.id, CallStatus.FAILED)
            return False
# ================================================================
# Mode 1: Follow a Stored Call Flow
# ================================================================
    async def run_with_flow(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        flow: CallFlow,
    ) -> bool:
        """
        Navigate using a stored call flow tree.
        Falls back to exploration for unknown steps.

        Walks the flow's step graph from its first step, executing each
        step's action (HOLD / DTMF / WAIT / LISTEN / SPEAK / TRANSFER) and
        following `next_step` / `fallback_step` links.

        Returns:
            True once a TRANSFER step completes; False on hold timeout,
            a broken step link, or when the flow ends without transferring.
        """
        logger.info(f"📋 Following call flow: {flow.name}")
        steps = flow.steps_by_id()
        # Flows begin at their first listed step; an empty flow ends at once.
        current_step_id = flow.steps[0].id if flow.steps else None
        while current_step_id:
            step = steps.get(current_step_id)
            if not step:
                # Broken link in the stored flow — bail out rather than loop.
                logger.error(f"Step '{current_step_id}' not found in flow")
                break
            call.current_step_id = current_step_id
            logger.info(f"📍 Step: {step.description}")
            await self.call_manager.event_bus.publish(GatewayEvent(
                type=EventType.IVR_STEP,
                call_id=call.id,
                data={"step_id": step.id, "description": step.description, "action": step.action.value},
                message=f"📍 IVR Step: {step.description}",
            ))
            # === Execute the step based on its action type ===
            if step.action == ActionType.HOLD:
                # HOLD MODE: Audio classifier takes over
                await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)
                logger.info(f"⏳ On hold. Activating hold detection...")
                human_detected = await self._wait_for_human(
                    call, sip_leg_id, timeout=step.timeout
                )
                if human_detected:
                    current_step_id = step.next_step
                else:
                    logger.warning("⏰ Hold timeout reached!")
                    break
            elif step.action == ActionType.DTMF:
                # Wait for the expected prompt, then send DTMF
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)
                if step.expect:
                    heard = await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                    if not heard and step.fallback_step:
                        logger.info(f"⚠️ Didn't hear expected prompt, falling back")
                        current_step_id = step.fallback_step
                        continue
                # Send the DTMF digits
                if step.action_value:
                    await self.sip_engine.send_dtmf(sip_leg_id, step.action_value)
                    logger.info(f"📱 Pressed: {step.action_value}")
                    await self.call_manager.event_bus.publish(GatewayEvent(
                        type=EventType.IVR_DTMF_SENT,
                        call_id=call.id,
                        data={"digits": step.action_value, "step": step.id},
                        message=f"📱 DTMF sent: {step.action_value}",
                    ))
                # Small delay after DTMF for the IVR to process
                await asyncio.sleep(2.0)
                current_step_id = step.next_step
            elif step.action == ActionType.WAIT:
                # Just wait for a prompt (or a fixed delay when none expected)
                if step.expect:
                    await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                else:
                    await asyncio.sleep(step.timeout)
                current_step_id = step.next_step
            elif step.action == ActionType.LISTEN:
                # Listen and decide — regex first, LLM fallback
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)
                transcript = await self._listen_for_menu(
                    call, sip_leg_id, step.timeout
                )
                # Phase 1: Try regex-based keyword matching (fast, no API call)
                decision = self._decide_menu_option(
                    transcript, call.intent or "", step.expect
                )
                # Phase 2: LLM fallback if regex couldn't decide
                if not decision and transcript:
                    llm = _get_llm()
                    if llm:
                        try:
                            logger.info("🤖 Regex inconclusive, asking LLM...")
                            llm_result = await llm.analyze_ivr_menu(
                                transcript=transcript,
                                intent=call.intent or "",
                                previous_selections=list(call.dtmf_history) if hasattr(call, 'dtmf_history') else None,
                            )
                            decision = llm_result.get("digit")
                            if decision:
                                confidence = llm_result.get("confidence", 0)
                                reason = llm_result.get("reason", "")
                                logger.info(
                                    f"🤖 LLM decided: press {decision} "
                                    f"(confidence={confidence}, reason='{reason}')"
                                )
                        except Exception as e:
                            # LLM failures are non-fatal — fall through to default.
                            logger.warning(f"🤖 LLM fallback failed: {e}")
                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    logger.info(f"🧠 Decided: press {decision} (heard: '{transcript[:60]}...')")
                else:
                    # Default: press 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    logger.info(f"🧠 No clear match, pressing 0 for agent")
                await asyncio.sleep(2.0)
                current_step_id = step.next_step
            elif step.action == ActionType.SPEAK:
                # Say something into the call (TTS)
                # TODO: Implement TTS integration
                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
                await asyncio.sleep(3.0)
                current_step_id = step.next_step
            elif step.action == ActionType.TRANSFER:
                # We did it! Transfer to user's device
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                logger.info(f"🚨 TRANSFERRING TO {step.action_value}")
                device_target = step.action_value or call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device_target)
                return True
            else:
                logger.warning(f"Unknown action type: {step.action}")
                current_step_id = step.next_step
        # Flow exhausted (or broke) without reaching a TRANSFER step.
        return False
# ================================================================
# Mode 2: Exploration (No Stored Flow)
# ================================================================
    async def run_exploration(
        self,
        call: ActiveCall,
        sip_leg_id: str,
    ) -> bool:
        """
        No stored flow — explore the IVR blind.
        Records what it discovers so we can build a flow for next time.

        Loop: grab ~3s of audio, classify it, transcribe speech, then act —
        transfer on a live human, monitor while on hold, navigate menus
        (regex decision, else press 0 for an agent), and idle through
        silence/ringing. Gives up after hold_slayer.max_hold_time seconds.

        Returns:
            True if transferred to the user, False on timeout or call end.
        """
        logger.info(f"🔍 Exploration mode: discovering IVR for {call.remote_number}")
        await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)
        # NOTE(review): discovered_steps is only logged on success and is
        # discarded on timeout — flow persistence presumably happens in the
        # learner service; confirm.
        discovered_steps: list[dict] = []
        max_time = self.settings.hold_slayer.max_hold_time
        start_time = time.time()
        while time.time() - start_time < max_time:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                break
            # Get audio and classify
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 3:  # 3 seconds
                        break
            except Exception as e:
                logger.error(f"Audio stream error: {e}")
                await asyncio.sleep(1.0)
                continue
            if not audio_chunk:
                await asyncio.sleep(1.0)
                continue
            # Classify the audio
            classification = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(classification.audio_type)
            await self.call_manager.add_classification(call.id, classification)
            # Transcribe if it sounds like speech
            transcript = ""
            if classification.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                transcript = await self.transcription.transcribe(
                    audio_chunk,
                    prompt="Phone IVR menu, customer service, press 1 for..."
                )
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)
            # Record discovery
            discovered_steps.append({
                "timestamp": time.time(),
                "audio_type": classification.audio_type.value,
                "confidence": classification.confidence,
                "transcript": transcript,
                "action_taken": None,
            })
            # === Decision Logic ===
            if classification.audio_type == AudioClassification.LIVE_HUMAN:
                # HUMAN DETECTED! Transfer!
                logger.info("🚨 LIVE HUMAN DETECTED!")
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                device = call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device)
                logger.info(f"📋 Discovered {len(discovered_steps)} IVR steps")
                return True
            elif classification.audio_type == AudioClassification.MUSIC:
                # On hold — just keep monitoring
                if current_call.status != CallStatus.ON_HOLD:
                    await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)
                # Check for hold→human transition
                if self.classifier.detect_hold_to_human_transition():
                    logger.info("🚨 Hold-to-human transition detected!")
                    await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                    device = call.device or self.settings.hold_slayer.default_transfer_device
                    await self.gateway.transfer_call(call.id, device)
                    return True
            elif classification.audio_type == AudioClassification.IVR_PROMPT and transcript:
                # IVR menu — try to navigate
                decision = self._decide_menu_option(
                    transcript, call.intent or "", None
                )
                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    discovered_steps[-1]["action_taken"] = {"dtmf": decision}
                    logger.info(f"🧠 Exploration: pressed {decision}")
                    await asyncio.sleep(2.0)
                else:
                    # Try pressing 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    discovered_steps[-1]["action_taken"] = {"dtmf": "0", "reason": "default_agent"}
                    logger.info("🧠 Exploration: pressed 0 (trying for agent)")
                    await asyncio.sleep(2.0)
            elif classification.audio_type == AudioClassification.SILENCE:
                # Silence — wait a bit
                await asyncio.sleep(2.0)
            elif classification.audio_type == AudioClassification.RINGING:
                # Still ringing
                await asyncio.sleep(1.0)
        logger.warning(f"Hold Slayer timed out after {max_time}s")
        return False
# ================================================================
# Core Detection Methods
# ================================================================
async def _wait_for_human(
self,
call: ActiveCall,
sip_leg_id: str,
timeout: int = 7200,
) -> bool:
"""
Wait on hold until a live human is detected.
Continuously classifies audio and watches for the
music → speech transition.
"""
check_interval = self.settings.hold_slayer.hold_check_interval
start_time = time.time()
while time.time() - start_time < timeout:
# Check if call is still active
current_call = self.call_manager.get_call(call.id)
if not current_call or current_call.status in (
CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
):
return False
# Get audio chunk
audio_chunk = b""
try:
async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
audio_chunk += chunk
if len(audio_chunk) >= int(16000 * 2 * check_interval):
break
except Exception:
await asyncio.sleep(check_interval)
continue
if not audio_chunk:
await asyncio.sleep(check_interval)
continue
# Classify
result = self.classifier.classify_chunk(audio_chunk)
self.classifier.update_history(result.audio_type)
await self.call_manager.add_classification(call.id, result)
# Check for human
if result.audio_type == AudioClassification.LIVE_HUMAN:
# Verify with transcription
transcript = await self.transcription.transcribe(audio_chunk)
if transcript:
await self.call_manager.add_transcript(call.id, transcript)
# If we got meaningful speech, it's probably a real person
if len(transcript.split()) >= 3:
logger.info(f"🚨 Human confirmed! Said: '{transcript[:100]}'")
return True
# Check for the music→speech transition pattern
if self.classifier.detect_hold_to_human_transition():
logger.info("🚨 Hold-to-human transition detected!")
return True
# Log progress periodically
elapsed = int(time.time() - start_time)
if elapsed > 0 and elapsed % 60 == 0:
logger.info(
f"⏳ Still on hold... {elapsed}s "
f"(audio: {result.audio_type.value}, {result.confidence:.0%})"
)
return False
    async def _wait_for_prompt(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        expected_pattern: str,
        timeout: int = 30,
    ) -> bool:
        """
        Wait for an expected IVR prompt.
        Listens, transcribes, and checks if the transcript matches
        the expected pattern (regex or keywords).

        Args:
            call: Active call (transcripts are attached to it).
            sip_leg_id: SIP leg to pull audio from.
            expected_pattern: Regex matched case-insensitively; if it is
                not a valid regex it's used as a case-insensitive substring.
            timeout: Max seconds to keep listening.

        Returns:
            True once the prompt is heard, False on timeout.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 3:  # 3 seconds
                        break
            except Exception:
                await asyncio.sleep(1.0)
                continue
            if not audio_chunk:
                await asyncio.sleep(1.0)
                continue
            # Classify first — only transcribe chunks that contain speech
            result = self.classifier.classify_chunk(audio_chunk)
            if result.audio_type not in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                # Music/silence/ringing — skip transcription, keep listening.
                continue
            # Transcribe
            transcript = await self.transcription.transcribe(audio_chunk)
            if not transcript:
                continue
            await self.call_manager.add_transcript(call.id, transcript)
            # Check if it matches expected pattern
            try:
                if re.search(expected_pattern, transcript, re.IGNORECASE):
                    logger.info(f"✅ Heard expected: '{transcript[:80]}'")
                    return True
            except re.error:
                # Treat as keyword search if regex is invalid
                if expected_pattern.lower() in transcript.lower():
                    logger.info(f"✅ Heard expected: '{transcript[:80]}'")
                    return True
        logger.warning(f"⚠️ Didn't hear expected prompt within {timeout}s")
        return False
    async def _listen_for_menu(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 30,
    ) -> str:
        """Listen for an IVR menu and return the full transcript.

        Accumulates transcribed speech until silence follows speech (the
        menu prompt finished), the audio stream dries up, or `timeout`
        elapses. May return "" when nothing intelligible was heard.
        """
        transcript_parts: list[str] = []
        start_time = time.time()
        while time.time() - start_time < timeout:
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 5:  # 5 seconds
                        break
            except Exception:
                await asyncio.sleep(1.0)
                continue
            if not audio_chunk:
                # Stream produced nothing — stop rather than spin.
                break
            result = self.classifier.classify_chunk(audio_chunk)
            # If we're getting silence after speech, the menu prompt is done
            if result.audio_type == AudioClassification.SILENCE and transcript_parts:
                break
            if result.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                text = await self.transcription.transcribe(audio_chunk)
                if text:
                    transcript_parts.append(text)
        full_transcript = " ".join(transcript_parts)
        if full_transcript:
            await self.call_manager.add_transcript(call.id, full_transcript)
        return full_transcript
async def _wait_for_connection(self, call: ActiveCall, timeout: int = 60) -> None:
"""Wait for the call to be connected (answered)."""
start = time.time()
while time.time() - start < timeout:
current = self.call_manager.get_call(call.id)
if not current:
raise RuntimeError(f"Call {call.id} disappeared")
if current.status in (CallStatus.CONNECTED, CallStatus.NAVIGATING_IVR):
return
if current.status in (CallStatus.FAILED, CallStatus.CANCELLED):
raise RuntimeError(f"Call {call.id} failed: {current.status}")
await asyncio.sleep(0.5)
raise TimeoutError(f"Call {call.id} not connected within {timeout}s")
# ================================================================
# Menu Navigation Logic
# ================================================================
def _decide_menu_option(
self,
transcript: str,
intent: str,
expected_options: Optional[str],
) -> Optional[str]:
"""
Decide which menu option to select based on transcript and intent.
Simple keyword-based matching. This is where an LLM integration
would massively improve navigation accuracy.
Returns:
DTMF digit(s) to press, or None if can't decide
"""
transcript_lower = transcript.lower()
intent_lower = intent.lower()
# Common IVR patterns: "press 1 for X, press 2 for Y"
# Extract options
options = re.findall(
r'(?:press|dial|say)\s+(\d+)\s+(?:for|to)\s+(.+?)(?:\.|,|press|dial|$)',
transcript_lower,
)
if not options:
# Try alternate patterns: "for X, press 1"
options = re.findall(
r'for\s+(.+?),?\s*(?:press|dial)\s+(\d+)',
transcript_lower,
)
# Swap order to be (digit, description)
options = [(digit, desc) for desc, digit in options]
if not options:
return None
# Score each option against the intent
best_match = None
best_score = 0
# Keywords that map intents to IVR options
intent_keywords = {
"cancel": ["cancel", "close", "end", "terminate"],
"dispute": ["dispute", "charge", "billing", "transaction", "statement"],
"balance": ["balance", "account", "summary"],
"agent": ["agent", "representative", "operator", "speak", "person", "human"],
"payment": ["payment", "pay", "bill"],
"card": ["card", "credit", "debit"],
"fraud": ["fraud", "unauthorized", "stolen", "lost"],
"transfer": ["transfer", "move", "send"],
}
for digit, description in options:
score = 0
# Direct keyword match in description
for keyword_group, keywords in intent_keywords.items():
if any(kw in intent_lower for kw in keywords):
if any(kw in description for kw in keywords):
score += 10
# Fuzzy: any word overlap between intent and description
intent_words = set(intent_lower.split())
desc_words = set(description.split())
overlap = intent_words & desc_words
score += len(overlap) * 3
# "Speak to agent" is usually what we want if nothing else matches
if any(w in description for w in ["agent", "representative", "operator", "person"]):
score += 5
if score > best_score:
best_score = score
best_match = digit
if best_match and best_score >= 3:
return best_match
# Default: look for "agent" or "representative" option
for digit, description in options:
if any(w in description for w in ["agent", "representative", "operator"]):
return digit
return None
    async def _load_call_flow(self, flow_id: str) -> Optional[CallFlow]:
        """Load a stored call flow from the database.

        Returns the hydrated CallFlow, or None when the flow doesn't exist
        or the read fails — errors are logged, never raised, so the caller
        can fall back to exploration mode.
        """
        # Deferred imports — resolved only when a flow is actually loaded.
        from db.database import get_session_factory, StoredCallFlow
        from sqlalchemy import select
        try:
            factory = get_session_factory()
            async with factory() as session:
                result = await session.execute(
                    select(StoredCallFlow).where(StoredCallFlow.id == flow_id)
                )
                row = result.scalar_one_or_none()
                if row:
                    # Redundant local import — CallFlowStep is already
                    # imported at module level; harmless.
                    from models.call_flow import CallFlowStep
                    # Rehydrate JSON step dicts into CallFlowStep models.
                    return CallFlow(
                        id=row.id,
                        name=row.name,
                        phone_number=row.phone_number,
                        description=row.description or "",
                        steps=[CallFlowStep(**s) for s in row.steps],
                        tags=row.tags or [],
                        notes=row.notes,
                        avg_hold_time=row.avg_hold_time,
                        success_rate=row.success_rate,
                        last_used=row.last_used,
                        times_used=row.times_used or 0,
                    )
        except Exception as e:
            logger.error(f"Failed to load call flow '{flow_id}': {e}")
        return None

391
services/llm_client.py Normal file
View File

@@ -0,0 +1,391 @@
"""
LLM Client — Unified interface for LLM-powered decision making.
Used by Hold Slayer (IVR navigation fallback), Call Flow Learner,
Receptionist, and Smart Routing services.
Supports OpenAI-compatible APIs (OpenAI, Ollama, LM Studio, etc.)
via httpx async client. No SDK dependency — just HTTP.
"""
import json
import logging
import time
from typing import Any, Optional
import httpx
from config import get_settings
logger = logging.getLogger(__name__)
class LLMClient:
    """
    Async LLM client for OpenAI-compatible chat completion APIs.
    Works with:
    - OpenAI API (api.openai.com)
    - Ollama (localhost:11434)
    - LM Studio (localhost:1234)
    - Any OpenAI-compatible endpoint
    Usage:
        client = LLMClient(base_url="http://localhost:11434/v1", model="llama3")
        response = await client.chat("What is 2+2?")
        # or structured:
        result = await client.chat_json(
            "Extract the menu options from this IVR transcript...",
            system="You are a phone menu parser.",
        )
    """
    def __init__(
        self,
        base_url: str = "http://localhost:11434/v1",
        model: str = "llama3",
        api_key: str = "not-needed",
        timeout: float = 30.0,
        max_tokens: int = 1024,
        temperature: float = 0.3,
    ):
        """Create a client bound to one endpoint/model.

        Args:
            base_url: OpenAI-compatible API root (trailing "/" stripped).
            model: Model name sent on every request.
            api_key: Bearer token; local servers typically ignore it.
            timeout: Per-request timeout in seconds.
            max_tokens: Default completion cap (overridable per call).
            temperature: Default sampling temperature (overridable per call).
        """
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.api_key = api_key
        self.timeout = timeout
        self.max_tokens = max_tokens
        self.temperature = temperature
        # One shared async connection pool for the client's lifetime;
        # release it with close().
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            timeout=httpx.Timeout(timeout),
        )
        # Stats — maintained by _complete(), exposed via the `stats` property
        self._total_requests = 0
        self._total_tokens = 0
        self._total_errors = 0
        self._avg_latency_ms = 0.0
    async def close(self):
        """Close the HTTP client."""
        await self._client.aclose()
# ================================================================
# Core Chat Methods
# ================================================================
async def chat(
self,
user_message: str,
system: Optional[str] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
) -> str:
"""
Send a chat completion request and return the text response.
Args:
user_message: The user's message/prompt.
system: Optional system prompt.
temperature: Override default temperature.
max_tokens: Override default max tokens.
Returns:
The assistant's response text.
"""
messages = []
if system:
messages.append({"role": "system", "content": system})
messages.append({"role": "user", "content": user_message})
return await self._complete(
messages,
temperature=temperature or self.temperature,
max_tokens=max_tokens or self.max_tokens,
)
async def chat_json(
self,
user_message: str,
system: Optional[str] = None,
temperature: Optional[float] = None,
) -> dict[str, Any]:
"""
Chat completion that parses the response as JSON.
The system prompt is augmented to request JSON output.
Falls back to extracting JSON from markdown code blocks.
Returns:
Parsed JSON dict, or {"error": "..."} on parse failure.
"""
json_system = (system or "") + (
"\n\nIMPORTANT: Respond with valid JSON only. "
"No markdown, no explanation, just the JSON object."
)
response_text = await self.chat(
user_message,
system=json_system.strip(),
temperature=temperature or 0.1, # Lower temp for structured output
)
return self._parse_json_response(response_text)
async def chat_with_history(
self,
messages: list[dict[str, str]],
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
) -> str:
"""
Chat with full message history (multi-turn conversation).
Args:
messages: List of {"role": "system|user|assistant", "content": "..."}
Returns:
The assistant's response text.
"""
return await self._complete(
messages,
temperature=temperature or self.temperature,
max_tokens=max_tokens or self.max_tokens,
)
# ================================================================
# Hold Slayer Specific Methods
# ================================================================
async def analyze_ivr_menu(
self,
transcript: str,
intent: str,
previous_selections: Optional[list[str]] = None,
) -> dict[str, Any]:
"""
Analyze an IVR menu transcript and decide which option to press.
This is the LLM fallback when regex-based menu parsing fails.
Args:
transcript: The IVR audio transcript.
intent: What the user wants to accomplish.
previous_selections: DTMF digits already pressed in this call.
Returns:
{"digit": "3", "reason": "Option 3 is for card cancellation",
"confidence": 0.85}
"""
system = (
"You are an expert at navigating phone menus (IVR systems). "
"Given an IVR transcript and the caller's intent, determine "
"which menu option (DTMF digit) to press.\n\n"
"Rules:\n"
"- If there's a direct match for the intent, choose it.\n"
"- If no direct match, choose 'speak to representative' or 'agent' option.\n"
"- If menu says 'press 0 for operator', that's always a safe fallback.\n"
"- Return the single digit to press.\n"
"- If you truly can't determine the right option, return digit: null.\n"
)
context = f"IVR Transcript:\n{transcript}\n\n"
context += f"Caller's Intent: {intent}\n"
if previous_selections:
context += f"Already pressed: {', '.join(previous_selections)}\n"
context += "\nWhich digit should be pressed? Return JSON."
result = await self.chat_json(context, system=system)
# Normalize response
if "digit" not in result:
# Try to extract from various response formats
for key in ["option", "press", "choice", "dtmf"]:
if key in result:
result["digit"] = str(result[key])
break
return result
async def detect_human_speech(
self,
transcript: str,
context: str = "",
) -> dict[str, Any]:
"""
Analyze a transcript to determine if a human agent is speaking.
Used as a secondary check when audio classifier detects speech
but we need to distinguish between IVR prompts and a live human.
Returns:
{"is_human": true, "confidence": 0.9, "reason": "Agent greeting detected"}
"""
system = (
"You are analyzing a phone call transcript to determine if "
"a live human agent is speaking (vs an automated IVR system).\n\n"
"Human indicators:\n"
"- Personal greeting ('Hi, my name is...')\n"
"- Asking for account details\n"
"- Conversational tone, filler words\n"
"- Acknowledging hold time ('Thanks for waiting')\n"
"\nIVR indicators:\n"
"- 'Press N for...', 'Say...'\n"
"- Robotic phrasing\n"
"- Menu options\n"
"- 'Your call is important to us'\n"
)
prompt = f"Transcript:\n{transcript}\n"
if context:
prompt += f"\nContext: {context}\n"
prompt += "\nIs this a live human agent? Return JSON."
return await self.chat_json(prompt, system=system)
async def summarize_call(
self,
transcript_chunks: list[str],
intent: str,
duration_seconds: int,
) -> dict[str, Any]:
"""
Generate a call summary from transcript chunks.
Used for call history and analytics.
Returns:
{"summary": "...", "outcome": "resolved|unresolved|transferred",
"key_info": [...], "sentiment": "positive|neutral|negative"}
"""
system = (
"Summarize this phone call concisely. Include:\n"
"- What the caller wanted\n"
"- What happened (IVR navigation, hold time, agent interaction)\n"
"- The outcome\n"
"Return as JSON with: summary, outcome, key_info (list), sentiment."
)
full_transcript = "\n".join(transcript_chunks)
prompt = (
f"Caller's intent: {intent}\n"
f"Call duration: {duration_seconds} seconds\n\n"
f"Full transcript:\n{full_transcript}\n\n"
"Summarize this call."
)
return await self.chat_json(prompt, system=system)
# ================================================================
# Internal
# ================================================================
    async def _complete(
        self,
        messages: list[dict[str, str]],
        temperature: float = 0.3,
        max_tokens: int = 1024,
    ) -> str:
        """Execute a chat completion request.

        Returns the assistant's text, or "" on any transport/API error —
        errors are logged and counted in the stats, never raised.
        """
        self._total_requests += 1
        start = time.monotonic()
        try:
            payload = {
                "model": self.model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            }
            response = await self._client.post("/chat/completions", json=payload)
            response.raise_for_status()
            data = response.json()
            # Track token usage
            if "usage" in data:
                self._total_tokens += data["usage"].get("total_tokens", 0)
            # Track latency as an exponential moving average (alpha = 0.1).
            # NOTE(review): the EMA starts at 0.0, so readings are biased
            # low until several requests have been made.
            elapsed_ms = (time.monotonic() - start) * 1000
            self._avg_latency_ms = (
                self._avg_latency_ms * 0.9 + elapsed_ms * 0.1
            )
            # Extract response text
            choices = data.get("choices", [])
            if choices:
                return choices[0].get("message", {}).get("content", "")
            return ""
        except httpx.HTTPStatusError as e:
            self._total_errors += 1
            logger.error(f"LLM API error: {e.response.status_code} {e.response.text[:200]}")
            return ""
        except httpx.TimeoutException:
            self._total_errors += 1
            logger.error(f"LLM API timeout after {self.timeout}s")
            return ""
        except Exception as e:
            # Catch-all so LLM hiccups never crash call handling.
            self._total_errors += 1
            logger.error(f"LLM client error: {e}")
            return ""
@staticmethod
def _parse_json_response(text: str) -> dict[str, Any]:
"""Parse JSON from LLM response, handling common formatting issues."""
text = text.strip()
# Try direct parse
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Try extracting from markdown code block
if "```" in text:
# Find content between ```json and ``` or ``` and ```
parts = text.split("```")
for i, part in enumerate(parts):
if i % 2 == 1: # Odd indices are inside code blocks
# Remove optional language tag
content = part.strip()
if content.startswith("json"):
content = content[4:].strip()
try:
return json.loads(content)
except json.JSONDecodeError:
continue
# Try finding JSON object in the text
brace_start = text.find("{")
brace_end = text.rfind("}")
if brace_start != -1 and brace_end != -1:
try:
return json.loads(text[brace_start : brace_end + 1])
except json.JSONDecodeError:
pass
logger.warning(f"Failed to parse JSON from LLM response: {text[:200]}")
return {"error": "Failed to parse JSON response", "raw": text[:500]}
# ================================================================
# Stats
# ================================================================
@property
def stats(self) -> dict:
return {
"total_requests": self._total_requests,
"total_tokens": self._total_tokens,
"total_errors": self._total_errors,
"avg_latency_ms": round(self._avg_latency_ms, 1),
"model": self.model,
"base_url": self.base_url,
}

256
services/notification.py Normal file
View File

@@ -0,0 +1,256 @@
"""
Notification Service — Tell the user what's happening.
Sends notifications when:
- A human picks up (TRANSFER NOW!)
- Hold time estimates change
- Call fails or times out
- IVR navigation milestones
Supports multiple channels: WebSocket (always), SMS (optional),
push notifications (future).
"""
import asyncio
import logging
from datetime import datetime
from enum import Enum
from typing import Any, Optional

from pydantic import BaseModel, Field

from config import Settings
from core.event_bus import EventBus
from models.events import EventType, GatewayEvent
logger = logging.getLogger(__name__)
class NotificationChannel(str, Enum):
    """Where to send notifications."""
    WEBSOCKET = "websocket"  # Always available — live dashboard clients
    SMS = "sms"  # Optional channel
    PUSH = "push"  # Future — not implemented yet
class NotificationPriority(str, Enum):
    """How urgently to deliver, ordered from least to most urgent."""
    LOW = "low"  # Status updates, hold time estimates
    NORMAL = "normal"  # IVR navigation milestones
    HIGH = "high"  # Human detected, call failed
    CRITICAL = "critical"  # Transfer happening NOW
class Notification(BaseModel):
    """A notification to send to the user."""
    channel: NotificationChannel
    priority: NotificationPriority
    title: str
    message: str
    call_id: Optional[str] = None
    data: dict[str, Any] = Field(default_factory=dict)
    # default_factory so each notification gets its own creation time.
    # The previous `= datetime.now()` default was evaluated once at import
    # time, so every notification shared that same stale timestamp.
    timestamp: datetime = Field(default_factory=datetime.now)
class NotificationService:
    """
    Sends notifications to users about call events.

    Listens to the EventBus and routes events to the
    appropriate notification channels.

    Each (event type, IVR step) pair is notified at most once per call
    so repeated events don't spam the user; per-call dedup state is
    dropped when the call ends.
    """

    def __init__(self, event_bus: EventBus, settings: Settings):
        self._event_bus = event_bus
        self._settings = settings
        self._task: Optional[asyncio.Task] = None
        self._sms_sender: Optional[Any] = None
        # Track what we've already notified (avoid spam).
        # call_id -> set of "event_type:step_id" dedup keys.
        self._notified: dict[str, set[str]] = {}

    async def start(self) -> None:
        """Start listening for events to notify on."""
        self._task = asyncio.create_task(self._listen_loop())
        logger.info("📢 Notification service started")

    async def stop(self) -> None:
        """Stop the notification listener."""
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
        logger.info("📢 Notification service stopped")

    async def _listen_loop(self) -> None:
        """Main event listener loop: forward every event to the handler,
        logging (not propagating) per-event failures."""
        subscription = self._event_bus.subscribe()
        try:
            async for event in subscription:
                try:
                    await self._handle_event(event)
                except Exception as e:
                    logger.error(f"Notification handler error: {e}", exc_info=True)
        except asyncio.CancelledError:
            pass
        finally:
            subscription.close()

    async def _handle_event(self, event: GatewayEvent) -> None:
        """Route an event to the appropriate notification(s).

        Deduplicates per call, sends the notification, and drops the
        call's dedup state once the call has ended.
        """
        call_id = event.call_id or ""

        # Skip duplicate notifications for this call.
        dedup_key = f"{event.type.value}:{event.data.get('step_id', '')}"
        if call_id and dedup_key in self._notified.get(call_id, set()):
            return

        notification = self._event_to_notification(event)
        if not notification:
            return

        # Mark as notified. setdefault creates the tracking set lazily,
        # so there's no KeyError if no entry exists for this call yet.
        if call_id:
            self._notified.setdefault(call_id, set()).add(dedup_key)

        # Send via all appropriate channels
        await self._send(notification)

        # Clean up dedup state AFTER sending. (BUG FIX: this cleanup
        # previously lived inside _event_to_notification, which deleted
        # self._notified[call_id] before the dedup key was added above —
        # the resulting KeyError meant CALL_ENDED notifications were
        # never sent.)
        if call_id and event.type == EventType.CALL_ENDED:
            self._notified.pop(call_id, None)

    def _event_to_notification(self, event: GatewayEvent) -> Optional[Notification]:
        """Convert a gateway event to a notification (or None to skip).

        Pure conversion — no side effects on service state.
        """
        if event.type == EventType.HUMAN_DETECTED:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.CRITICAL,
                title="🚨 Human Detected!",
                message="A live person picked up — transferring you now!",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.TRANSFER_STARTED:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.CRITICAL,
                title="📞 Call Transferred",
                message="Your call has been connected to the agent. Pick up your phone!",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.CALL_FAILED:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.HIGH,
                title="❌ Call Failed",
                message=event.message or "The call couldn't be completed.",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.HOLD_DETECTED:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.NORMAL,
                title="⏳ On Hold",
                message="You're on hold. We'll notify you when someone picks up.",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.IVR_STEP:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.LOW,
                title="📍 IVR Navigation",
                message=event.message or "Navigating phone menu...",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.IVR_DTMF_SENT:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.LOW,
                title="📱 Button Pressed",
                message=event.message or f"Pressed {event.data.get('digits', '?')}",
                call_id=event.call_id,
                data=event.data,
            )
        elif event.type == EventType.CALL_ENDED:
            return Notification(
                channel=NotificationChannel.WEBSOCKET,
                priority=NotificationPriority.NORMAL,
                title="📴 Call Ended",
                message=event.message or "The call has ended.",
                call_id=event.call_id,
                data=event.data,
            )
        # Skip other event types (transcription, classification, etc.)
        return None

    async def _send(self, notification: Notification) -> None:
        """Send a notification via the appropriate channel(s)."""
        logger.info(
            f"📢 [{notification.priority.value}] {notification.title}: "
            f"{notification.message}"
        )
        # WebSocket notifications go through the event bus
        # (the WebSocket handler in the API reads from EventBus directly)
        # SMS for critical notifications
        if (
            notification.priority == NotificationPriority.CRITICAL
            and self._settings.notify_sms_number
        ):
            await self._send_sms(notification)

    async def _send_sms(self, notification: Notification) -> None:
        """
        Send an SMS notification.

        Uses a simple HTTP-based SMS gateway. In production,
        this would use Twilio, AWS SNS, or similar.
        """
        phone = self._settings.notify_sms_number
        if not phone:
            return
        try:
            import httpx  # noqa: F401 — used by the provider example below

            # Generic webhook-based SMS (configure your provider)
            # This is a placeholder — wire up your preferred SMS provider
            logger.info(f"📱 SMS → {phone}: {notification.title}")
            # Example: Twilio-style API
            # async with httpx.AsyncClient() as client:
            #     await client.post(
            #         "https://api.twilio.com/2010-04-01/Accounts/.../Messages.json",
            #         data={
            #             "To": phone,
            #             "From": self._settings.sip_trunk.did,
            #             "Body": f"{notification.title}\n{notification.message}",
            #         },
            #         auth=(account_sid, auth_token),
            #     )
        except Exception as e:
            logger.error(f"SMS send failed: {e}")

230
services/recording.py Normal file
View File

@@ -0,0 +1,230 @@
"""
Recording Service — Call recording management.
Records calls to WAV files via the PJSUA2 media pipeline,
manages storage, and provides playback/download access.
"""
import asyncio
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Optional
from config import get_settings
logger = logging.getLogger(__name__)
class RecordingService:
    """
    Manages call recordings.

    Features:
    - Start/stop recording for any active call leg
    - Dual-channel recording (separate caller/agent streams)
    - Mixed recording (both parties in one file)
    - WAV storage with organized directory structure
    - Recording metadata tracking
    """

    def __init__(
        self,
        storage_dir: str = "recordings",
        max_recording_seconds: int = 7200,  # 2 hours
        sample_rate: int = 16000,
    ):
        self._storage_dir = Path(storage_dir)
        self._max_recording_seconds = max_recording_seconds
        self._sample_rate = sample_rate
        # Quoted annotation: RecordingSession is defined later in this module.
        self._active_recordings: dict[str, "RecordingSession"] = {}
        # Finalized session metadata dicts, appended in stop order.
        self._metadata: list[dict] = []
        # call_id -> safety-timeout task. Kept so the tasks aren't garbage
        # collected mid-flight and can be cancelled on a normal stop.
        self._timeout_tasks: dict[str, asyncio.Task] = {}

    async def start(self) -> None:
        """Initialize the recording service (create the storage directory)."""
        self._storage_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"🎙️ Recording service ready (storage: {self._storage_dir})")

    # ================================================================
    # Recording Lifecycle
    # ================================================================

    async def start_recording(
        self,
        call_id: str,
        media_pipeline=None,
        leg_ids: Optional[list[str]] = None,
        dual_channel: bool = False,
    ) -> "RecordingSession":
        """
        Start recording a call.

        Args:
            call_id: The call to record.
            media_pipeline: MediaPipeline instance for PJSUA2 recording.
            leg_ids: Specific SIP leg IDs to record. If None, records all legs.
            dual_channel: If True, record each party to a separate channel.

        Returns:
            RecordingSession with file paths and metadata.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        date_dir = datetime.now().strftime("%Y-%m-%d")
        recording_dir = self._storage_dir / date_dir
        recording_dir.mkdir(parents=True, exist_ok=True)

        if dual_channel:
            filepath_caller = str(recording_dir / f"{call_id}_{timestamp}_caller.wav")
            filepath_agent = str(recording_dir / f"{call_id}_{timestamp}_agent.wav")
            filepath_mixed = str(recording_dir / f"{call_id}_{timestamp}_mixed.wav")
        else:
            filepath_caller = None
            filepath_agent = None
            filepath_mixed = str(recording_dir / f"{call_id}_{timestamp}.wav")

        session = RecordingSession(
            call_id=call_id,
            filepath_mixed=filepath_mixed,
            filepath_caller=filepath_caller,
            filepath_agent=filepath_agent,
            started_at=datetime.now(),
            sample_rate=self._sample_rate,
        )

        # Start PJSUA2 recording if media pipeline is available.
        # NOTE(review): dual_channel currently still records only the mixed
        # file; per-channel capture is not wired up yet.
        if media_pipeline and leg_ids:
            # BUG FIX: remember the legs on the session so stop_recording()
            # can stop them. Previously _leg_ids was never populated, so
            # pipeline recordings were never stopped.
            session._leg_ids = list(leg_ids)
            for leg_id in leg_ids:
                if filepath_mixed:
                    media_pipeline.start_recording(leg_id, filepath_mixed)

        self._active_recordings[call_id] = session
        logger.info(f"🔴 Recording started: {call_id}{filepath_mixed}")

        # Safety timeout — keep a reference so the task isn't garbage
        # collected and can be cancelled when the recording stops normally.
        self._timeout_tasks[call_id] = asyncio.create_task(
            self._recording_timeout(call_id),
            name=f"rec_timeout_{call_id}",
        )
        return session

    async def stop_recording(
        self,
        call_id: str,
        media_pipeline=None,
    ) -> Optional["RecordingSession"]:
        """Stop recording a call and finalize the WAV file.

        Returns the finalized session, or None if no recording was active.
        """
        session = self._active_recordings.pop(call_id, None)
        if not session:
            logger.warning(f" No active recording for {call_id}")
            return None

        # Cancel the safety-timeout task (unless we ARE that task,
        # i.e. this stop was triggered by the timeout itself).
        timeout_task = self._timeout_tasks.pop(call_id, None)
        if timeout_task and timeout_task is not asyncio.current_task():
            timeout_task.cancel()

        session.stopped_at = datetime.now()
        session.duration_seconds = int(
            (session.stopped_at - session.started_at).total_seconds()
        )

        # Stop PJSUA2 recording — the pipeline handles flushing and
        # closing the WAV file.
        if media_pipeline:
            for leg_id in (session._leg_ids or []):
                media_pipeline.stop_recording(leg_id)

        # Calculate file size (the file may not exist if the pipeline
        # never wrote anything).
        if session.filepath_mixed and os.path.exists(session.filepath_mixed):
            session.file_size_bytes = os.path.getsize(session.filepath_mixed)

        # Store metadata
        self._metadata.append(session.to_dict())
        logger.info(
            f"⏹ Recording stopped: {call_id} "
            f"({session.duration_seconds}s, "
            f"{session.file_size_bytes or 0} bytes)"
        )
        return session

    async def _recording_timeout(self, call_id: str) -> None:
        """Auto-stop recording after max duration (cancelled on normal stop)."""
        try:
            await asyncio.sleep(self._max_recording_seconds)
        except asyncio.CancelledError:
            return  # recording was stopped normally
        if call_id in self._active_recordings:
            logger.warning(f" Recording timeout for {call_id}, auto-stopping")
            await self.stop_recording(call_id)

    # ================================================================
    # Queries
    # ================================================================

    def get_recording(self, call_id: str) -> Optional[dict]:
        """Get the most recent recording metadata for a call, if any."""
        for meta in reversed(self._metadata):
            if meta["call_id"] == call_id:
                return meta
        return None

    def list_recordings(
        self,
        limit: int = 50,
        offset: int = 0,
    ) -> list[dict]:
        """List recording metadata, newest first (by started_at)."""
        sorted_meta = sorted(
            self._metadata,
            key=lambda m: m.get("started_at", ""),
            reverse=True,
        )
        return sorted_meta[offset : offset + limit]

    @property
    def active_recording_count(self) -> int:
        # Number of recordings currently in progress.
        return len(self._active_recordings)

    @property
    def total_recordings(self) -> int:
        # Number of finalized recordings tracked this process lifetime.
        return len(self._metadata)

    def storage_usage_bytes(self) -> int:
        """Calculate total storage used by recordings (walks the whole dir)."""
        total = 0
        for root, _dirs, files in os.walk(self._storage_dir):
            for f in files:
                total += os.path.getsize(os.path.join(root, f))
        return total
class RecordingSession:
    """Mutable record of one in-progress (or finished) call recording."""

    def __init__(
        self,
        call_id: str,
        filepath_mixed: Optional[str] = None,
        filepath_caller: Optional[str] = None,
        filepath_agent: Optional[str] = None,
        started_at: Optional[datetime] = None,
        sample_rate: int = 16000,
    ):
        self.call_id = call_id
        # Output file paths — None for variants not being written.
        self.filepath_mixed = filepath_mixed
        self.filepath_caller = filepath_caller
        self.filepath_agent = filepath_agent
        # Timing and size metadata, filled in as the recording progresses.
        self.started_at = started_at if started_at is not None else datetime.now()
        self.stopped_at: Optional[datetime] = None
        self.duration_seconds: Optional[int] = None
        self.file_size_bytes: Optional[int] = None
        self.sample_rate = sample_rate
        # SIP leg IDs being captured (populated by the recording service).
        self._leg_ids: list[str] = []

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly dict (datetimes as ISO-8601 strings)."""

        def _iso(moment: Optional[datetime]) -> Optional[str]:
            return moment.isoformat() if moment else None

        return {
            "call_id": self.call_id,
            "filepath_mixed": self.filepath_mixed,
            "filepath_caller": self.filepath_caller,
            "filepath_agent": self.filepath_agent,
            "started_at": _iso(self.started_at),
            "stopped_at": _iso(self.stopped_at),
            "duration_seconds": self.duration_seconds,
            "file_size_bytes": self.file_size_bytes,
            "sample_rate": self.sample_rate,
        }

161
services/transcription.py Normal file
View File

@@ -0,0 +1,161 @@
"""
Transcription Service — Speaches STT integration.
Sends audio to your Speaches instances for real-time speech-to-text.
Used by the Hold Slayer to understand IVR prompts and detect menu options.
"""
import io
import logging
from typing import Optional
import httpx
from config import SpeachesSettings
logger = logging.getLogger(__name__)
class TranscriptionService:
    """
    Client for the Speaches STT service.

    Speaches exposes an OpenAI-compatible endpoint:
        POST /v1/audio/transcriptions
    """

    def __init__(self, settings: SpeachesSettings):
        self.settings = settings
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        needs_new = self._client is None or self._client.is_closed
        if needs_new:
            self._client = httpx.AsyncClient(
                base_url=self.settings.url,
                timeout=httpx.Timeout(30.0, connect=5.0),
            )
        return self._client

    async def transcribe(
        self,
        audio_data: bytes,
        language: str = "en",
        prompt: Optional[str] = None,
    ) -> str:
        """
        Transcribe audio data to text.

        Args:
            audio_data: Raw PCM audio (16-bit signed, 16kHz, mono)
            language: Language code (default: "en")
            prompt: Optional context hint for better accuracy
                (e.g., "IVR menu options, phone banking")

        Returns:
            Transcribed text, or "" if the request failed.
        """
        http = await self._get_client()
        # The API wants a WAV container, not raw PCM.
        wav_payload = self._pcm_to_wav(audio_data)
        form: dict = {
            "model": self.settings.model,
            "language": language,
            "response_format": "text",
        }
        if prompt:
            form["prompt"] = prompt
        try:
            response = await http.post(
                "/v1/audio/transcriptions",
                files={"file": ("audio.wav", wav_payload, "audio/wav")},
                data=form,
            )
            response.raise_for_status()
            transcript = response.text.strip()
            logger.debug(f"Transcription: '{transcript}'")
            return transcript
        except httpx.HTTPStatusError as e:
            logger.error(f"Speaches API error: {e.response.status_code} {e.response.text}")
            return ""
        except httpx.ConnectError:
            logger.error(f"Cannot connect to Speaches at {self.settings.url}")
            return ""
        except Exception as e:
            logger.error(f"Transcription failed: {e}")
            return ""

    async def transcribe_stream(
        self,
        audio_data: bytes,
        language: str = "en",
    ):
        """
        Stream transcription — for near-real-time results.

        Currently splits the audio into fixed windows and transcribes each
        one; yields only non-empty results.
        # TODO: Implement WebSocket streaming when Speaches supports it

        Yields:
            str: Partial transcription chunks
        """
        chunk_size = 16000 * 2 * 3  # 3 seconds of 16kHz 16-bit mono
        for start in range(0, len(audio_data), chunk_size):
            segment = audio_data[start:start + chunk_size]
            if not segment:
                continue
            partial = await self.transcribe(segment, language)
            if partial:
                yield partial

    async def close(self) -> None:
        """Dispose of the HTTP client."""
        client, self._client = self._client, None
        if client and not client.is_closed:
            await client.aclose()

    @staticmethod
    def _pcm_to_wav(pcm_data: bytes, sample_rate: int = 16000, channels: int = 1, sample_width: int = 2) -> bytes:
        """
        Wrap raw PCM bytes in a minimal 44-byte WAV (RIFF) header.

        Args:
            pcm_data: Raw PCM audio bytes
            sample_rate: Sample rate in Hz (default: 16000)
            channels: Number of channels (default: 1 = mono)
            sample_width: Bytes per sample (default: 2 = 16-bit)

        Returns:
            WAV file as bytes
        """
        import struct

        data_size = len(pcm_data)
        byte_rate = sample_rate * channels * sample_width
        block_align = channels * sample_width
        header = b"".join((
            b"RIFF",
            struct.pack("<I", 36 + data_size),  # total file size minus 8
            b"WAVE",
            b"fmt ",
            # fmt chunk: size=16, format=1 (PCM), channels, rate,
            # byte rate, block align, bits per sample
            struct.pack(
                "<IHHIIHH",
                16, 1, channels, sample_rate, byte_rate, block_align, sample_width * 8,
            ),
            b"data",
            struct.pack("<I", data_size),
        ))
        return header + pcm_data