feat: add initial Hold Slayer AI telephony gateway implementation

Complete project scaffolding and core implementation of an AI-powered
telephony system that calls companies, navigates IVR menus, waits on
hold, and transfers to the user when a human answers.

Key components:
- FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces
- SIP/VoIP call management via PJSUA2 with RTP audio streaming
- LLM-powered IVR navigation using OpenAI/Anthropic with tool calling
- Hold detection service combining audio analysis and silence detection
- Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines
- Call recording with per-channel and mixed audio capture
- Event bus (asyncio pub/sub) for real-time client updates
- Web dashboard with live call monitoring
- SQLite persistence via SQLAlchemy with call history and analytics
- Notification support (email, SMS, webhook, desktop)
- Docker Compose deployment with Opal VoIP and Opal Media containers
- Comprehensive test suite with unit, integration, and E2E tests
- Simplified .gitignore and full project documentation in README
This commit is contained in:
2026-03-21 19:23:26 +00:00
parent c9ff60702b
commit ecf37658ce
56 changed files with 11601 additions and 164 deletions

View File

@@ -0,0 +1,339 @@
"""
Call Flow Learner — Builds and refines call flows from exploration data.
When Hold Slayer runs in exploration mode, it discovers IVR steps.
This service takes those discoveries and:
1. Builds a CallFlow tree that can be reused next time
2. Merges new discoveries into existing flows (refining them)
3. Uses LLM to label steps and infer menu structure
Over time, each phone number builds up a reliable call flow
that makes future calls faster and more accurate.
"""
import logging
import re
from datetime import datetime
from typing import Any, Optional
from models.call_flow import ActionType, CallFlow, CallFlowStep
logger = logging.getLogger(__name__)
class CallFlowLearner:
    """
    Learns IVR call flows from exploration data.

    Usage:
        learner = CallFlowLearner(llm_client=llm)

        # After an exploration call completes:
        flow = await learner.build_flow(
            phone_number="+18005551234",
            discovered_steps=steps_from_exploration,
            intent="cancel my card",
        )

        # Next time we call, merge new discoveries:
        updated = await learner.merge_discoveries(
            existing_flow=flow,
            new_steps=new_discoveries,
        )
    """

    # Audio classifications that are always worth recording as flow steps.
    _MEANINGFUL_AUDIO_TYPES = ("ivr_prompt", "live_human", "music")

    def __init__(self, llm_client=None):
        """
        Args:
            llm_client: Optional client used to relabel steps. When given, it
                must provide an awaitable ``chat_json(prompt, system=...)``.
                When omitted, steps keep their heuristic descriptions.
        """
        self._llm = llm_client

    # ================================================================
    # Build Flow from Exploration
    # ================================================================

    async def build_flow(
        self,
        phone_number: str,
        discovered_steps: list[dict],
        intent: Optional[str] = None,
        company_name: Optional[str] = None,
    ) -> "CallFlow":
        """
        Build a CallFlow from exploration discoveries.

        Args:
            phone_number: The number that was called.
            discovered_steps: List of step dicts from exploration mode:
                [{"timestamp": ..., "audio_type": "ivr_prompt",
                  "transcript": "Press 1 for...", "action_taken": {"dtmf": "1"}}, ...]
            intent: What the caller was trying to accomplish.
            company_name: Optional company name for labeling.

        Returns:
            A CallFlow that can be stored and reused. If no discovery could be
            turned into a step, an empty placeholder flow is returned instead.
        """
        logger.info(
            f"🧠 Building call flow from {len(discovered_steps)} discoveries "
            f"for {phone_number}"
        )

        # Phases 1-3: filter, convert and link the discoveries.
        flow_steps = self._steps_from_discoveries(discovered_steps)
        if not flow_steps:
            logger.warning(" No meaningful steps discovered")
            return self._empty_flow(phone_number, company_name)

        # Phase 4: Use LLM to enhance step labels if available
        if self._llm:
            flow_steps = await self._llm_enhance_steps(flow_steps, intent)

        # Sample the clock once so id, notes and last_used all agree.
        now = datetime.now()
        name = company_name or self._guess_company_name(phone_number)
        flow = CallFlow(
            id=self._flow_id(phone_number, now),
            name=f"{name} — {intent or 'General'}",
            phone_number=phone_number,
            description=f"Auto-learned flow for {name}. Intent: {intent or 'general'}",
            steps=flow_steps,
            tags=["auto-learned"],
            notes=f"Learned from exploration on {now.isoformat()}",
            times_used=1,
            last_used=now,
        )
        logger.info(
            f" ✅ Built flow '{flow.name}' with {len(flow_steps)} steps"
        )
        return flow

    def _steps_from_discoveries(
        self,
        discovered_steps: list[dict],
    ) -> list["CallFlowStep"]:
        """Filter raw discoveries, convert them to steps and chain them in order."""
        # Keep recognised audio types, plus anything where an action was taken
        # (this drops plain silence / ringing).
        meaningful = [
            discovery for discovery in discovered_steps
            if discovery.get("audio_type") in self._MEANINGFUL_AUDIO_TYPES
            or discovery.get("action_taken")
        ]

        flow_steps = []
        for index, discovery in enumerate(meaningful):
            flow_step = self._discovery_to_step(discovery, index, meaningful)
            if flow_step is not None:
                flow_steps.append(flow_step)

        # Each step points at its successor; the last step keeps its default.
        for current, following in zip(flow_steps, flow_steps[1:]):
            current.next_step = following.id
        return flow_steps

    def _discovery_to_step(
        self,
        discovery: dict,
        index: int,
        all_discoveries: list[dict],
    ) -> Optional["CallFlowStep"]:
        """
        Convert a single exploration discovery to a CallFlowStep.

        Args:
            discovery: One discovery dict (see ``build_flow``).
            index: Position of the discovery, used to build the step id.
            all_discoveries: The full discovery list (currently unused; kept
                for interface compatibility).

        Returns:
            The matching step, or None when the audio type is not one that
            maps to a step.
        """
        audio_type = discovery.get("audio_type", "")
        transcript = discovery.get("transcript", "")
        action_taken = discovery.get("action_taken")
        step_id = f"step_{index:03d}"

        if audio_type == "ivr_prompt":
            summary = self._summarize_menu(transcript)

            # Only treat the prompt as a DTMF step when digits were actually
            # sent; otherwise we would store a DTMF step that presses nothing.
            dtmf = ""
            if isinstance(action_taken, dict):
                raw_dtmf = action_taken.get("dtmf")
                dtmf = "" if raw_dtmf is None else str(raw_dtmf)

            if dtmf:
                # IVR menu where we pressed a button
                return CallFlowStep(
                    id=step_id,
                    description=summary or f"IVR menu (pressed {dtmf})",
                    action=ActionType.DTMF,
                    action_value=dtmf,
                    expect=self._extract_expect_pattern(transcript),
                    timeout=15,
                )

            # IVR prompt we just listened to
            return CallFlowStep(
                id=step_id,
                description=summary or "IVR announcement",
                action=ActionType.LISTEN,
                timeout=30,
            )

        if audio_type == "music":
            # Hold music
            return CallFlowStep(
                id=step_id,
                description="Hold music — waiting for agent",
                action=ActionType.HOLD,
                timeout=3600,
            )

        if audio_type == "live_human":
            # Human detected — this is the transfer point
            return CallFlowStep(
                id=step_id,
                description="Live agent detected — transfer",
                action=ActionType.TRANSFER,
                action_value="preferred_device",
            )

        return None

    # ================================================================
    # Merge New Discoveries into Existing Flow
    # ================================================================

    async def merge_discoveries(
        self,
        existing_flow: "CallFlow",
        new_steps: list[dict],
        intent: Optional[str] = None,
    ) -> "CallFlow":
        """
        Merge new exploration discoveries into an existing flow.

        Steps are matched on ``(action, action_value)``. A matched step has
        its timeout replaced by the average of the old and new timeout.
        Unmatched new steps are logged but never added, to avoid corrupting a
        working flow.

        Args:
            existing_flow: The stored flow to refine; it is mutated in place.
            new_steps: Discovery dicts from the latest exploration call.
            intent: Accepted for interface compatibility; not used by the
                merge itself.

        Returns:
            ``existing_flow``, with timeouts and usage metadata updated.
        """
        logger.info(
            f"🔄 Merging {len(new_steps)} new discoveries into "
            f"flow '{existing_flow.name}'"
        )

        # Convert directly instead of building a whole flow: descriptions are
        # not used by the merge, so an LLM labeling pass would be wasted.
        candidate_steps = self._steps_from_discoveries(new_steps)

        existing_by_action = {
            (s.action, s.action_value): s for s in existing_flow.steps
        }

        averaged_keys = set()
        unmatched = []
        for new_step in candidate_steps:
            key = (new_step.action, new_step.action_value)
            old_step = existing_by_action.get(key)
            if old_step is None:
                unmatched.append(new_step)
                continue
            if key in averaged_keys:
                # Average once per merge, however often the key repeats.
                continue
            averaged_keys.add(key)
            if old_step.timeout and new_step.timeout:
                old_step.timeout = int(
                    (old_step.timeout + new_step.timeout) / 2
                )

        if unmatched:
            labels = ", ".join(
                f"{s.id} ({s.description})" for s in unmatched
            )
            logger.info(
                f" {len(unmatched)} new step(s) not in existing flow "
                f"(not auto-added): {labels}"
            )

        # Update metadata
        existing_flow.times_used = (existing_flow.times_used or 0) + 1
        existing_flow.last_used = datetime.now()
        logger.info(f" ✅ Merged. Flow now has {len(existing_flow.steps)} steps")
        return existing_flow

    # ================================================================
    # LLM Enhancement
    # ================================================================

    async def _llm_enhance_steps(
        self,
        steps: list["CallFlowStep"],
        intent: Optional[str],
    ) -> list["CallFlowStep"]:
        """
        Use LLM to improve step descriptions.

        Best-effort: any failure is logged and the steps are returned with
        whatever descriptions they had at that point.
        """
        if not self._llm or not steps:
            return steps

        try:
            # Number every step so the model's 'step_index' is unambiguous.
            step_lines = []
            for index, step in enumerate(steps):
                line = f"{index}. {step.action.value}"
                if step.action_value:
                    line += f" ({step.action_value})"
                if step.description:
                    line += f": {step.description}"
                step_lines.append(line)

            prompt = (
                "These are steps discovered while navigating a phone IVR system.\n"
                f"Intent: {intent or 'general inquiry'}\n\n"
                "Steps:\n" + "\n".join(step_lines) + "\n\n"
                "For each step, provide a clear, concise description of what "
                "that step does. Return JSON array of objects with 'step_index' "
                "(the zero-based number shown before the step) "
                "and 'description' fields."
            )
            result = await self._llm.chat_json(
                prompt,
                system="You are labeling IVR phone menu steps for a call flow database.",
            )
            self._apply_llm_descriptions(steps, result)
        except Exception as e:
            logger.warning(f" LLM enhancement failed (non-fatal): {e}")
        return steps

    @staticmethod
    def _apply_llm_descriptions(steps: list, result: Any) -> int:
        """
        Copy descriptions from an LLM reply onto ``steps``.

        Accepts either a bare list of items or a dict with a "steps" list.
        Malformed items are skipped individually so one bad entry does not
        discard the rest.

        Returns:
            The number of descriptions applied.
        """
        items = result.get("steps") if isinstance(result, dict) else result
        if not isinstance(items, list):
            return 0

        applied = 0
        for item in items:
            if not isinstance(item, dict):
                continue
            raw_index = item.get("step_index")
            description = item.get("description")
            if isinstance(raw_index, bool):
                continue
            try:
                # Some models return the index as a string.
                index = int(raw_index)
            except (TypeError, ValueError):
                continue
            if not isinstance(description, str) or not description.strip():
                continue
            if 0 <= index < len(steps):
                steps[index].description = description.strip()
                applied += 1
        return applied

    # ================================================================
    # Helpers
    # ================================================================

    @staticmethod
    def _summarize_menu(transcript: str) -> Optional[str]:
        """
        Create a short summary of an IVR menu transcript.

        Returns:
            "IVR menu with N option(s)" when the text contains "press <digits>"
            phrases (N counts distinct digit strings), otherwise the trimmed
            transcript cut to 80 characters, or None for empty/blank input.
        """
        if not transcript or not transcript.strip():
            return None
        text = transcript.strip()

        # Count distinct options so a repeated menu is not counted twice.
        options = set(re.findall(r"press\s+(\d+)", text, flags=re.IGNORECASE))
        if options:
            noun = "option" if len(options) == 1 else "options"
            return f"IVR menu with {len(options)} {noun}"

        # Truncate long transcripts
        if len(text) > 80:
            return text[:77] + "..."
        return text

    @staticmethod
    def _extract_expect_pattern(transcript: str) -> Optional[str]:
        """
        Extract a regex pattern to match this prompt next time.

        Returns:
            The regex-escaped, lowercased first six words of the transcript,
            or None when it has fewer than four words.
        """
        if not transcript:
            return None
        words = transcript.split()
        # Fewer than four words is too short to be a distinctive phrase.
        if len(words) < 4:
            return None
        phrase = " ".join(words[:6])
        return re.escape(phrase.lower())

    @staticmethod
    def _guess_company_name(phone_number: str) -> str:
        """Guess company name from phone number (placeholder)."""
        # In production, this would do a reverse lookup
        return f"Company {phone_number[-4:]}"

    @staticmethod
    def _flow_id(phone_number: str, when: datetime) -> str:
        """Build a flow id from the number (without '+') and a timestamp."""
        return f"flow_{phone_number.replace('+', '')}_{when.strftime('%Y%m%d%H%M%S')}"

    @staticmethod
    def _empty_flow(phone_number: str, company_name: Optional[str]) -> "CallFlow":
        """Create an empty flow placeholder."""
        return CallFlow(
            id=CallFlowLearner._flow_id(phone_number, datetime.now()),
            name=f"{company_name or phone_number} — Empty",
            phone_number=phone_number,
            description="Empty flow — no meaningful steps discovered",
            steps=[],
            tags=["auto-learned", "empty"],
        )