feat: add initial Hold Slayer AI telephony gateway implementation
Complete project scaffolding and core implementation of an AI-powered telephony system that calls companies, navigates IVR menus, waits on hold, and transfers to the user when a human answers. Key components: - FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces - SIP/VoIP call management via PJSUA2 with RTP audio streaming - LLM-powered IVR navigation using OpenAI/Anthropic with tool calling - Hold detection service combining audio analysis and silence detection - Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines - Call recording with per-channel and mixed audio capture - Event bus (asyncio pub/sub) for real-time client updates - Web dashboard with live call monitoring - SQLite persistence via SQLAlchemy with call history and analytics - Notification support (email, SMS, webhook, desktop) - Docker Compose deployment with Opal VoIP and Opal Media containers - Comprehensive test suite with unit, integration, and E2E tests - Simplified .gitignore and full project documentation in README
This commit is contained in:
717
services/hold_slayer.py
Normal file
717
services/hold_slayer.py
Normal file
@@ -0,0 +1,717 @@
|
||||
"""
|
||||
Hold Slayer Service — The main event.
|
||||
|
||||
Navigate IVR trees, wait on hold, detect when a human picks up,
|
||||
and transfer you in. This is the state machine that orchestrates
|
||||
the entire hold-slaying process.
|
||||
|
||||
Two modes:
|
||||
1. run_with_flow(): Follow a stored call flow tree (fast, reliable)
|
||||
2. run_exploration(): No stored flow — listen, transcribe, and figure it out
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from config import Settings
|
||||
from core.call_manager import CallManager
|
||||
from core.sip_engine import SIPEngine
|
||||
from models.call import ActiveCall, AudioClassification, CallStatus, ClassificationResult
|
||||
from models.call_flow import ActionType, CallFlow, CallFlowStep
|
||||
from models.events import EventType, GatewayEvent
|
||||
from services.audio_classifier import AudioClassifier
|
||||
from services.transcription import TranscriptionService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# LLM client is optional — imported at use time
|
||||
_llm_client = None
|
||||
|
||||
|
||||
def _get_llm():
|
||||
"""Lazy-load LLM client (optional dependency)."""
|
||||
global _llm_client
|
||||
if _llm_client is None:
|
||||
try:
|
||||
from config import get_settings
|
||||
from services.llm_client import LLMClient
|
||||
|
||||
settings = get_settings()
|
||||
_llm_client = LLMClient(
|
||||
base_url=settings.llm.base_url,
|
||||
model=settings.llm.model,
|
||||
api_key=settings.llm.api_key,
|
||||
timeout=settings.llm.timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"LLM client not available: {e}")
|
||||
_llm_client = False # Sentinel: don't retry
|
||||
return _llm_client if _llm_client is not False else None
|
||||
|
||||
|
||||
class HoldSlayerService:
    """
    The Hold Slayer.

    Navigates IVR menus, waits on hold, detects live humans,
    and transfers the call to your device.

    Collaborators are injected via the constructor; this service owns no
    I/O resources of its own and only orchestrates them.
    """

    def __init__(
        self,
        gateway,  # AIPSTNGateway (avoid circular import)
        call_manager: CallManager,
        sip_engine: SIPEngine,
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
    ):
        # Gateway that owns the call legs; used for the final transfer_call().
        self.gateway = gateway
        # Call registry / event bus: status updates, transcripts, classifications.
        self.call_manager = call_manager
        # SIP layer: audio streaming and DTMF sending per leg.
        self.sip_engine = sip_engine
        # Audio classifier: music / IVR prompt / live human / silence / ringing.
        self.classifier = classifier
        # Speech-to-text service used to read IVR prompts and verify humans.
        self.transcription = transcription
        # App settings (hold timeouts, check intervals, default transfer device).
        self.settings = settings
|
||||
|
||||
async def run(
|
||||
self,
|
||||
call: ActiveCall,
|
||||
sip_leg_id: str,
|
||||
call_flow_id: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Main entry point. Run the Hold Slayer on a call.
|
||||
|
||||
Args:
|
||||
call: The active call to work on
|
||||
sip_leg_id: SIP leg ID for the PSTN call
|
||||
call_flow_id: Optional stored call flow to follow
|
||||
|
||||
Returns:
|
||||
True if successfully transferred to user, False otherwise
|
||||
"""
|
||||
logger.info(f"🗡️ Hold Slayer activated for {call.remote_number}")
|
||||
logger.info(f" Intent: {call.intent}")
|
||||
logger.info(f" Call Flow: {call_flow_id or 'exploration mode'}")
|
||||
|
||||
try:
|
||||
# Wait for call to be connected
|
||||
await self._wait_for_connection(call, timeout=60)
|
||||
|
||||
if call_flow_id:
|
||||
# Load the stored call flow from the database
|
||||
flow = await self._load_call_flow(call_flow_id)
|
||||
if flow:
|
||||
return await self.run_with_flow(call, sip_leg_id, flow)
|
||||
else:
|
||||
logger.warning(f"Call flow '{call_flow_id}' not found, switching to exploration")
|
||||
|
||||
# No flow or flow not found — explore
|
||||
return await self.run_exploration(call, sip_leg_id)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.info(f"Hold Slayer cancelled for {call.id}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Hold Slayer error: {e}", exc_info=True)
|
||||
await self.call_manager.update_status(call.id, CallStatus.FAILED)
|
||||
return False
|
||||
|
||||
# ================================================================
|
||||
# Mode 1: Follow a Stored Call Flow
|
||||
# ================================================================
|
||||
|
||||
    async def run_with_flow(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        flow: CallFlow,
    ) -> bool:
        """
        Navigate using a stored call flow tree.
        Falls back to exploration for unknown steps.

        Walks the flow one step at a time starting from the first step,
        executing each step's action (HOLD / DTMF / WAIT / LISTEN / SPEAK /
        TRANSFER) and following next_step / fallback_step links.

        Args:
            call: The active call being navigated.
            sip_leg_id: SIP leg used for audio streaming and DTMF.
            flow: The stored call flow to follow.

        Returns:
            True if a TRANSFER step was reached and the call was handed to
            the user's device; False on hold timeout, a broken step link,
            or when the flow runs out of steps.
        """
        logger.info(f"📋 Following call flow: {flow.name}")
        steps = flow.steps_by_id()
        # Start at the first step; an empty flow terminates immediately.
        current_step_id = flow.steps[0].id if flow.steps else None

        while current_step_id:
            step = steps.get(current_step_id)
            if not step:
                # Broken link in the flow definition — give up on this flow.
                logger.error(f"Step '{current_step_id}' not found in flow")
                break

            call.current_step_id = current_step_id
            logger.info(f"📍 Step: {step.description}")

            # Let connected clients know which step we're on.
            await self.call_manager.event_bus.publish(GatewayEvent(
                type=EventType.IVR_STEP,
                call_id=call.id,
                data={"step_id": step.id, "description": step.description, "action": step.action.value},
                message=f"📍 IVR Step: {step.description}",
            ))

            # === Execute the step based on its action type ===

            if step.action == ActionType.HOLD:
                # HOLD MODE: Audio classifier takes over
                await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)
                logger.info(f"⏳ On hold. Activating hold detection...")

                human_detected = await self._wait_for_human(
                    call, sip_leg_id, timeout=step.timeout
                )

                if human_detected:
                    current_step_id = step.next_step
                else:
                    # Hold timed out without a human — abort the flow.
                    logger.warning("⏰ Hold timeout reached!")
                    break

            elif step.action == ActionType.DTMF:
                # Wait for the expected prompt, then send DTMF
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                if step.expect:
                    heard = await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                    if not heard and step.fallback_step:
                        # Prompt never arrived — jump to the fallback branch.
                        logger.info(f"⚠️ Didn't hear expected prompt, falling back")
                        current_step_id = step.fallback_step
                        continue

                # Send the DTMF digits
                if step.action_value:
                    await self.sip_engine.send_dtmf(sip_leg_id, step.action_value)
                    logger.info(f"📱 Pressed: {step.action_value}")

                    await self.call_manager.event_bus.publish(GatewayEvent(
                        type=EventType.IVR_DTMF_SENT,
                        call_id=call.id,
                        data={"digits": step.action_value, "step": step.id},
                        message=f"📱 DTMF sent: {step.action_value}",
                    ))

                # Small delay after DTMF for the IVR to process
                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.WAIT:
                # Just wait for a prompt
                if step.expect:
                    # Best-effort: result is ignored, we advance either way.
                    await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                else:
                    await asyncio.sleep(step.timeout)
                current_step_id = step.next_step

            elif step.action == ActionType.LISTEN:
                # Listen and decide — regex first, LLM fallback
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                transcript = await self._listen_for_menu(
                    call, sip_leg_id, step.timeout
                )

                # Phase 1: Try regex-based keyword matching (fast, no API call)
                decision = self._decide_menu_option(
                    transcript, call.intent or "", step.expect
                )

                # Phase 2: LLM fallback if regex couldn't decide
                if not decision and transcript:
                    llm = _get_llm()
                    if llm:
                        try:
                            logger.info("🤖 Regex inconclusive, asking LLM...")
                            llm_result = await llm.analyze_ivr_menu(
                                transcript=transcript,
                                intent=call.intent or "",
                                # dtmf_history may not exist on older ActiveCall objects.
                                previous_selections=list(call.dtmf_history) if hasattr(call, 'dtmf_history') else None,
                            )
                            decision = llm_result.get("digit")
                            if decision:
                                confidence = llm_result.get("confidence", 0)
                                reason = llm_result.get("reason", "")
                                logger.info(
                                    f"🤖 LLM decided: press {decision} "
                                    f"(confidence={confidence}, reason='{reason}')"
                                )
                        except Exception as e:
                            # LLM is strictly optional — fall through to default.
                            logger.warning(f"🤖 LLM fallback failed: {e}")

                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    logger.info(f"🧠 Decided: press {decision} (heard: '{transcript[:60]}...')")
                else:
                    # Default: press 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    logger.info(f"🧠 No clear match, pressing 0 for agent")

                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.SPEAK:
                # Say something into the call (TTS)
                # TODO: Implement TTS integration
                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
                await asyncio.sleep(3.0)
                current_step_id = step.next_step

            elif step.action == ActionType.TRANSFER:
                # We did it! Transfer to user's device
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                logger.info(f"🚨 TRANSFERRING TO {step.action_value}")

                # Prefer the step's explicit target, then the call's device,
                # then the configured default.
                device_target = step.action_value or call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device_target)
                return True

            else:
                # Forward-compatibility: skip actions this version doesn't know.
                logger.warning(f"Unknown action type: {step.action}")
                current_step_id = step.next_step

        return False
|
||||
|
||||
# ================================================================
|
||||
# Mode 2: Exploration (No Stored Flow)
|
||||
# ================================================================
|
||||
|
||||
    async def run_exploration(
        self,
        call: ActiveCall,
        sip_leg_id: str,
    ) -> bool:
        """
        No stored flow — explore the IVR blind.
        Records what it discovers so we can build a flow for next time.

        Loop: grab ~3s of audio, classify it, transcribe speech-like chunks,
        then act on the classification (transfer on human, monitor on music,
        navigate on IVR prompts, wait on silence/ringing).

        Args:
            call: The active call being explored.
            sip_leg_id: SIP leg used for audio streaming and DTMF.

        Returns:
            True if a live human was detected and the call transferred;
            False on timeout or when the call ends first.
        """
        logger.info(f"🔍 Exploration mode: discovering IVR for {call.remote_number}")
        await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

        # NOTE(review): discovered_steps is collected but not persisted
        # anywhere in this method — confirm where flow-building happens.
        discovered_steps: list[dict] = []
        max_time = self.settings.hold_slayer.max_hold_time
        start_time = time.time()

        while time.time() - start_time < max_time:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                break

            # Get audio and classify
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    # assumes 16 kHz, 16-bit mono audio — TODO confirm vs SIP engine
                    if len(audio_chunk) >= 16000 * 2 * 3:  # 3 seconds
                        break
            except Exception as e:
                logger.error(f"Audio stream error: {e}")
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                await asyncio.sleep(1.0)
                continue

            # Classify the audio
            classification = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(classification.audio_type)
            await self.call_manager.add_classification(call.id, classification)

            # Transcribe if it sounds like speech
            transcript = ""
            if classification.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                # The prompt biases the STT model toward IVR-style phrasing.
                transcript = await self.transcription.transcribe(
                    audio_chunk,
                    prompt="Phone IVR menu, customer service, press 1 for..."
                )
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)

            # Record discovery
            discovered_steps.append({
                "timestamp": time.time(),
                "audio_type": classification.audio_type.value,
                "confidence": classification.confidence,
                "transcript": transcript,
                "action_taken": None,
            })

            # === Decision Logic ===

            if classification.audio_type == AudioClassification.LIVE_HUMAN:
                # HUMAN DETECTED! Transfer!
                logger.info("🚨 LIVE HUMAN DETECTED!")
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                device = call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device)

                logger.info(f"📋 Discovered {len(discovered_steps)} IVR steps")
                return True

            elif classification.audio_type == AudioClassification.MUSIC:
                # On hold — just keep monitoring
                if current_call.status != CallStatus.ON_HOLD:
                    await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)

                # Check for hold→human transition
                if self.classifier.detect_hold_to_human_transition():
                    logger.info("🚨 Hold-to-human transition detected!")
                    await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                    device = call.device or self.settings.hold_slayer.default_transfer_device
                    await self.gateway.transfer_call(call.id, device)
                    return True

            elif classification.audio_type == AudioClassification.IVR_PROMPT and transcript:
                # IVR menu — try to navigate
                decision = self._decide_menu_option(
                    transcript, call.intent or "", None
                )
                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    # Record the action on the step we just appended above.
                    discovered_steps[-1]["action_taken"] = {"dtmf": decision}
                    logger.info(f"🧠 Exploration: pressed {decision}")
                    await asyncio.sleep(2.0)
                else:
                    # Try pressing 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    discovered_steps[-1]["action_taken"] = {"dtmf": "0", "reason": "default_agent"}
                    logger.info("🧠 Exploration: pressed 0 (trying for agent)")
                    await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.SILENCE:
                # Silence — wait a bit
                await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.RINGING:
                # Still ringing
                await asyncio.sleep(1.0)

        logger.warning(f"Hold Slayer timed out after {max_time}s")
        return False
|
||||
|
||||
# ================================================================
|
||||
# Core Detection Methods
|
||||
# ================================================================
|
||||
|
||||
    async def _wait_for_human(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 7200,
    ) -> bool:
        """
        Wait on hold until a live human is detected.

        Continuously classifies audio and watches for the
        music → speech transition.

        Two detection paths: (1) a chunk classified LIVE_HUMAN that also
        transcribes to at least 3 words, or (2) the classifier's
        hold-to-human transition detector firing on the rolling history.

        Args:
            call: The call currently on hold.
            sip_leg_id: SIP leg to sample audio from.
            timeout: Maximum seconds to wait (default 2 hours).

        Returns:
            True when a human is detected; False on timeout or if the call
            ends while waiting.
        """
        check_interval = self.settings.hold_slayer.hold_check_interval
        start_time = time.time()

        while time.time() - start_time < timeout:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                return False

            # Get audio chunk
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    # One classification window per check_interval seconds;
                    # assumes 16 kHz, 16-bit mono audio — TODO confirm.
                    if len(audio_chunk) >= int(16000 * 2 * check_interval):
                        break
            except Exception:
                # Stream hiccup — back off one interval and retry.
                await asyncio.sleep(check_interval)
                continue

            if not audio_chunk:
                await asyncio.sleep(check_interval)
                continue

            # Classify
            result = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(result.audio_type)
            await self.call_manager.add_classification(call.id, result)

            # Check for human
            if result.audio_type == AudioClassification.LIVE_HUMAN:
                # Verify with transcription
                transcript = await self.transcription.transcribe(audio_chunk)
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)
                    # If we got meaningful speech, it's probably a real person
                    if len(transcript.split()) >= 3:
                        logger.info(f"🚨 Human confirmed! Said: '{transcript[:100]}'")
                        return True

            # Check for the music→speech transition pattern
            if self.classifier.detect_hold_to_human_transition():
                logger.info("🚨 Hold-to-human transition detected!")
                return True

            # Log progress periodically
            # (fires on any iteration landing exactly on a 60s boundary)
            elapsed = int(time.time() - start_time)
            if elapsed > 0 and elapsed % 60 == 0:
                logger.info(
                    f"⏳ Still on hold... {elapsed}s "
                    f"(audio: {result.audio_type.value}, {result.confidence:.0%})"
                )

        return False
|
||||
|
||||
async def _wait_for_prompt(
|
||||
self,
|
||||
call: ActiveCall,
|
||||
sip_leg_id: str,
|
||||
expected_pattern: str,
|
||||
timeout: int = 30,
|
||||
) -> bool:
|
||||
"""
|
||||
Wait for an expected IVR prompt.
|
||||
|
||||
Listens, transcribes, and checks if the transcript matches
|
||||
the expected pattern (regex or keywords).
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
audio_chunk = b""
|
||||
try:
|
||||
async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
|
||||
audio_chunk += chunk
|
||||
if len(audio_chunk) >= 16000 * 2 * 3: # 3 seconds
|
||||
break
|
||||
except Exception:
|
||||
await asyncio.sleep(1.0)
|
||||
continue
|
||||
|
||||
if not audio_chunk:
|
||||
await asyncio.sleep(1.0)
|
||||
continue
|
||||
|
||||
# Classify first
|
||||
result = self.classifier.classify_chunk(audio_chunk)
|
||||
if result.audio_type not in (
|
||||
AudioClassification.IVR_PROMPT,
|
||||
AudioClassification.LIVE_HUMAN,
|
||||
):
|
||||
continue
|
||||
|
||||
# Transcribe
|
||||
transcript = await self.transcription.transcribe(audio_chunk)
|
||||
if not transcript:
|
||||
continue
|
||||
|
||||
await self.call_manager.add_transcript(call.id, transcript)
|
||||
|
||||
# Check if it matches expected pattern
|
||||
try:
|
||||
if re.search(expected_pattern, transcript, re.IGNORECASE):
|
||||
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
||||
return True
|
||||
except re.error:
|
||||
# Treat as keyword search if regex is invalid
|
||||
if expected_pattern.lower() in transcript.lower():
|
||||
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
||||
return True
|
||||
|
||||
logger.warning(f"⚠️ Didn't hear expected prompt within {timeout}s")
|
||||
return False
|
||||
|
||||
    async def _listen_for_menu(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 30,
    ) -> str:
        """Listen for an IVR menu and return the full transcript.

        Accumulates ~5s audio windows, transcribing the speech-like ones,
        until either the timeout elapses or silence follows speech (which
        is taken to mean the menu prompt finished).

        Args:
            call: The active call to listen on.
            sip_leg_id: SIP leg to sample audio from.
            timeout: Maximum seconds to keep listening.

        Returns:
            All transcribed segments joined with spaces; may be "" if
            nothing intelligible was heard.
        """
        transcript_parts: list[str] = []
        start_time = time.time()

        while time.time() - start_time < timeout:
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    # assumes 16 kHz, 16-bit mono audio — TODO confirm
                    if len(audio_chunk) >= 16000 * 2 * 5:  # 5 seconds
                        break
            except Exception:
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                # No audio at all — assume the prompt is over.
                break

            result = self.classifier.classify_chunk(audio_chunk)

            # If we're getting silence after speech, the menu prompt is done
            if result.audio_type == AudioClassification.SILENCE and transcript_parts:
                break

            if result.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                text = await self.transcription.transcribe(audio_chunk)
                if text:
                    transcript_parts.append(text)

        full_transcript = " ".join(transcript_parts)
        if full_transcript:
            # Store the combined prompt once, not per 5s window.
            await self.call_manager.add_transcript(call.id, full_transcript)

        return full_transcript
|
||||
|
||||
async def _wait_for_connection(self, call: ActiveCall, timeout: int = 60) -> None:
|
||||
"""Wait for the call to be connected (answered)."""
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
current = self.call_manager.get_call(call.id)
|
||||
if not current:
|
||||
raise RuntimeError(f"Call {call.id} disappeared")
|
||||
if current.status in (CallStatus.CONNECTED, CallStatus.NAVIGATING_IVR):
|
||||
return
|
||||
if current.status in (CallStatus.FAILED, CallStatus.CANCELLED):
|
||||
raise RuntimeError(f"Call {call.id} failed: {current.status}")
|
||||
await asyncio.sleep(0.5)
|
||||
raise TimeoutError(f"Call {call.id} not connected within {timeout}s")
|
||||
|
||||
# ================================================================
|
||||
# Menu Navigation Logic
|
||||
# ================================================================
|
||||
|
||||
def _decide_menu_option(
|
||||
self,
|
||||
transcript: str,
|
||||
intent: str,
|
||||
expected_options: Optional[str],
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Decide which menu option to select based on transcript and intent.
|
||||
|
||||
Simple keyword-based matching. This is where an LLM integration
|
||||
would massively improve navigation accuracy.
|
||||
|
||||
Returns:
|
||||
DTMF digit(s) to press, or None if can't decide
|
||||
"""
|
||||
transcript_lower = transcript.lower()
|
||||
intent_lower = intent.lower()
|
||||
|
||||
# Common IVR patterns: "press 1 for X, press 2 for Y"
|
||||
# Extract options
|
||||
options = re.findall(
|
||||
r'(?:press|dial|say)\s+(\d+)\s+(?:for|to)\s+(.+?)(?:\.|,|press|dial|$)',
|
||||
transcript_lower,
|
||||
)
|
||||
|
||||
if not options:
|
||||
# Try alternate patterns: "for X, press 1"
|
||||
options = re.findall(
|
||||
r'for\s+(.+?),?\s*(?:press|dial)\s+(\d+)',
|
||||
transcript_lower,
|
||||
)
|
||||
# Swap order to be (digit, description)
|
||||
options = [(digit, desc) for desc, digit in options]
|
||||
|
||||
if not options:
|
||||
return None
|
||||
|
||||
# Score each option against the intent
|
||||
best_match = None
|
||||
best_score = 0
|
||||
|
||||
# Keywords that map intents to IVR options
|
||||
intent_keywords = {
|
||||
"cancel": ["cancel", "close", "end", "terminate"],
|
||||
"dispute": ["dispute", "charge", "billing", "transaction", "statement"],
|
||||
"balance": ["balance", "account", "summary"],
|
||||
"agent": ["agent", "representative", "operator", "speak", "person", "human"],
|
||||
"payment": ["payment", "pay", "bill"],
|
||||
"card": ["card", "credit", "debit"],
|
||||
"fraud": ["fraud", "unauthorized", "stolen", "lost"],
|
||||
"transfer": ["transfer", "move", "send"],
|
||||
}
|
||||
|
||||
for digit, description in options:
|
||||
score = 0
|
||||
|
||||
# Direct keyword match in description
|
||||
for keyword_group, keywords in intent_keywords.items():
|
||||
if any(kw in intent_lower for kw in keywords):
|
||||
if any(kw in description for kw in keywords):
|
||||
score += 10
|
||||
|
||||
# Fuzzy: any word overlap between intent and description
|
||||
intent_words = set(intent_lower.split())
|
||||
desc_words = set(description.split())
|
||||
overlap = intent_words & desc_words
|
||||
score += len(overlap) * 3
|
||||
|
||||
# "Speak to agent" is usually what we want if nothing else matches
|
||||
if any(w in description for w in ["agent", "representative", "operator", "person"]):
|
||||
score += 5
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_match = digit
|
||||
|
||||
if best_match and best_score >= 3:
|
||||
return best_match
|
||||
|
||||
# Default: look for "agent" or "representative" option
|
||||
for digit, description in options:
|
||||
if any(w in description for w in ["agent", "representative", "operator"]):
|
||||
return digit
|
||||
|
||||
return None
|
||||
|
||||
async def _load_call_flow(self, flow_id: str) -> Optional[CallFlow]:
|
||||
"""Load a stored call flow from the database."""
|
||||
from db.database import get_session_factory, StoredCallFlow
|
||||
from sqlalchemy import select
|
||||
|
||||
try:
|
||||
factory = get_session_factory()
|
||||
async with factory() as session:
|
||||
result = await session.execute(
|
||||
select(StoredCallFlow).where(StoredCallFlow.id == flow_id)
|
||||
)
|
||||
row = result.scalar_one_or_none()
|
||||
if row:
|
||||
from models.call_flow import CallFlowStep
|
||||
return CallFlow(
|
||||
id=row.id,
|
||||
name=row.name,
|
||||
phone_number=row.phone_number,
|
||||
description=row.description or "",
|
||||
steps=[CallFlowStep(**s) for s in row.steps],
|
||||
tags=row.tags or [],
|
||||
notes=row.notes,
|
||||
avg_hold_time=row.avg_hold_time,
|
||||
success_rate=row.success_rate,
|
||||
last_used=row.last_used,
|
||||
times_used=row.times_used or 0,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load call flow '{flow_id}': {e}")
|
||||
|
||||
return None
|
||||
Reference in New Issue
Block a user