Complete project scaffolding and core implementation of an AI-powered telephony system that calls companies, navigates IVR menus, waits on hold, and transfers to the user when a human answers. Key components: - FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces - SIP/VoIP call management via PJSUA2 with RTP audio streaming - LLM-powered IVR navigation using OpenAI/Anthropic with tool calling - Hold detection service combining audio analysis and silence detection - Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines - Call recording with per-channel and mixed audio capture - Event bus (asyncio pub/sub) for real-time client updates - Web dashboard with live call monitoring - SQLite persistence via SQLAlchemy with call history and analytics - Notification support (email, SMS, webhook, desktop) - Docker Compose deployment with Opal VoIP and Opal Media containers - Comprehensive test suite with unit, integration, and E2E tests - Simplified .gitignore and full project documentation in README
718 lines
28 KiB
Python
718 lines
28 KiB
Python
"""
|
|
Hold Slayer Service — The main event.
|
|
|
|
Navigate IVR trees, wait on hold, detect when a human picks up,
|
|
and transfer you in. This is the state machine that orchestrates
|
|
the entire hold-slaying process.
|
|
|
|
Two modes:
|
|
1. run_with_flow(): Follow a stored call flow tree (fast, reliable)
|
|
2. run_exploration(): No stored flow — listen, transcribe, and figure it out
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
import time
|
|
from typing import Optional
|
|
|
|
from config import Settings
|
|
from core.call_manager import CallManager
|
|
from core.sip_engine import SIPEngine
|
|
from models.call import ActiveCall, AudioClassification, CallStatus, ClassificationResult
|
|
from models.call_flow import ActionType, CallFlow, CallFlowStep
|
|
from models.events import EventType, GatewayEvent
|
|
from services.audio_classifier import AudioClassifier
|
|
from services.transcription import TranscriptionService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# LLM client is optional — imported at use time
|
|
_llm_client = None
|
|
|
|
|
|
def _get_llm():
|
|
"""Lazy-load LLM client (optional dependency)."""
|
|
global _llm_client
|
|
if _llm_client is None:
|
|
try:
|
|
from config import get_settings
|
|
from services.llm_client import LLMClient
|
|
|
|
settings = get_settings()
|
|
_llm_client = LLMClient(
|
|
base_url=settings.llm.base_url,
|
|
model=settings.llm.model,
|
|
api_key=settings.llm.api_key,
|
|
timeout=settings.llm.timeout,
|
|
)
|
|
except Exception as e:
|
|
logger.debug(f"LLM client not available: {e}")
|
|
_llm_client = False # Sentinel: don't retry
|
|
return _llm_client if _llm_client is not False else None
|
|
|
|
|
|
class HoldSlayerService:
    """
    The Hold Slayer.

    Navigates IVR menus, waits on hold, detects live humans,
    and transfers the call to your device.
    """

    def __init__(
        self,
        gateway,  # AIPSTNGateway (avoid circular import)
        call_manager: CallManager,
        sip_engine: SIPEngine,
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
    ):
        # Owning gateway; provides transfer_call(). Left untyped to avoid
        # a circular import with the AIPSTNGateway module.
        self.gateway = gateway
        # Call registry: statuses, transcripts, classifications, event bus.
        self.call_manager = call_manager
        # SIP/RTP layer: DTMF sending and raw audio streaming.
        self.sip_engine = sip_engine
        # Heuristic audio classifier (music / IVR prompt / human / silence).
        self.classifier = classifier
        # Speech-to-text backend for IVR prompts and human speech.
        self.transcription = transcription
        # Application settings (hold timeouts, default transfer device, ...).
        self.settings = settings
|
async def run(
|
|
self,
|
|
call: ActiveCall,
|
|
sip_leg_id: str,
|
|
call_flow_id: Optional[str] = None,
|
|
) -> bool:
|
|
"""
|
|
Main entry point. Run the Hold Slayer on a call.
|
|
|
|
Args:
|
|
call: The active call to work on
|
|
sip_leg_id: SIP leg ID for the PSTN call
|
|
call_flow_id: Optional stored call flow to follow
|
|
|
|
Returns:
|
|
True if successfully transferred to user, False otherwise
|
|
"""
|
|
logger.info(f"🗡️ Hold Slayer activated for {call.remote_number}")
|
|
logger.info(f" Intent: {call.intent}")
|
|
logger.info(f" Call Flow: {call_flow_id or 'exploration mode'}")
|
|
|
|
try:
|
|
# Wait for call to be connected
|
|
await self._wait_for_connection(call, timeout=60)
|
|
|
|
if call_flow_id:
|
|
# Load the stored call flow from the database
|
|
flow = await self._load_call_flow(call_flow_id)
|
|
if flow:
|
|
return await self.run_with_flow(call, sip_leg_id, flow)
|
|
else:
|
|
logger.warning(f"Call flow '{call_flow_id}' not found, switching to exploration")
|
|
|
|
# No flow or flow not found — explore
|
|
return await self.run_exploration(call, sip_leg_id)
|
|
|
|
except asyncio.CancelledError:
|
|
logger.info(f"Hold Slayer cancelled for {call.id}")
|
|
return False
|
|
except Exception as e:
|
|
logger.error(f"Hold Slayer error: {e}", exc_info=True)
|
|
await self.call_manager.update_status(call.id, CallStatus.FAILED)
|
|
return False
|
|
|
|
# ================================================================
|
|
# Mode 1: Follow a Stored Call Flow
|
|
# ================================================================
|
|
|
|
    async def run_with_flow(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        flow: CallFlow,
    ) -> bool:
        """
        Navigate using a stored call flow tree.
        Falls back to exploration for unknown steps.

        Walks the flow from its first step, executing each step's action
        (HOLD / DTMF / WAIT / LISTEN / SPEAK / TRANSFER) and following
        ``next_step`` links until a TRANSFER succeeds or the walk dead-ends.

        Args:
            call: The active call being driven.
            sip_leg_id: SIP leg used for DTMF and audio streaming.
            flow: Stored call flow to follow.

        Returns:
            True only when a TRANSFER step hands the call to the user;
            False when the flow ends, a link is broken, or hold times out.
        """
        logger.info(f"📋 Following call flow: {flow.name}")
        steps = flow.steps_by_id()
        # Start at the first step; an empty flow skips the loop entirely.
        current_step_id = flow.steps[0].id if flow.steps else None

        while current_step_id:
            step = steps.get(current_step_id)
            if not step:
                # Broken next_step link — abort the flow (returns False).
                logger.error(f"Step '{current_step_id}' not found in flow")
                break

            call.current_step_id = current_step_id
            logger.info(f"📍 Step: {step.description}")

            # Let dashboard/WebSocket clients follow along in real time.
            await self.call_manager.event_bus.publish(GatewayEvent(
                type=EventType.IVR_STEP,
                call_id=call.id,
                data={"step_id": step.id, "description": step.description, "action": step.action.value},
                message=f"📍 IVR Step: {step.description}",
            ))

            # === Execute the step based on its action type ===

            if step.action == ActionType.HOLD:
                # HOLD MODE: Audio classifier takes over
                await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)
                logger.info(f"⏳ On hold. Activating hold detection...")

                human_detected = await self._wait_for_human(
                    call, sip_leg_id, timeout=step.timeout
                )

                if human_detected:
                    current_step_id = step.next_step
                else:
                    # Timed out without a human — give up on this flow.
                    logger.warning("⏰ Hold timeout reached!")
                    break

            elif step.action == ActionType.DTMF:
                # Wait for the expected prompt, then send DTMF
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                if step.expect:
                    heard = await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                    if not heard and step.fallback_step:
                        # Prompt never arrived — divert to the fallback branch.
                        logger.info(f"⚠️ Didn't hear expected prompt, falling back")
                        current_step_id = step.fallback_step
                        continue

                # Send the DTMF digits
                if step.action_value:
                    await self.sip_engine.send_dtmf(sip_leg_id, step.action_value)
                    logger.info(f"📱 Pressed: {step.action_value}")

                    await self.call_manager.event_bus.publish(GatewayEvent(
                        type=EventType.IVR_DTMF_SENT,
                        call_id=call.id,
                        data={"digits": step.action_value, "step": step.id},
                        message=f"📱 DTMF sent: {step.action_value}",
                    ))

                # Small delay after DTMF for the IVR to process
                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.WAIT:
                # Just wait for a prompt
                if step.expect:
                    await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                else:
                    # No expectation — a plain fixed-duration pause.
                    await asyncio.sleep(step.timeout)
                current_step_id = step.next_step

            elif step.action == ActionType.LISTEN:
                # Listen and decide — regex first, LLM fallback
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                transcript = await self._listen_for_menu(
                    call, sip_leg_id, step.timeout
                )

                # Phase 1: Try regex-based keyword matching (fast, no API call)
                decision = self._decide_menu_option(
                    transcript, call.intent or "", step.expect
                )

                # Phase 2: LLM fallback if regex couldn't decide
                if not decision and transcript:
                    llm = _get_llm()
                    if llm:
                        try:
                            logger.info("🤖 Regex inconclusive, asking LLM...")
                            llm_result = await llm.analyze_ivr_menu(
                                transcript=transcript,
                                intent=call.intent or "",
                                previous_selections=list(call.dtmf_history) if hasattr(call, 'dtmf_history') else None,
                            )
                            decision = llm_result.get("digit")
                            if decision:
                                confidence = llm_result.get("confidence", 0)
                                reason = llm_result.get("reason", "")
                                logger.info(
                                    f"🤖 LLM decided: press {decision} "
                                    f"(confidence={confidence}, reason='{reason}')"
                                )
                        except Exception as e:
                            # LLM is best-effort; fall through to default below.
                            logger.warning(f"🤖 LLM fallback failed: {e}")

                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    logger.info(f"🧠 Decided: press {decision} (heard: '{transcript[:60]}...')")
                else:
                    # Default: press 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    logger.info(f"🧠 No clear match, pressing 0 for agent")

                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.SPEAK:
                # Say something into the call (TTS)
                # TODO: Implement TTS integration
                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
                await asyncio.sleep(3.0)
                current_step_id = step.next_step

            elif step.action == ActionType.TRANSFER:
                # We did it! Transfer to user's device
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                logger.info(f"🚨 TRANSFERRING TO {step.action_value}")

                # Target precedence: step override → call's device → configured default.
                device_target = step.action_value or call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device_target)
                return True

            else:
                logger.warning(f"Unknown action type: {step.action}")
                current_step_id = step.next_step

        # Flow exhausted without a successful transfer.
        return False
|
|
|
|
# ================================================================
|
|
# Mode 2: Exploration (No Stored Flow)
|
|
# ================================================================
|
|
|
|
    async def run_exploration(
        self,
        call: ActiveCall,
        sip_leg_id: str,
    ) -> bool:
        """
        No stored flow — explore the IVR blind.
        Records what it discovers so we can build a flow for next time.

        Loop: capture ~3 s of audio, classify it, transcribe speech-like
        chunks, then react (transfer on a live human, keep waiting on hold
        music, press digits on IVR menus). Runs until a human is reached,
        the call ends, or ``max_hold_time`` elapses.

        Returns:
            True when the call was transferred to the user, else False.
        """
        logger.info(f"🔍 Exploration mode: discovering IVR for {call.remote_number}")
        await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

        # Running log of everything heard and done — raw material for
        # building a stored call flow later.
        discovered_steps: list[dict] = []
        max_time = self.settings.hold_slayer.max_hold_time
        start_time = time.time()

        while time.time() - start_time < max_time:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                break

            # Get audio and classify
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 3:  # 3 seconds
                        break
            except Exception as e:
                logger.error(f"Audio stream error: {e}")
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                await asyncio.sleep(1.0)
                continue

            # Classify the audio
            classification = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(classification.audio_type)
            await self.call_manager.add_classification(call.id, classification)

            # Transcribe if it sounds like speech
            transcript = ""
            if classification.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                # The prompt biases the STT model toward IVR-style phrasing.
                transcript = await self.transcription.transcribe(
                    audio_chunk,
                    prompt="Phone IVR menu, customer service, press 1 for..."
                )
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)

            # Record discovery
            discovered_steps.append({
                "timestamp": time.time(),
                "audio_type": classification.audio_type.value,
                "confidence": classification.confidence,
                "transcript": transcript,
                "action_taken": None,  # filled in below when digits are pressed
            })

            # === Decision Logic ===

            if classification.audio_type == AudioClassification.LIVE_HUMAN:
                # HUMAN DETECTED! Transfer!
                logger.info("🚨 LIVE HUMAN DETECTED!")
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                device = call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device)

                logger.info(f"📋 Discovered {len(discovered_steps)} IVR steps")
                return True

            elif classification.audio_type == AudioClassification.MUSIC:
                # On hold — just keep monitoring
                if current_call.status != CallStatus.ON_HOLD:
                    await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)

                # Check for hold→human transition
                if self.classifier.detect_hold_to_human_transition():
                    logger.info("🚨 Hold-to-human transition detected!")
                    await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                    device = call.device or self.settings.hold_slayer.default_transfer_device
                    await self.gateway.transfer_call(call.id, device)
                    return True

            elif classification.audio_type == AudioClassification.IVR_PROMPT and transcript:
                # IVR menu — try to navigate
                decision = self._decide_menu_option(
                    transcript, call.intent or "", None
                )
                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    discovered_steps[-1]["action_taken"] = {"dtmf": decision}
                    logger.info(f"🧠 Exploration: pressed {decision}")
                    await asyncio.sleep(2.0)
                else:
                    # Try pressing 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    discovered_steps[-1]["action_taken"] = {"dtmf": "0", "reason": "default_agent"}
                    logger.info("🧠 Exploration: pressed 0 (trying for agent)")
                    await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.SILENCE:
                # Silence — wait a bit
                await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.RINGING:
                # Still ringing
                await asyncio.sleep(1.0)

        logger.warning(f"Hold Slayer timed out after {max_time}s")
        return False
|
|
|
|
# ================================================================
|
|
# Core Detection Methods
|
|
# ================================================================
|
|
|
|
    async def _wait_for_human(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 7200,
    ) -> bool:
        """
        Wait on hold until a live human is detected.

        Continuously classifies audio and watches for the
        music → speech transition.

        Args:
            call: The call currently on hold.
            sip_leg_id: SIP leg to pull audio from.
            timeout: Max seconds to stay on hold (default 2 hours).

        Returns:
            True when a human is detected (confirmed speech or the
            hold→human transition), False on timeout or call teardown.
        """
        check_interval = self.settings.hold_slayer.hold_check_interval
        start_time = time.time()

        while time.time() - start_time < timeout:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                return False

            # Get audio chunk
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    # 16 kHz * 2 bytes/sample → 32000 bytes per second of audio.
                    if len(audio_chunk) >= int(16000 * 2 * check_interval):
                        break
            except Exception:
                # Stream hiccup — back off one interval and retry.
                await asyncio.sleep(check_interval)
                continue

            if not audio_chunk:
                await asyncio.sleep(check_interval)
                continue

            # Classify
            result = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(result.audio_type)
            await self.call_manager.add_classification(call.id, result)

            # Check for human
            if result.audio_type == AudioClassification.LIVE_HUMAN:
                # Verify with transcription
                transcript = await self.transcription.transcribe(audio_chunk)
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)
                    # If we got meaningful speech, it's probably a real person
                    if len(transcript.split()) >= 3:
                        logger.info(f"🚨 Human confirmed! Said: '{transcript[:100]}'")
                        return True

            # Check for the music→speech transition pattern
            if self.classifier.detect_hold_to_human_transition():
                logger.info("🚨 Hold-to-human transition detected!")
                return True

            # Log progress periodically
            # NOTE(review): only fires when elapsed lands exactly on a 60s
            # multiple, so some check intervals may skip it — confirm intent.
            elapsed = int(time.time() - start_time)
            if elapsed > 0 and elapsed % 60 == 0:
                logger.info(
                    f"⏳ Still on hold... {elapsed}s "
                    f"(audio: {result.audio_type.value}, {result.confidence:.0%})"
                )

        return False
|
|
|
|
async def _wait_for_prompt(
|
|
self,
|
|
call: ActiveCall,
|
|
sip_leg_id: str,
|
|
expected_pattern: str,
|
|
timeout: int = 30,
|
|
) -> bool:
|
|
"""
|
|
Wait for an expected IVR prompt.
|
|
|
|
Listens, transcribes, and checks if the transcript matches
|
|
the expected pattern (regex or keywords).
|
|
"""
|
|
start_time = time.time()
|
|
|
|
while time.time() - start_time < timeout:
|
|
audio_chunk = b""
|
|
try:
|
|
async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
|
|
audio_chunk += chunk
|
|
if len(audio_chunk) >= 16000 * 2 * 3: # 3 seconds
|
|
break
|
|
except Exception:
|
|
await asyncio.sleep(1.0)
|
|
continue
|
|
|
|
if not audio_chunk:
|
|
await asyncio.sleep(1.0)
|
|
continue
|
|
|
|
# Classify first
|
|
result = self.classifier.classify_chunk(audio_chunk)
|
|
if result.audio_type not in (
|
|
AudioClassification.IVR_PROMPT,
|
|
AudioClassification.LIVE_HUMAN,
|
|
):
|
|
continue
|
|
|
|
# Transcribe
|
|
transcript = await self.transcription.transcribe(audio_chunk)
|
|
if not transcript:
|
|
continue
|
|
|
|
await self.call_manager.add_transcript(call.id, transcript)
|
|
|
|
# Check if it matches expected pattern
|
|
try:
|
|
if re.search(expected_pattern, transcript, re.IGNORECASE):
|
|
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
|
return True
|
|
except re.error:
|
|
# Treat as keyword search if regex is invalid
|
|
if expected_pattern.lower() in transcript.lower():
|
|
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
|
return True
|
|
|
|
logger.warning(f"⚠️ Didn't hear expected prompt within {timeout}s")
|
|
return False
|
|
|
|
    async def _listen_for_menu(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 30,
    ) -> str:
        """Listen for an IVR menu and return the full transcript.

        Accumulates ~5 s audio chunks, transcribing the speech-like ones,
        until silence follows speech (menu finished), the audio stream
        dries up, or ``timeout`` elapses. The joined transcript is also
        appended to the call's transcript log before being returned.
        """
        transcript_parts: list[str] = []
        start_time = time.time()

        while time.time() - start_time < timeout:
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 5:  # 5 seconds
                        break
            except Exception:
                # Stream hiccup — pause briefly and retry.
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                # No more audio — stop listening.
                break

            result = self.classifier.classify_chunk(audio_chunk)

            # If we're getting silence after speech, the menu prompt is done
            if result.audio_type == AudioClassification.SILENCE and transcript_parts:
                break

            if result.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                text = await self.transcription.transcribe(audio_chunk)
                if text:
                    transcript_parts.append(text)

        full_transcript = " ".join(transcript_parts)
        if full_transcript:
            await self.call_manager.add_transcript(call.id, full_transcript)

        return full_transcript
|
|
|
|
async def _wait_for_connection(self, call: ActiveCall, timeout: int = 60) -> None:
|
|
"""Wait for the call to be connected (answered)."""
|
|
start = time.time()
|
|
while time.time() - start < timeout:
|
|
current = self.call_manager.get_call(call.id)
|
|
if not current:
|
|
raise RuntimeError(f"Call {call.id} disappeared")
|
|
if current.status in (CallStatus.CONNECTED, CallStatus.NAVIGATING_IVR):
|
|
return
|
|
if current.status in (CallStatus.FAILED, CallStatus.CANCELLED):
|
|
raise RuntimeError(f"Call {call.id} failed: {current.status}")
|
|
await asyncio.sleep(0.5)
|
|
raise TimeoutError(f"Call {call.id} not connected within {timeout}s")
|
|
|
|
# ================================================================
|
|
# Menu Navigation Logic
|
|
# ================================================================
|
|
|
|
def _decide_menu_option(
|
|
self,
|
|
transcript: str,
|
|
intent: str,
|
|
expected_options: Optional[str],
|
|
) -> Optional[str]:
|
|
"""
|
|
Decide which menu option to select based on transcript and intent.
|
|
|
|
Simple keyword-based matching. This is where an LLM integration
|
|
would massively improve navigation accuracy.
|
|
|
|
Returns:
|
|
DTMF digit(s) to press, or None if can't decide
|
|
"""
|
|
transcript_lower = transcript.lower()
|
|
intent_lower = intent.lower()
|
|
|
|
# Common IVR patterns: "press 1 for X, press 2 for Y"
|
|
# Extract options
|
|
options = re.findall(
|
|
r'(?:press|dial|say)\s+(\d+)\s+(?:for|to)\s+(.+?)(?:\.|,|press|dial|$)',
|
|
transcript_lower,
|
|
)
|
|
|
|
if not options:
|
|
# Try alternate patterns: "for X, press 1"
|
|
options = re.findall(
|
|
r'for\s+(.+?),?\s*(?:press|dial)\s+(\d+)',
|
|
transcript_lower,
|
|
)
|
|
# Swap order to be (digit, description)
|
|
options = [(digit, desc) for desc, digit in options]
|
|
|
|
if not options:
|
|
return None
|
|
|
|
# Score each option against the intent
|
|
best_match = None
|
|
best_score = 0
|
|
|
|
# Keywords that map intents to IVR options
|
|
intent_keywords = {
|
|
"cancel": ["cancel", "close", "end", "terminate"],
|
|
"dispute": ["dispute", "charge", "billing", "transaction", "statement"],
|
|
"balance": ["balance", "account", "summary"],
|
|
"agent": ["agent", "representative", "operator", "speak", "person", "human"],
|
|
"payment": ["payment", "pay", "bill"],
|
|
"card": ["card", "credit", "debit"],
|
|
"fraud": ["fraud", "unauthorized", "stolen", "lost"],
|
|
"transfer": ["transfer", "move", "send"],
|
|
}
|
|
|
|
for digit, description in options:
|
|
score = 0
|
|
|
|
# Direct keyword match in description
|
|
for keyword_group, keywords in intent_keywords.items():
|
|
if any(kw in intent_lower for kw in keywords):
|
|
if any(kw in description for kw in keywords):
|
|
score += 10
|
|
|
|
# Fuzzy: any word overlap between intent and description
|
|
intent_words = set(intent_lower.split())
|
|
desc_words = set(description.split())
|
|
overlap = intent_words & desc_words
|
|
score += len(overlap) * 3
|
|
|
|
# "Speak to agent" is usually what we want if nothing else matches
|
|
if any(w in description for w in ["agent", "representative", "operator", "person"]):
|
|
score += 5
|
|
|
|
if score > best_score:
|
|
best_score = score
|
|
best_match = digit
|
|
|
|
if best_match and best_score >= 3:
|
|
return best_match
|
|
|
|
# Default: look for "agent" or "representative" option
|
|
for digit, description in options:
|
|
if any(w in description for w in ["agent", "representative", "operator"]):
|
|
return digit
|
|
|
|
return None
|
|
|
|
async def _load_call_flow(self, flow_id: str) -> Optional[CallFlow]:
|
|
"""Load a stored call flow from the database."""
|
|
from db.database import get_session_factory, StoredCallFlow
|
|
from sqlalchemy import select
|
|
|
|
try:
|
|
factory = get_session_factory()
|
|
async with factory() as session:
|
|
result = await session.execute(
|
|
select(StoredCallFlow).where(StoredCallFlow.id == flow_id)
|
|
)
|
|
row = result.scalar_one_or_none()
|
|
if row:
|
|
from models.call_flow import CallFlowStep
|
|
return CallFlow(
|
|
id=row.id,
|
|
name=row.name,
|
|
phone_number=row.phone_number,
|
|
description=row.description or "",
|
|
steps=[CallFlowStep(**s) for s in row.steps],
|
|
tags=row.tags or [],
|
|
notes=row.notes,
|
|
avg_hold_time=row.avg_hold_time,
|
|
success_rate=row.success_rate,
|
|
last_used=row.last_used,
|
|
times_used=row.times_used or 0,
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"Failed to load call flow '{flow_id}': {e}")
|
|
|
|
return None
|