Complete project scaffolding and core implementation of an AI-powered telephony system that calls companies, navigates IVR menus, waits on hold, and transfers to the user when a human answers. Key components: - FastAPI server with REST API, WebSocket, and MCP (SSE) interfaces - SIP/VoIP call management via PJSUA2 with RTP audio streaming - LLM-powered IVR navigation using OpenAI/Anthropic with tool calling - Hold detection service combining audio analysis and silence detection - Real-time STT (Whisper/Deepgram) and TTS (OpenAI/Piper) pipelines - Call recording with per-channel and mixed audio capture - Event bus (asyncio pub/sub) for real-time client updates - Web dashboard with live call monitoring - SQLite persistence via SQLAlchemy with call history and analytics - Notification support (email, SMS, webhook, desktop) - Docker Compose deployment with Opal VoIP and Opal Media containers - Comprehensive test suite with unit, integration, and E2E tests - Simplified .gitignore and full project documentation in README
718 lines
28 KiB
Python
718 lines
28 KiB
Python
"""
|
|
Hold Slayer Service — The main event.
|
|
|
|
Navigate IVR trees, wait on hold, detect when a human picks up,
|
|
and transfer you in. This is the state machine that orchestrates
|
|
the entire hold-slaying process.
|
|
|
|
Two modes:
|
|
1. run_with_flow(): Follow a stored call flow tree (fast, reliable)
|
|
2. run_exploration(): No stored flow — listen, transcribe, and figure it out
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
import time
|
|
from typing import Optional
|
|
|
|
from config import Settings
|
|
from core.call_manager import CallManager
|
|
from core.sip_engine import SIPEngine
|
|
from models.call import ActiveCall, AudioClassification, CallStatus, ClassificationResult
|
|
from models.call_flow import ActionType, CallFlow, CallFlowStep
|
|
from models.events import EventType, GatewayEvent
|
|
from services.audio_classifier import AudioClassifier
|
|
from services.transcription import TranscriptionService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# LLM client is optional — imported at use time
|
|
_llm_client = None
|
|
|
|
|
|
def _get_llm():
|
|
"""Lazy-load LLM client (optional dependency)."""
|
|
global _llm_client
|
|
if _llm_client is None:
|
|
try:
|
|
from config import get_settings
|
|
from services.llm_client import LLMClient
|
|
|
|
settings = get_settings()
|
|
_llm_client = LLMClient(
|
|
base_url=settings.llm.base_url,
|
|
model=settings.llm.model,
|
|
api_key=settings.llm.api_key,
|
|
timeout=settings.llm.timeout,
|
|
)
|
|
except Exception as e:
|
|
logger.debug(f"LLM client not available: {e}")
|
|
_llm_client = False # Sentinel: don't retry
|
|
return _llm_client if _llm_client is not False else None
|
|
|
|
|
|
class HoldSlayerService:
    """
    The Hold Slayer.

    Navigates IVR menus, waits on hold, detects live humans,
    and transfers the call to your device.
    """

    def __init__(
        self,
        gateway,  # AIPSTNGateway (avoid circular import)
        call_manager: CallManager,
        sip_engine: SIPEngine,
        classifier: AudioClassifier,
        transcription: TranscriptionService,
        settings: Settings,
    ):
        # Owning gateway; provides transfer_call(). Left untyped to avoid
        # a circular import with the AIPSTNGateway module.
        self.gateway = gateway
        # Call registry: statuses, transcripts, classifications, event bus.
        self.call_manager = call_manager
        # SIP/RTP layer: DTMF sending and raw audio streaming.
        self.sip_engine = sip_engine
        # Heuristic audio classifier (music / IVR prompt / human / silence).
        self.classifier = classifier
        # Speech-to-text backend for IVR prompts and human speech.
        self.transcription = transcription
        # Application settings (hold timeouts, default transfer device, ...).
        self.settings = settings
|
async def run(
|
|
self,
|
|
call: ActiveCall,
|
|
sip_leg_id: str,
|
|
call_flow_id: Optional[str] = None,
|
|
) -> bool:
|
|
"""
|
|
Main entry point. Run the Hold Slayer on a call.
|
|
|
|
Args:
|
|
call: The active call to work on
|
|
sip_leg_id: SIP leg ID for the PSTN call
|
|
call_flow_id: Optional stored call flow to follow
|
|
|
|
Returns:
|
|
True if successfully transferred to user, False otherwise
|
|
"""
|
|
logger.info(f"🗡️ Hold Slayer activated for {call.remote_number}")
|
|
logger.info(f" Intent: {call.intent}")
|
|
logger.info(f" Call Flow: {call_flow_id or 'exploration mode'}")
|
|
|
|
try:
|
|
# Wait for call to be connected
|
|
await self._wait_for_connection(call, timeout=60)
|
|
|
|
if call_flow_id:
|
|
# Load the stored call flow from the database
|
|
flow = await self._load_call_flow(call_flow_id)
|
|
if flow:
|
|
return await self.run_with_flow(call, sip_leg_id, flow)
|
|
else:
|
|
logger.warning(f"Call flow '{call_flow_id}' not found, switching to exploration")
|
|
|
|
# No flow or flow not found — explore
|
|
return await self.run_exploration(call, sip_leg_id)
|
|
|
|
except asyncio.CancelledError:
|
|
logger.info(f"Hold Slayer cancelled for {call.id}")
|
|
return False
|
|
except Exception as e:
|
|
logger.error(f"Hold Slayer error: {e}", exc_info=True)
|
|
await self.call_manager.update_status(call.id, CallStatus.FAILED)
|
|
return False
|
|
|
|
# ================================================================
|
|
# Mode 1: Follow a Stored Call Flow
|
|
# ================================================================
|
|
|
|
    async def run_with_flow(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        flow: CallFlow,
    ) -> bool:
        """
        Navigate using a stored call flow tree.
        Falls back to exploration for unknown steps.

        Walks the flow from its first step, executing each step's action
        (HOLD / DTMF / WAIT / LISTEN / SPEAK / TRANSFER) and following
        ``next_step`` links until a TRANSFER succeeds or the walk dead-ends.

        Args:
            call: The active call being driven.
            sip_leg_id: SIP leg used for DTMF and audio streaming.
            flow: Stored call flow to follow.

        Returns:
            True only when a TRANSFER step hands the call to the user;
            False when the flow ends, a link is broken, or hold times out.
        """
        logger.info(f"📋 Following call flow: {flow.name}")
        steps = flow.steps_by_id()
        # Start at the first step; an empty flow skips the loop entirely.
        current_step_id = flow.steps[0].id if flow.steps else None

        while current_step_id:
            step = steps.get(current_step_id)
            if not step:
                # Broken next_step link — abort the flow (returns False).
                logger.error(f"Step '{current_step_id}' not found in flow")
                break

            call.current_step_id = current_step_id
            logger.info(f"📍 Step: {step.description}")

            # Let dashboard/WebSocket clients follow along in real time.
            await self.call_manager.event_bus.publish(GatewayEvent(
                type=EventType.IVR_STEP,
                call_id=call.id,
                data={"step_id": step.id, "description": step.description, "action": step.action.value},
                message=f"📍 IVR Step: {step.description}",
            ))

            # === Execute the step based on its action type ===

            if step.action == ActionType.HOLD:
                # HOLD MODE: Audio classifier takes over
                await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)
                logger.info(f"⏳ On hold. Activating hold detection...")

                human_detected = await self._wait_for_human(
                    call, sip_leg_id, timeout=step.timeout
                )

                if human_detected:
                    current_step_id = step.next_step
                else:
                    # Timed out without a human — give up on this flow.
                    logger.warning("⏰ Hold timeout reached!")
                    break

            elif step.action == ActionType.DTMF:
                # Wait for the expected prompt, then send DTMF
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                if step.expect:
                    heard = await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                    if not heard and step.fallback_step:
                        # Prompt never arrived — divert to the fallback branch.
                        logger.info(f"⚠️ Didn't hear expected prompt, falling back")
                        current_step_id = step.fallback_step
                        continue

                # Send the DTMF digits
                if step.action_value:
                    await self.sip_engine.send_dtmf(sip_leg_id, step.action_value)
                    logger.info(f"📱 Pressed: {step.action_value}")

                    await self.call_manager.event_bus.publish(GatewayEvent(
                        type=EventType.IVR_DTMF_SENT,
                        call_id=call.id,
                        data={"digits": step.action_value, "step": step.id},
                        message=f"📱 DTMF sent: {step.action_value}",
                    ))

                # Small delay after DTMF for the IVR to process
                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.WAIT:
                # Just wait for a prompt
                if step.expect:
                    await self._wait_for_prompt(
                        call, sip_leg_id, step.expect, step.timeout
                    )
                else:
                    # No expectation — a plain fixed-duration pause.
                    await asyncio.sleep(step.timeout)
                current_step_id = step.next_step

            elif step.action == ActionType.LISTEN:
                # Listen and decide — regex first, LLM fallback
                await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

                transcript = await self._listen_for_menu(
                    call, sip_leg_id, step.timeout
                )

                # Phase 1: Try regex-based keyword matching (fast, no API call)
                decision = self._decide_menu_option(
                    transcript, call.intent or "", step.expect
                )

                # Phase 2: LLM fallback if regex couldn't decide
                if not decision and transcript:
                    llm = _get_llm()
                    if llm:
                        try:
                            logger.info("🤖 Regex inconclusive, asking LLM...")
                            llm_result = await llm.analyze_ivr_menu(
                                transcript=transcript,
                                intent=call.intent or "",
                                previous_selections=list(call.dtmf_history) if hasattr(call, 'dtmf_history') else None,
                            )
                            decision = llm_result.get("digit")
                            if decision:
                                confidence = llm_result.get("confidence", 0)
                                reason = llm_result.get("reason", "")
                                logger.info(
                                    f"🤖 LLM decided: press {decision} "
                                    f"(confidence={confidence}, reason='{reason}')"
                                )
                        except Exception as e:
                            # LLM is best-effort; fall through to default below.
                            logger.warning(f"🤖 LLM fallback failed: {e}")

                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    logger.info(f"🧠 Decided: press {decision} (heard: '{transcript[:60]}...')")
                else:
                    # Default: press 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    logger.info(f"🧠 No clear match, pressing 0 for agent")

                await asyncio.sleep(2.0)
                current_step_id = step.next_step

            elif step.action == ActionType.SPEAK:
                # Say something into the call (TTS)
                # TODO: Implement TTS integration
                logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
                await asyncio.sleep(3.0)
                current_step_id = step.next_step

            elif step.action == ActionType.TRANSFER:
                # We did it! Transfer to user's device
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)
                logger.info(f"🚨 TRANSFERRING TO {step.action_value}")

                # Target precedence: step override → call's device → configured default.
                device_target = step.action_value or call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device_target)
                return True

            else:
                logger.warning(f"Unknown action type: {step.action}")
                current_step_id = step.next_step

        # Flow exhausted without a successful transfer.
        return False
|
|
|
|
# ================================================================
|
|
# Mode 2: Exploration (No Stored Flow)
|
|
# ================================================================
|
|
|
|
    async def run_exploration(
        self,
        call: ActiveCall,
        sip_leg_id: str,
    ) -> bool:
        """
        No stored flow — explore the IVR blind.
        Records what it discovers so we can build a flow for next time.

        Loop: capture ~3 s of audio, classify it, transcribe speech-like
        chunks, then react (transfer on a live human, keep waiting on hold
        music, press digits on IVR menus). Runs until a human is reached,
        the call ends, or ``max_hold_time`` elapses.

        Returns:
            True when the call was transferred to the user, else False.
        """
        logger.info(f"🔍 Exploration mode: discovering IVR for {call.remote_number}")
        await self.call_manager.update_status(call.id, CallStatus.NAVIGATING_IVR)

        # Running log of everything heard and done — raw material for
        # building a stored call flow later.
        discovered_steps: list[dict] = []
        max_time = self.settings.hold_slayer.max_hold_time
        start_time = time.time()

        while time.time() - start_time < max_time:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                break

            # Get audio and classify
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 3:  # 3 seconds
                        break
            except Exception as e:
                logger.error(f"Audio stream error: {e}")
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                await asyncio.sleep(1.0)
                continue

            # Classify the audio
            classification = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(classification.audio_type)
            await self.call_manager.add_classification(call.id, classification)

            # Transcribe if it sounds like speech
            transcript = ""
            if classification.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                # The prompt biases the STT model toward IVR-style phrasing.
                transcript = await self.transcription.transcribe(
                    audio_chunk,
                    prompt="Phone IVR menu, customer service, press 1 for..."
                )
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)

            # Record discovery
            discovered_steps.append({
                "timestamp": time.time(),
                "audio_type": classification.audio_type.value,
                "confidence": classification.confidence,
                "transcript": transcript,
                "action_taken": None,  # filled in below when digits are pressed
            })

            # === Decision Logic ===

            if classification.audio_type == AudioClassification.LIVE_HUMAN:
                # HUMAN DETECTED! Transfer!
                logger.info("🚨 LIVE HUMAN DETECTED!")
                await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                device = call.device or self.settings.hold_slayer.default_transfer_device
                await self.gateway.transfer_call(call.id, device)

                logger.info(f"📋 Discovered {len(discovered_steps)} IVR steps")
                return True

            elif classification.audio_type == AudioClassification.MUSIC:
                # On hold — just keep monitoring
                if current_call.status != CallStatus.ON_HOLD:
                    await self.call_manager.update_status(call.id, CallStatus.ON_HOLD)

                # Check for hold→human transition
                if self.classifier.detect_hold_to_human_transition():
                    logger.info("🚨 Hold-to-human transition detected!")
                    await self.call_manager.update_status(call.id, CallStatus.HUMAN_DETECTED)

                    device = call.device or self.settings.hold_slayer.default_transfer_device
                    await self.gateway.transfer_call(call.id, device)
                    return True

            elif classification.audio_type == AudioClassification.IVR_PROMPT and transcript:
                # IVR menu — try to navigate
                decision = self._decide_menu_option(
                    transcript, call.intent or "", None
                )
                if decision:
                    await self.sip_engine.send_dtmf(sip_leg_id, decision)
                    discovered_steps[-1]["action_taken"] = {"dtmf": decision}
                    logger.info(f"🧠 Exploration: pressed {decision}")
                    await asyncio.sleep(2.0)
                else:
                    # Try pressing 0 for agent
                    await self.sip_engine.send_dtmf(sip_leg_id, "0")
                    discovered_steps[-1]["action_taken"] = {"dtmf": "0", "reason": "default_agent"}
                    logger.info("🧠 Exploration: pressed 0 (trying for agent)")
                    await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.SILENCE:
                # Silence — wait a bit
                await asyncio.sleep(2.0)

            elif classification.audio_type == AudioClassification.RINGING:
                # Still ringing
                await asyncio.sleep(1.0)

        logger.warning(f"Hold Slayer timed out after {max_time}s")
        return False
|
|
|
|
# ================================================================
|
|
# Core Detection Methods
|
|
# ================================================================
|
|
|
|
    async def _wait_for_human(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 7200,
    ) -> bool:
        """
        Wait on hold until a live human is detected.

        Continuously classifies audio and watches for the
        music → speech transition.

        Args:
            call: The call currently on hold.
            sip_leg_id: SIP leg to pull audio from.
            timeout: Max seconds to stay on hold (default 2 hours).

        Returns:
            True when a human is detected (confirmed speech or the
            hold→human transition), False on timeout or call teardown.
        """
        check_interval = self.settings.hold_slayer.hold_check_interval
        start_time = time.time()

        while time.time() - start_time < timeout:
            # Check if call is still active
            current_call = self.call_manager.get_call(call.id)
            if not current_call or current_call.status in (
                CallStatus.COMPLETED, CallStatus.FAILED, CallStatus.CANCELLED
            ):
                return False

            # Get audio chunk
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    # 16 kHz * 2 bytes/sample → 32000 bytes per second of audio.
                    if len(audio_chunk) >= int(16000 * 2 * check_interval):
                        break
            except Exception:
                # Stream hiccup — back off one interval and retry.
                await asyncio.sleep(check_interval)
                continue

            if not audio_chunk:
                await asyncio.sleep(check_interval)
                continue

            # Classify
            result = self.classifier.classify_chunk(audio_chunk)
            self.classifier.update_history(result.audio_type)
            await self.call_manager.add_classification(call.id, result)

            # Check for human
            if result.audio_type == AudioClassification.LIVE_HUMAN:
                # Verify with transcription
                transcript = await self.transcription.transcribe(audio_chunk)
                if transcript:
                    await self.call_manager.add_transcript(call.id, transcript)
                    # If we got meaningful speech, it's probably a real person
                    if len(transcript.split()) >= 3:
                        logger.info(f"🚨 Human confirmed! Said: '{transcript[:100]}'")
                        return True

            # Check for the music→speech transition pattern
            if self.classifier.detect_hold_to_human_transition():
                logger.info("🚨 Hold-to-human transition detected!")
                return True

            # Log progress periodically
            # NOTE(review): only fires when elapsed lands exactly on a 60s
            # multiple, so some check intervals may skip it — confirm intent.
            elapsed = int(time.time() - start_time)
            if elapsed > 0 and elapsed % 60 == 0:
                logger.info(
                    f"⏳ Still on hold... {elapsed}s "
                    f"(audio: {result.audio_type.value}, {result.confidence:.0%})"
                )

        return False
|
|
|
|
async def _wait_for_prompt(
|
|
self,
|
|
call: ActiveCall,
|
|
sip_leg_id: str,
|
|
expected_pattern: str,
|
|
timeout: int = 30,
|
|
) -> bool:
|
|
"""
|
|
Wait for an expected IVR prompt.
|
|
|
|
Listens, transcribes, and checks if the transcript matches
|
|
the expected pattern (regex or keywords).
|
|
"""
|
|
start_time = time.time()
|
|
|
|
while time.time() - start_time < timeout:
|
|
audio_chunk = b""
|
|
try:
|
|
async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
|
|
audio_chunk += chunk
|
|
if len(audio_chunk) >= 16000 * 2 * 3: # 3 seconds
|
|
break
|
|
except Exception:
|
|
await asyncio.sleep(1.0)
|
|
continue
|
|
|
|
if not audio_chunk:
|
|
await asyncio.sleep(1.0)
|
|
continue
|
|
|
|
# Classify first
|
|
result = self.classifier.classify_chunk(audio_chunk)
|
|
if result.audio_type not in (
|
|
AudioClassification.IVR_PROMPT,
|
|
AudioClassification.LIVE_HUMAN,
|
|
):
|
|
continue
|
|
|
|
# Transcribe
|
|
transcript = await self.transcription.transcribe(audio_chunk)
|
|
if not transcript:
|
|
continue
|
|
|
|
await self.call_manager.add_transcript(call.id, transcript)
|
|
|
|
# Check if it matches expected pattern
|
|
try:
|
|
if re.search(expected_pattern, transcript, re.IGNORECASE):
|
|
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
|
return True
|
|
except re.error:
|
|
# Treat as keyword search if regex is invalid
|
|
if expected_pattern.lower() in transcript.lower():
|
|
logger.info(f"✅ Heard expected: '{transcript[:80]}'")
|
|
return True
|
|
|
|
logger.warning(f"⚠️ Didn't hear expected prompt within {timeout}s")
|
|
return False
|
|
|
|
    async def _listen_for_menu(
        self,
        call: ActiveCall,
        sip_leg_id: str,
        timeout: int = 30,
    ) -> str:
        """Listen for an IVR menu and return the full transcript.

        Accumulates ~5 s audio chunks, transcribing the speech-like ones,
        until silence follows speech (menu finished), the audio stream
        dries up, or ``timeout`` elapses. The joined transcript is also
        appended to the call's transcript log before being returned.
        """
        transcript_parts: list[str] = []
        start_time = time.time()

        while time.time() - start_time < timeout:
            audio_chunk = b""
            try:
                async for chunk in self.sip_engine.get_audio_stream(sip_leg_id):
                    audio_chunk += chunk
                    if len(audio_chunk) >= 16000 * 2 * 5:  # 5 seconds
                        break
            except Exception:
                # Stream hiccup — pause briefly and retry.
                await asyncio.sleep(1.0)
                continue

            if not audio_chunk:
                # No more audio — stop listening.
                break

            result = self.classifier.classify_chunk(audio_chunk)

            # If we're getting silence after speech, the menu prompt is done
            if result.audio_type == AudioClassification.SILENCE and transcript_parts:
                break

            if result.audio_type in (
                AudioClassification.IVR_PROMPT,
                AudioClassification.LIVE_HUMAN,
            ):
                text = await self.transcription.transcribe(audio_chunk)
                if text:
                    transcript_parts.append(text)

        full_transcript = " ".join(transcript_parts)
        if full_transcript:
            await self.call_manager.add_transcript(call.id, full_transcript)

        return full_transcript
|
|
|
|
async def _wait_for_connection(self, call: ActiveCall, timeout: int = 60) -> None:
|
|
"""Wait for the call to be connected (answered)."""
|
|
start = time.time()
|
|
while time.time() - start < timeout:
|
|
current = self.call_manager.get_call(call.id)
|
|
if not current:
|
|
raise RuntimeError(f"Call {call.id} disappeared")
|
|
if current.status in (CallStatus.CONNECTED, CallStatus.NAVIGATING_IVR):
|
|
return
|
|
if current.status in (CallStatus.FAILED, CallStatus.CANCELLED):
|
|
raise RuntimeError(f"Call {call.id} failed: {current.status}")
|
|
await asyncio.sleep(0.5)
|
|
raise TimeoutError(f"Call {call.id} not connected within {timeout}s")
|
|
|
|
# ================================================================
|
|
# Menu Navigation Logic
|
|
# ================================================================
|
|
|
|
def _decide_menu_option(
|
|
self,
|
|
transcript: str,
|
|
intent: str,
|
|
expected_options: Optional[str],
|
|
) -> Optional[str]:
|
|
"""
|
|
Decide which menu option to select based on transcript and intent.
|
|
|
|
Simple keyword-based matching. This is where an LLM integration
|
|
would massively improve navigation accuracy.
|
|
|
|
Returns:
|
|
DTMF digit(s) to press, or None if can't decide
|
|
"""
|
|
transcript_lower = transcript.lower()
|
|
intent_lower = intent.lower()
|
|
|
|
# Common IVR patterns: "press 1 for X, press 2 for Y"
|
|
# Extract options
|
|
options = re.findall(
|
|
r'(?:press|dial|say)\s+(\d+)\s+(?:for|to)\s+(.+?)(?:\.|,|press|dial|$)',
|
|
transcript_lower,
|
|
)
|
|
|
|
if not options:
|
|
# Try alternate patterns: "for X, press 1"
|
|
options = re.findall(
|
|
r'for\s+(.+?),?\s*(?:press|dial)\s+(\d+)',
|
|
transcript_lower,
|
|
)
|
|
# Swap order to be (digit, description)
|
|
options = [(digit, desc) for desc, digit in options]
|
|
|
|
if not options:
|
|
return None
|
|
|
|
# Score each option against the intent
|
|
best_match = None
|
|
best_score = 0
|
|
|
|
# Keywords that map intents to IVR options
|
|
intent_keywords = {
|
|
"cancel": ["cancel", "close", "end", "terminate"],
|
|
"dispute": ["dispute", "charge", "billing", "transaction", "statement"],
|
|
"balance": ["balance", "account", "summary"],
|
|
"agent": ["agent", "representative", "operator", "speak", "person", "human"],
|
|
"payment": ["payment", "pay", "bill"],
|
|
"card": ["card", "credit", "debit"],
|
|
"fraud": ["fraud", "unauthorized", "stolen", "lost"],
|
|
"transfer": ["transfer", "move", "send"],
|
|
}
|
|
|
|
for digit, description in options:
|
|
score = 0
|
|
|
|
# Direct keyword match in description
|
|
for keyword_group, keywords in intent_keywords.items():
|
|
if any(kw in intent_lower for kw in keywords):
|
|
if any(kw in description for kw in keywords):
|
|
score += 10
|
|
|
|
# Fuzzy: any word overlap between intent and description
|
|
intent_words = set(intent_lower.split())
|
|
desc_words = set(description.split())
|
|
overlap = intent_words & desc_words
|
|
score += len(overlap) * 3
|
|
|
|
# "Speak to agent" is usually what we want if nothing else matches
|
|
if any(w in description for w in ["agent", "representative", "operator", "person"]):
|
|
score += 5
|
|
|
|
if score > best_score:
|
|
best_score = score
|
|
best_match = digit
|
|
|
|
if best_match and best_score >= 3:
|
|
return best_match
|
|
|
|
# Default: look for "agent" or "representative" option
|
|
for digit, description in options:
|
|
if any(w in description for w in ["agent", "representative", "operator"]):
|
|
return digit
|
|
|
|
return None
|
|
|
|
async def _load_call_flow(self, flow_id: str) -> Optional[CallFlow]:
|
|
"""Load a stored call flow from the database."""
|
|
from db.database import get_session_factory, StoredCallFlow
|
|
from sqlalchemy import select
|
|
|
|
try:
|
|
factory = get_session_factory()
|
|
async with factory() as session:
|
|
result = await session.execute(
|
|
select(StoredCallFlow).where(StoredCallFlow.id == flow_id)
|
|
)
|
|
row = result.scalar_one_or_none()
|
|
if row:
|
|
from models.call_flow import CallFlowStep
|
|
return CallFlow(
|
|
id=row.id,
|
|
name=row.name,
|
|
phone_number=row.phone_number,
|
|
description=row.description or "",
|
|
steps=[CallFlowStep(**s) for s in row.steps],
|
|
tags=row.tags or [],
|
|
notes=row.notes,
|
|
avg_hold_time=row.avg_hold_time,
|
|
success_rate=row.success_rate,
|
|
last_used=row.last_used,
|
|
times_used=row.times_used or 0,
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"Failed to load call flow '{flow_id}': {e}")
|
|
|
|
return None
|