# palladium/studies/202512_GenesysCX/ctm-token-calculator/tests/test_benefit_model.py

"""Benefit engine."""

from __future__ import annotations

import pytest

from tokencalc.benefit_model import (
    calculate_acw_summarization_benefit,
    calculate_email_ai_benefit,
    calculate_total_benefit,
    calculate_va_deflection_benefit,
)
from tokencalc.defaults import CTM_DEFAULT_FEATURE_SCOPES, CTM_DEFAULT_SITES
from tokencalc.inputs import WORKING_SECONDS_PER_YEAR, FeatureScope, SiteInput
from tokencalc.scenarios import BENEFIT_PARAMS

# The site_name of every entry in the default site list, in list order.
ALL_SITES = [site.site_name for site in CTM_DEFAULT_SITES]


def _small_site() -> SiteInput:
    """Return a small single-site fixture with round numbers for hand checks.

    The agent cost is picked so that 74,880 / (2,080 × 3,600) = 0.01, i.e. a
    labour rate of $0.01 per second, which keeps expected values easy to
    compute by hand.
    """
    fixture = SiteInput(
        "Small",
        "US",
        agents=10,
        supervisors=1,
        voice_volume_monthly=10_000,
        email_volume_monthly=1_000,
        chat_volume_monthly=0,
        sms_volume_monthly=0,
        voice_aht_seconds=300,
        email_aht_seconds=600,
        chat_aht_seconds=480,
        voice_acw_seconds=60,
        fully_loaded_agent_cost_annual=74_880,  # → $0.01/second exactly
        fully_loaded_supervisor_cost_annual=95_000,
    )
    return fixture


def test_acw_benefit_hand_check():
    """Check the ACW summarization benefit against a hand calculation.

    10,000 calls/month × 12 months × 70% eligible × 60 s ACW × 40% reduction
    × 50% Y1 realization × $0.01/s = $10,080.
    """
    small = _small_site()
    # Guard the fixture first: the hand calculation assumes $0.01 per second.
    assert small.agent_cost_per_second == pytest.approx(0.01)

    copilot_scope = FeatureScope("Agent Copilot", ["Small"])
    result = calculate_acw_summarization_benefit(
        [small], copilot_scope, "realistic", year=1
    )

    annual_calls = 10_000 * 12
    hand_value = annual_calls * 0.70 * 60 * 0.40 * 0.50 * 0.01
    assert result["annual_value"].sum() == pytest.approx(hand_value)


def test_email_benefit_split():
    """Email AI must emit two benefit lines; auto-respond matches a hand check.

    Auto-respond: 1,000 emails/month × 12 × 20% × 600 s × 50% × $0.01/s
    = $7,200.
    """
    email_scope = FeatureScope("Email AI (Auto-Respond)", ["Small"])
    result = calculate_email_ai_benefit(
        [_small_site()], email_scope, "realistic", year=1
    )

    expected_lines = {
        "Email Auto-Respond (displaced handling)",
        "Email Auto-Suggest (drafting time)",
    }
    assert set(result["benefit_line"]) == expected_lines

    # Of the two expected labels, only the auto-respond one contains "Respond".
    is_respond = result["benefit_line"].str.contains("Respond")
    respond_total = result[is_respond]["annual_value"].sum()
    assert respond_total == pytest.approx(7_200)


def test_scenarios_produce_distinct_benefits():
    """Year-2 totals must rise strictly: floor < realistic < stretch."""
    year_two_totals = []
    for scenario in ("floor", "realistic", "stretch"):
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES, CTM_DEFAULT_FEATURE_SCOPES, scenario, year=2
        )
        year_two_totals.append(frame["annual_value"].sum())

    floor_total, realistic_total, stretch_total = year_two_totals
    assert floor_total < realistic_total < stretch_total


def test_claim_exceeds_realistic():
    """With the scenario held at 'realistic', params='claim' must yield a
    larger year-1 total than params='realistic'."""
    totals = {}
    for params_name in ("realistic", "claim"):
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES,
            CTM_DEFAULT_FEATURE_SCOPES,
            "realistic",
            year=1,
            params=params_name,
        )
        totals[params_name] = frame["annual_value"].sum()

    assert totals["claim"] > totals["realistic"]


def test_benefits_ramp_by_year():
    """Realistic-scenario totals must grow strictly from year 1 to year 3."""
    yearly_totals = []
    for year_number in (1, 2, 3):
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES,
            CTM_DEFAULT_FEATURE_SCOPES,
            "realistic",
            year=year_number,
        )
        yearly_totals.append(frame["annual_value"].sum())

    year_one, year_two, year_three = yearly_totals
    assert year_one < year_two < year_three


def test_zero_volume_site_is_safe():
    """A site with no agents, no volume and no labour cost must total zero."""
    empty_site = SiteInput(
        "Empty",
        "US",
        agents=0,
        supervisors=0,
        voice_volume_monthly=0,
        email_volume_monthly=0,
        chat_volume_monthly=0,
        sms_volume_monthly=0,
        voice_aht_seconds=300,
        email_aht_seconds=600,
        chat_aht_seconds=480,
        voice_acw_seconds=0,
        fully_loaded_agent_cost_annual=0,
        fully_loaded_supervisor_cost_annual=0,
    )
    copilot_only = [FeatureScope("Agent Copilot", ["Empty"])]

    result = calculate_total_benefit(
        [empty_site], copilot_only, "realistic", year=1
    )
    total = result["annual_value"].sum()
    assert total == 0


def test_working_seconds_constant():
    """WORKING_SECONDS_PER_YEAR must equal 2,080 × 3,600."""
    hours_per_year = 2_080
    seconds_per_hour = 3_600
    assert WORKING_SECONDS_PER_YEAR == hours_per_year * seconds_per_hour


# ── Virtual Agent deflection tests ───────────────────────────────────────────

def test_va_bot_deflection_hand_check():
    """Check the Voice Bot deflection benefit against a hand calculation.

    10,000 calls/mo × 12 × 35% bot_rate × 300s AHT × 50% Y1 realization
    × realization_factor × $0.01/s, where

      realization_factor = completion × labour × (1 − callback)

    and, for the realistic parameter set, 0.70 × 0.80 × (1 − 0.05) = 0.532.
    """
    # Pull the haircut factors from the same table the model is configured by.
    completion_rate = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour_realization = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback_discount = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    realization_factor = (
        completion_rate * labour_realization * (1.0 - callback_discount)
    )

    bot_scope = FeatureScope("Voice Bot", ["Small"], deflection_target=0.35)
    result = calculate_va_deflection_benefit(
        [_small_site()], bot_scope, "realistic", year=1, params="realistic"
    )

    annual_calls = 10_000 * 12
    hand_value = (
        annual_calls
        * 0.35                 # bot deflection rate
        * 300                  # AHT seconds
        * 0.50                 # Y1 scenario realization
        * realization_factor   # completion × labour × (1 − callback)
        * 0.01                 # labour rate per second
    )
    assert result["annual_value"].sum() == pytest.approx(hand_value)


def test_va_agentic_deflection_uses_residual():
    """Agentic VA deflection must apply to the calls left after the bot.

    The VA rate acts on the residual (1 − bot_rate) pool, not the full volume.
    With bot_rate=0.35 and va_rate=0.15:

      residual     = 10,000 × (1 − 0.35) = 6,500 calls/mo
      va_deflected = 6,500 × 0.15        =   975 calls/mo
    """
    # realistic scenario: voice_bot_deflection = 0.35
    bot_rate = 0.35
    va_rate = 0.15

    completion_rate = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour_realization = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback_discount = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    realization_factor = (
        completion_rate * labour_realization * (1.0 - callback_discount)
    )

    va_scope = FeatureScope(
        "Agentic Virtual Agent", ["Small"], deflection_target=0.15
    )
    result = calculate_va_deflection_benefit(
        [_small_site()], va_scope, "realistic", year=1, params="realistic"
    )

    annual_calls = 10_000 * 12
    hand_value = (
        annual_calls
        * (1.0 - bot_rate) * va_rate   # residual × va_rate (layered)
        * 300                          # AHT seconds
        * 0.50                         # Y1 scenario realization
        * realization_factor
        * 0.01                         # labour rate per second
    )
    assert result["annual_value"].sum() == pytest.approx(hand_value)


def test_va_no_double_count():
    """Combined bot + VA benefit must be less than the naive additive sum.

    Naive (wrong): volume × (bot_rate + va_rate) × AHT × ...
    Correct (layered): volume × (bot_rate + (1−bot_rate)×va_rate) × AHT × ...

    With bot=35%, va=15%:
      naive total deflection = 50%
      layered total deflection = 35% + 65%×15% = 44.75%

    Both model calls pass params="realistic" explicitly because the expected
    values below are built from the "realistic" entries of BENEFIT_PARAMS;
    the test must not depend on whatever the function's default happens to be.
    """
    site = _small_site()
    bot_scope = FeatureScope("Voice Bot", ["Small"], deflection_target=0.35)
    va_scope = FeatureScope(
        "Agentic Virtual Agent", ["Small"], deflection_target=0.15
    )

    bot_df = calculate_va_deflection_benefit(
        [site], bot_scope, "realistic", year=1, params="realistic"
    )
    va_df = calculate_va_deflection_benefit(
        [site], va_scope, "realistic", year=1, params="realistic"
    )
    combined = bot_df["annual_value"].sum() + va_df["annual_value"].sum()

    # Haircut factors shared by the naive and the layered expectation.
    completion = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    real_factor = completion * labour * (1.0 - callback)

    # Naive additive (the old broken model): both rates on the full volume.
    naive = (
        10_000 * 12 * (0.35 + 0.15) * 300 * 0.50 * real_factor * 0.01
    )
    assert combined < naive, (
        f"Combined layered benefit ({combined:.2f}) should be less than "
        f"naive additive ({naive:.2f}) — double-count not fixed"
    )

    # Also verify the exact layered total.
    layered_deflection = 0.35 + (1.0 - 0.35) * 0.15  # = 0.4475
    expected_combined = (
        10_000 * 12 * layered_deflection * 300 * 0.50 * real_factor * 0.01
    )
    assert combined == pytest.approx(expected_combined)


def test_va_claim_params_reproduce_no_haircut():
    """params='claim' must apply zero haircuts (all factors = 1.0),
    reproducing the original Genesys ROI-doc assumption."""
    small = _small_site()

    def voice_bot_total(params_name):
        # A fresh scope per call, so the two runs share only the site fixture.
        frame = calculate_va_deflection_benefit(
            [small],
            FeatureScope("Voice Bot", ["Small"], deflection_target=0.35),
            "realistic",
            year=1,
            params=params_name,
        )
        return frame["annual_value"].sum()

    claim_total = voice_bot_total("claim")
    realistic_total = voice_bot_total("realistic")

    # claim should be strictly higher (no haircuts applied)
    assert claim_total > realistic_total

    # claim realization_factor = 1.0 × 1.0 × (1 − 0.0) = 1.0
    annual_calls = 10_000 * 12
    hand_value = annual_calls * 0.35 * 300 * 0.50 * 1.0 * 0.01
    assert claim_total == pytest.approx(hand_value)