Files
2026-06-10 14:28:16 -04:00

240 lines
8.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Benefit engine."""
from __future__ import annotations
import pytest
from tokencalc.benefit_model import (
calculate_acw_summarization_benefit,
calculate_email_ai_benefit,
calculate_total_benefit,
calculate_va_deflection_benefit,
)
from tokencalc.defaults import CTM_DEFAULT_FEATURE_SCOPES, CTM_DEFAULT_SITES
from tokencalc.inputs import WORKING_SECONDS_PER_YEAR, FeatureScope, SiteInput
from tokencalc.scenarios import BENEFIT_PARAMS
# Names of every default site, in the same order as CTM_DEFAULT_SITES.
ALL_SITES = [site.site_name for site in CTM_DEFAULT_SITES]
def _small_site() -> SiteInput:
    """Build the small "Small"/"US" fixture site used by the hand-check tests.

    The numbers are deliberately round so expected values can be worked out
    by hand. The agent cost of 74,880 is chosen so the per-second labour rate
    comes out at $0.01 (test_acw_benefit_hand_check asserts this).
    """
    headcount = dict(agents=10, supervisors=1)
    monthly_volumes = dict(
        voice_volume_monthly=10_000,
        email_volume_monthly=1_000,
        chat_volume_monthly=0,
        sms_volume_monthly=0,
    )
    handle_times = dict(
        voice_aht_seconds=300,
        email_aht_seconds=600,
        chat_aht_seconds=480,
        voice_acw_seconds=60,
    )
    annual_costs = dict(
        fully_loaded_agent_cost_annual=74_880,  # -> $0.01/second exactly
        fully_loaded_supervisor_cost_annual=95_000,
    )
    return SiteInput(
        "Small",
        "US",
        **headcount,
        **monthly_volumes,
        **handle_times,
        **annual_costs,
    )
def test_acw_benefit_hand_check():
    """ACW summarization value on the small site matches a hand calculation.

    10,000 calls/mo × 12 months × 70% eligible × 60 s ACW × 40% reduction
    × 50% Y1 realization × $0.01/s = $10,080.
    """
    small = _small_site()
    # Guard the fixture first: the arithmetic below relies on a $0.01/s rate.
    assert small.agent_cost_per_second == pytest.approx(0.01)

    copilot_scope = FeatureScope("Agent Copilot", ["Small"])
    result = calculate_acw_summarization_benefit(
        [small], copilot_scope, "realistic", year=1,
    )

    annual_calls = 10_000 * 12
    hand_value = annual_calls * 0.70 * 60 * 0.40 * 0.50 * 0.01
    actual_value = result["annual_value"].sum()
    assert actual_value == pytest.approx(hand_value)
def test_email_benefit_split():
    """Email AI benefit is reported as two lines, and auto-respond is $7,200.

    Checks that exactly the auto-respond and auto-suggest benefit lines are
    present, then hand-checks the auto-respond value for the small site.
    """
    email_scope = FeatureScope("Email AI (Auto-Respond)", ["Small"])
    result = calculate_email_ai_benefit(
        [_small_site()], email_scope, "realistic", year=1,
    )

    expected_lines = {
        "Email Auto-Respond (displaced handling)",
        "Email Auto-Suggest (drafting time)",
    }
    assert set(result["benefit_line"]) == expected_lines

    # auto-respond: 1,000 × 12 × 20% × 600 s × 50% × $0.01 = $7,200
    is_respond = result["benefit_line"].str.contains("Respond")
    respond_value = result[is_respond]["annual_value"].sum()
    assert respond_value == pytest.approx(7_200)
def test_scenarios_produce_distinct_benefits():
    """Year-2 total benefit is strictly ordered floor < realistic < stretch."""

    def total_for(scenario):
        # Total annual value across the default sites and feature scopes.
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES, CTM_DEFAULT_FEATURE_SCOPES, scenario, year=2
        )
        return frame["annual_value"].sum()

    floor_total, realistic_total, stretch_total = [
        total_for(scenario) for scenario in ("floor", "realistic", "stretch")
    ]
    assert floor_total < realistic_total < stretch_total
def test_claim_exceeds_realistic():
    """With the scenario held at "realistic", params="claim" yields a larger
    year-1 total than params="realistic"."""

    def total_with(param_set):
        # Same sites, scopes, scenario and year; only the params set varies.
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES, CTM_DEFAULT_FEATURE_SCOPES, "realistic", year=1,
            params=param_set,
        )
        return frame["annual_value"].sum()

    realistic_total = total_with("realistic")
    claim_total = total_with("claim")
    assert claim_total > realistic_total
def test_benefits_ramp_by_year():
    """Realistic-scenario total benefit grows strictly from year 1 to year 3."""
    yearly_totals = []
    for year in (1, 2, 3):
        frame = calculate_total_benefit(
            CTM_DEFAULT_SITES, CTM_DEFAULT_FEATURE_SCOPES, "realistic", year=year
        )
        yearly_totals.append(frame["annual_value"].sum())

    year_1, year_2, year_3 = yearly_totals
    assert year_1 < year_2 < year_3
def test_zero_volume_site_is_safe():
    """A site with zero headcount, volume, ACW and cost yields zero benefit.

    Runs the all-zero edge case through calculate_total_benefit for a single
    "Agent Copilot" scope and expects the summed annual value to be 0.
    """
    empty_site = SiteInput(
        "Empty",
        "US",
        agents=0,
        supervisors=0,
        voice_volume_monthly=0,
        email_volume_monthly=0,
        chat_volume_monthly=0,
        sms_volume_monthly=0,
        # Handle times stay non-zero; only volumes, ACW and costs are zeroed.
        voice_aht_seconds=300,
        email_aht_seconds=600,
        chat_aht_seconds=480,
        voice_acw_seconds=0,
        fully_loaded_agent_cost_annual=0,
        fully_loaded_supervisor_cost_annual=0,
    )
    copilot_scope = FeatureScope("Agent Copilot", ["Empty"])
    result = calculate_total_benefit(
        [empty_site], [copilot_scope], "realistic", year=1,
    )
    total_value = result["annual_value"].sum()
    assert total_value == 0
def test_working_seconds_constant():
    """WORKING_SECONDS_PER_YEAR equals 2,080 × 3,600 (= 7,488,000).

    Presumably 2,080 working hours (52 weeks × 40 h) expressed in seconds.
    """
    expected_seconds = 2_080 * 3_600
    assert WORKING_SECONDS_PER_YEAR == expected_seconds
# ── Virtual Agent deflection tests ───────────────────────────────────────────
def test_va_bot_deflection_hand_check():
    """Voice Bot deflection value on the small site matches a hand calculation.

    10,000 calls/mo × 12 × 35% bot_rate × 300 s AHT
    × 50% Y1 realization × realization_factor × $0.01/s.

    realistic realization_factor = 0.70 × 0.80 × (1 - 0.05) = 0.532
    """
    small = _small_site()
    bot_scope = FeatureScope("Voice Bot", ["Small"], deflection_target=0.35)
    result = calculate_va_deflection_benefit(
        [small], bot_scope, "realistic", year=1, params="realistic",
    )

    # Read the haircut factors from the shared table instead of hard-coding.
    completion_rate = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour_realization = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback_discount = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    realization_factor = (
        completion_rate * labour_realization * (1.0 - callback_discount)
    )

    annual_calls = 10_000 * 12
    hand_value = (
        annual_calls
        * 0.35                  # bot deflection rate
        * 300                   # AHT seconds
        * 0.50                  # Y1 scenario realization
        * realization_factor    # completion × labour × (1 - callback)
        * 0.01                  # labour rate per second
    )
    actual_value = result["annual_value"].sum()
    assert actual_value == pytest.approx(hand_value)
def test_va_agentic_deflection_uses_residual():
    """Agentic VA must operate on the residual (1 - bot_rate) call pool,
    not the full volume.

    With bot_rate=0.35 and va_rate=0.15:
        residual     = 10,000 × (1 - 0.35) = 6,500 calls/mo
        va_deflected = 6,500 × 0.15        = 975 calls/mo
    """
    small = _small_site()
    va_scope = FeatureScope(
        "Agentic Virtual Agent", ["Small"], deflection_target=0.15,
    )
    result = calculate_va_deflection_benefit(
        [small], va_scope, "realistic", year=1, params="realistic",
    )

    completion_rate = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour_realization = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback_discount = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    realization_factor = (
        completion_rate * labour_realization * (1.0 - callback_discount)
    )

    # realistic scenario: voice_bot_deflection = 0.35
    bot_rate = 0.35
    va_rate = 0.15
    annual_calls = 10_000 * 12
    hand_value = (
        annual_calls
        * (1.0 - bot_rate) * va_rate    # residual × va_rate (layered)
        * 300                           # AHT seconds
        * 0.50                          # Y1 scenario realization
        * realization_factor
        * 0.01                          # labour rate per second
    )
    actual_value = result["annual_value"].sum()
    assert actual_value == pytest.approx(hand_value)
def test_va_no_double_count():
    """Combined bot + VA benefit must be less than the naive additive sum.

    Naive (wrong):     volume × (bot_rate + va_rate) × AHT × ...
    Correct (layered): volume × (bot_rate + (1 - bot_rate) × va_rate) × AHT × ...

    With bot=35%, va=15%:
        naive total deflection   = 50%
        layered total deflection = 35% + 65% × 15% = 44.75%
    """
    bot_rate = 0.35
    va_rate = 0.15

    site = _small_site()
    bot_scope = FeatureScope("Voice Bot", ["Small"], deflection_target=bot_rate)
    va_scope = FeatureScope(
        "Agentic Virtual Agent", ["Small"], deflection_target=va_rate,
    )

    # Pin params="realistic" explicitly: the expected values below are built
    # from the "realistic" entries of BENEFIT_PARAMS, so the calls must not
    # depend on whatever the function's default params happens to be. This
    # also matches the other VA deflection tests in this module.
    bot_df = calculate_va_deflection_benefit(
        [site], bot_scope, "realistic", year=1, params="realistic",
    )
    va_df = calculate_va_deflection_benefit(
        [site], va_scope, "realistic", year=1, params="realistic",
    )
    combined = bot_df["annual_value"].sum() + va_df["annual_value"].sum()

    completion = BENEFIT_PARAMS["va_completion_rate"]["realistic"]
    labour = BENEFIT_PARAMS["va_labour_realization"]["realistic"]
    callback = BENEFIT_PARAMS["va_callback_discount"]["realistic"]
    real_factor = completion * labour * (1.0 - callback)

    # Naive additive (the old broken model): both rates on the full volume.
    naive = (
        10_000 * 12 * (bot_rate + va_rate) * 300 * 0.50 * real_factor * 0.01
    )
    assert combined < naive, (
        f"Combined layered benefit ({combined:.2f}) should be less than "
        f"naive additive ({naive:.2f}) — double-count not fixed"
    )

    # Also verify the exact layered total.
    layered_deflection = bot_rate + (1.0 - bot_rate) * va_rate  # = 0.4475
    expected_combined = (
        10_000 * 12 * layered_deflection * 300 * 0.50 * real_factor * 0.01
    )
    assert combined == pytest.approx(expected_combined)
def test_va_claim_params_reproduce_no_haircut():
    """params='claim' must apply zero haircuts (all factors = 1.0),
    reproducing the original Genesys ROI-doc assumption."""
    small = _small_site()

    def bot_value(param_set):
        # A fresh Voice Bot scope per call; only the params set varies.
        scope = FeatureScope("Voice Bot", ["Small"], deflection_target=0.35)
        frame = calculate_va_deflection_benefit(
            [small], scope, "realistic", year=1, params=param_set,
        )
        return frame["annual_value"].sum()

    claim_value = bot_value("claim")
    realistic_value = bot_value("realistic")

    # claim should be strictly higher (no haircuts applied)
    assert claim_value > realistic_value

    # claim realization_factor = 1.0 × 1.0 × (1 - 0.0) = 1.0
    annual_calls = 10_000 * 12
    hand_claim_value = annual_calls * 0.35 * 300 * 0.50 * 1.0 * 0.01
    assert claim_value == pytest.approx(hand_claim_value)