palladium/studies/202512_GenesysCX/ctm-token-calculator/app/streamlit_app.py

"""
NTT DATA — CTM Token Calculator (Streamlit).

Run from the ctm-token-calculator root::

    streamlit run app/streamlit_app.py

Thin presentation layer over ``tokencalc`` — all math lives in the
library, shared with the JupyterLab notebook.
"""

from __future__ import annotations

import dataclasses
import io
import json
import sys
from pathlib import Path

# Make ``tokencalc`` importable from the project root without installing it.
_ROOT = Path(__file__).resolve().parent.parent
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

import tokencalc.scenarios as tc_scenarios
from tokencalc import (
    CONTRACTED_NAMED_USERS,
    CTM_DEFAULT_FEATURE_SCOPES,
    CTM_DEFAULT_SITES,
    CTM_DEFAULT_TAKEOUTS,
    DEFAULT_METERS,
    DEFAULT_PRICING,
    Confidence,
    CostTakeout,
    FeatureScope,
    SiteInput,
    build_business_case,
    calculate_total_benefit,
    calculate_total_cost,
    export_excel,
    get_scenario,
    meters_dataframe,
    scenario_state_from_json,
    scenario_state_to_json,
    sites_dataframe,
)

st.set_page_config(page_title="NTT DATA — CTM Token Calculator",
                   page_icon="🧮", layout="wide")

# Model years shown throughout the app.
YEARS = (1, 2, 3)
# Feature names, in DEFAULT_METERS iteration order.
FEATURES = list(DEFAULT_METERS)


@st.cache_resource(show_spinner=False)
def _pristine_realistic_params() -> dict:
    """Snapshot the library's "realistic" benefit parameters once per process.

    Streamlit re-executes this script on every interaction, but the
    ``tokencalc`` module stays imported, so the values the Benefit Model
    sliders write into ``BENEFIT_PARAMS`` survive across reruns.  Rebuilding
    the snapshot on each rerun would therefore capture the user's edits
    instead of the defaults and make "Reset to CTM defaults" a no-op for
    these parameters.  ``st.cache_resource`` runs the body on the first call
    only and hands back the same dict afterwards.

    Returns:
        Mapping of benefit-parameter name to its original "realistic" value.
        Treat it as read-only: the cached object is shared, not copied.
    """
    return {k: v["realistic"] for k, v in tc_scenarios.BENEFIT_PARAMS.items()}


_DEFAULT_REALISTIC = _pristine_realistic_params()


# ── State ────────────────────────────────────────────────────────────

def _init_state(force: bool = False) -> None:
    """Seed ``st.session_state`` with the CTM defaults.

    Runs on the first script execution of a session (detected by the absence
    of the ``sites`` key) or whenever *force* is true.

    Args:
        force: Re-seed even if the session already holds state; used by the
            "Reset to CTM defaults" button.
    """
    if force or "sites" not in st.session_state:
        st.session_state.sites = list(CTM_DEFAULT_SITES)
        st.session_state.takeouts = list(CTM_DEFAULT_TAKEOUTS)
        # Copy each scope so later edits never touch the library defaults.
        st.session_state.scopes = [
            dataclasses.replace(s) for s in CTM_DEFAULT_FEATURE_SCOPES
        ]
        st.session_state.meters = dict(DEFAULT_METERS)
        st.session_state.pricing = dict(DEFAULT_PRICING)
        st.session_state.use_contracted = False
        st.session_state.implementation_cost = 0.0
        for k, v in _DEFAULT_REALISTIC.items():  # reset benefit sliders
            tc_scenarios.BENEFIT_PARAMS[k]["realistic"] = v
            # The sliders are keyed "bp_<param>".  A keyed widget prefers its
            # stored session value over ``value=``, so a leftover entry would
            # immediately write the old number back over the reset above.
            slider_key = f"bp_{k}"
            if slider_key in st.session_state:
                del st.session_state[slider_key]


_init_state()


def _state_key() -> str:
    """Serialize every model input into one string usable as a cache key.

    The result is the scenario JSON (sites, takeouts, scopes) followed by a
    JSON object holding the realistic benefit parameters, the contracted-rate
    flag, the implementation cost, the per-feature meter rates and the
    per-region (list, contracted) token prices.
    """
    state = st.session_state
    scenario_json = scenario_state_to_json(
        state.sites, state.takeouts, state.scopes
    )

    realistic_params = {
        name: spec["realistic"]
        for name, spec in tc_scenarios.BENEFIT_PARAMS.items()
    }
    meter_rates = {
        name: meter.tokens_per_unit for name, meter in state.meters.items()
    }
    region_rates = {
        region: (price.list_rate_per_token, price.contracted_rate_per_token)
        for region, price in state.pricing.items()
    }

    # Key order is part of the serialized string, so keep it fixed.
    extras = {
        "params": realistic_params,
        "contracted": state.use_contracted,
        "impl": state.implementation_cost,
        "meters": meter_rates,
        "pricing": region_rates,
    }
    return scenario_json + json.dumps(extras)


@st.cache_data(show_spinner=False)
def _cached_case(state_key: str, scenario: str) -> dict:
    """Build the business case for *scenario* from the session inputs.

    *state_key* is not read in the body; it exists so that the cache entry is
    keyed on the current inputs as well as on the scenario name.
    """
    state = st.session_state
    options = {
        "implementation_cost": state.implementation_cost,
        "use_contracted": state.use_contracted,
    }
    return build_business_case(
        state.sites,
        state.scopes,
        state.meters,
        state.pricing,
        state.takeouts,
        scenario,
        **options,
    )


def _case(scenario: str) -> dict:
    """Return the (cached) business case for *scenario* under current inputs."""
    fingerprint = _state_key()
    return _cached_case(fingerprint, scenario)


# ── Sidebar ──────────────────────────────────────────────────────────

_PAGES = [
    "1. Inputs",
    "2. Token Meters",
    "3. Cost Model",
    "4. Benefit Model",
    "5. Business Case",
    "6. Sensitivity Analysis",
    "7. Export",
]
_SCENARIO_NAMES = ["floor", "realistic", "stretch"]

# Navigation and global selectors live in the sidebar.
st.sidebar.title("NTT DATA — CTM Token Calculator")
page = st.sidebar.radio("Page", _PAGES)
st.sidebar.divider()
# index=1 preselects "realistic".
scenario_name = st.sidebar.radio(
    "Scenario", _SCENARIO_NAMES, index=1, horizontal=True
)
year = st.sidebar.radio("Year", YEARS, horizontal=True)

reset_clicked = st.sidebar.button("Reset to CTM defaults")
if reset_clicked:
    # Re-seed the session, drop cached results, and start a fresh run.
    _init_state(force=True)
    st.cache_data.clear()
    st.rerun()

st.sidebar.caption(
    "⚠️ Planning tool — published list rates unless overridden; "
    "not contractual pricing."
)

# Short aliases for the session inputs used by the pages below.
sites: list[SiteInput] = st.session_state.sites
scopes: list[FeatureScope] = st.session_state.scopes
meters = st.session_state.meters
pricing = st.session_state.pricing
scenario = get_scenario(scenario_name)


def _users_warning() -> None:
    """Warn when the sites' named users do not add up to the contracted count."""
    named_total = 0
    for site in sites:
        named_total += site.named_users

    if named_total == CONTRACTED_NAMED_USERS:
        return

    message = (
        f"Named users across sites = {named_total:,} ≠ contracted licence "
        f"count {CONTRACTED_NAMED_USERS:,}."
    )
    st.warning(message)


# ── Page 1: Inputs ───────────────────────────────────────────────────

if page == "1. Inputs":
    st.header("Inputs")
    st.caption("Site data outside NAM is **estimated — confirm with CTM data**.")
    _users_warning()

    # Site grid: edits are staged in the editor and committed on "Apply".
    df = sites_dataframe(sites)
    edited = st.data_editor(df, num_rows="dynamic", key="sites_editor")
    if st.button("Apply site changes"):
        try:
            st.session_state.sites = [
                SiteInput(
                    **{
                        **row,
                        # The grid holds languages as one comma-separated cell.
                        "languages": [
                            x.strip() for x in str(row["languages"]).split(",") if x.strip()
                        ],
                    }
                )
                for row in edited.to_dict("records")
            ]
            st.cache_data.clear()
            st.success("Sites updated.")
            st.rerun()
        except (ValueError, TypeError) as e:
            st.error(f"Validation failed: {e}")

    st.subheader("Cost takeouts")
    tdf = pd.DataFrame(
        [
            {"name": t.name, "annual_cost": t.annual_cost,
             "start_year": t.start_year, "confidence": t.confidence.value,
             "notes": t.notes}
            for t in st.session_state.takeouts
        ]
    )
    tedit = st.data_editor(
        tdf, num_rows="dynamic", key="takeouts_editor",
        column_config={
            "confidence": st.column_config.SelectboxColumn(
                options=[c.value for c in Confidence]
            )
        },
    )
    if st.button("Apply takeout changes"):
        # Blank numeric cells arrive as NaN, which is truthy and would slip
        # past the ``or`` defaults below (NaN cost, ``int(NaN)`` error).
        # Normalise every missing value to None first.
        takeout_rows = [
            {col: (None if pd.isna(val) else val) for col, val in rec.items()}
            for rec in tedit.to_dict("records")
        ]
        try:
            st.session_state.takeouts = [
                CostTakeout(
                    name=r["name"], annual_cost=float(r["annual_cost"] or 0),
                    start_year=int(r["start_year"] or 1),
                    confidence=Confidence(r["confidence"]), notes=r["notes"] or "",
                )
                for r in takeout_rows
            ]
            st.cache_data.clear()
            st.success("Takeouts updated.")
            st.rerun()
        except (ValueError, TypeError) as e:
            st.error(f"Validation failed: {e}")

    st.subheader("Save / load scenario")
    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            "Download scenario JSON",
            scenario_state_to_json(sites, st.session_state.takeouts, scopes),
            file_name="ctm_scenario.json", mime="application/json",
        )
    with col2:
        up = st.file_uploader("Load scenario JSON", type="json")
        if up is not None and st.button("Load"):
            # The upload is arbitrary user data: report bad encoding, invalid
            # JSON or a wrong structure instead of crashing the page.
            try:
                s, t, sc = scenario_state_from_json(up.read().decode())
            except (ValueError, TypeError, KeyError) as e:
                st.error(f"Could not load scenario: {e}")
            else:
                st.session_state.sites, st.session_state.takeouts = s, t
                st.session_state.scopes = sc
                st.cache_data.clear()
                st.success("Scenario loaded.")
                st.rerun()

# ── Page 2: Token Meters ─────────────────────────────────────────────

elif page == "2. Token Meters":
    st.header("Token Meters")
    st.dataframe(meters_dataframe(meters), width="stretch", hide_index=True)

    st.subheader("Override a meter rate")
    feature = st.selectbox("Feature", FEATURES)
    m = meters[feature]
    override = st.toggle("Override default", key=f"ovr_{feature}")
    if override:
        new_rate = st.number_input(
            "tokens per unit (per user/month for per-user meters)",
            value=float(m.tokens_per_unit), min_value=0.0, step=0.005,
            format="%.4f",
        )
        if st.button("Apply override"):
            meters[feature] = dataclasses.replace(
                m,
                tokens_per_unit=new_rate,
                units_per_token=(1 / new_rate if new_rate and m.units_per_token else 0.0),
                confidence=Confidence.ESTIMATED,
                notes=m.notes + " [rate overridden by user]",
            )
            st.cache_data.clear()
            st.success(f"{feature} now {new_rate} tokens/unit (flagged estimated).")

    st.subheader("Token pricing per region")
    st.session_state.use_contracted = st.toggle(
        "Apply contracted rate (if known) instead of list rate",
        value=st.session_state.use_contracted,
    )
    for region, p in pricing.items():
        c1, c2 = st.columns(2)
        with c1:
            lr = st.number_input(
                f"{region} — list $/token", value=float(p.list_rate_per_token),
                min_value=0.0, key=f"list_{region}",
            )
        with c2:
            cr = st.number_input(
                f"{region} — contracted $/token (0 = unknown)",
                value=float(p.contracted_rate_per_token or 0.0),
                min_value=0.0, key=f"con_{region}",
            )
        pricing[region] = dataclasses.replace(
            p, list_rate_per_token=lr,
            contracted_rate_per_token=cr or None,
        )

# ── Page 3: Cost Model ───────────────────────────────────────────────

elif page == "3. Cost Model":
    st.header("Cost Model")
    _users_warning()

    st.subheader("Feature enablement & phasing")
    st.caption("Phase = model year the feature switches on at that site; 0 = off.")
    site_names = [s.site_name for s in sites]
    matrix = pd.DataFrame(0, index=site_names, columns=FEATURES, dtype=int)
    for sc in scopes:
        for sn in sc.enabled_sites:
            if sn in matrix.index:
                matrix.loc[sn, sc.feature] = sc.phase
    edited_matrix = st.data_editor(matrix, key="phasing_matrix")
    if st.button("Apply phasing"):
        new_scopes: list[FeatureScope] = []
        for feature in FEATURES:
            for phase in (1, 2, 3):
                enabled = [sn for sn in site_names
                           if int(edited_matrix.loc[sn, feature]) == phase]
                if enabled:
                    template = next(
                        (s for s in scopes if s.feature == feature), None
                    )
                    new_scopes.append(
                        FeatureScope(
                            feature, enabled, phase=phase,
                            adoption_curve=(
                                template.adoption_curve if template else {}
                            ),
                            deflection_target=(
                                template.deflection_target if template else None
                            ),
                            eligibility_pct=(
                                template.eligibility_pct if template else None
                            ),
                        )
                    )
        st.session_state.scopes = new_scopes
        st.cache_data.clear()
        st.success("Phasing updated.")
        st.rerun()

    frames = []
    for y in YEARS:
        d = calculate_total_cost(
            sites, scopes, meters, pricing, scenario, y,
            use_contracted=st.session_state.use_contracted,
        )
        d["year"] = f"Y{y}"
        frames.append(d)
    cost_3y = pd.concat(frames, ignore_index=True)

    this_year = frames[year - 1]
    total = this_year["annual_cost"].sum()
    unknown = this_year[this_year["confidence"] == "unknown"]["annual_cost"].sum()
    c1, c2 = st.columns(2)
    c1.metric(f"Year {year} total cost ({scenario_name})", f"${total:,.0f}")
    c2.metric("of which 🔴 unknown-rate features", f"${unknown:,.0f}",
              help="Range driven by unsourced meter rates — total could move "
                   "materially once these are confirmed.")

    st.plotly_chart(
        px.bar(cost_3y, x="year", y="annual_cost", color="cost_line",
               title=f"Cost breakdown by feature — {scenario_name}",
               labels={"annual_cost": "$/yr"}),
        width="stretch", key="cost_stack",
    )
    icon_map = {c.value: c.icon for c in Confidence}
    show = this_year.copy()
    show["confidence"] = show["confidence"].map(
        lambda v: f"{icon_map.get(v, '')} {v}"
    )
    st.dataframe(show.sort_values("annual_cost", ascending=False),
                 width="stretch", hide_index=True)

# ── Page 4: Benefit Model ────────────────────────────────────────────

elif page == "4. Benefit Model":
    st.header("Benefit Model")
    st.caption("Sliders adjust the pressure-tested (realistic) parameters; "
               "the Genesys-claim figures stay fixed for comparison.")

    # One slider per benefit parameter, laid out round-robin over 3 columns.
    # Each slider writes straight into the library's module-level
    # BENEFIT_PARAMS dict, so the benefit calculations below see the new value.
    # NOTE(review): that dict belongs to the imported module, so edits are
    # presumably visible to every session served by this process — confirm.
    cols = st.columns(3)
    for i, (key, vals) in enumerate(tc_scenarios.BENEFIT_PARAMS.items()):
        with cols[i % 3]:
            tc_scenarios.BENEFIT_PARAMS[key]["realistic"] = st.slider(
                key.replace("_", " "),
                # Upper bound is 1.0, or the claim value when that is larger.
                0.0, max(1.0, vals["claim"]),
                value=float(vals["realistic"]), step=0.005, format="%.3f",
                key=f"bp_{key}",
            )

    # Realistic benefit per model year, each frame labelled "Y<n>".
    frames = []
    for y in YEARS:
        d = calculate_total_benefit(sites, scopes, scenario, y, params="realistic")
        d["year"] = f"Y{y}"
        frames.append(d)
    ben_3y = pd.concat(frames, ignore_index=True)

    # frames is ordered like YEARS, so the selected year sits at index year - 1.
    st.metric(f"Year {year} total benefit ({scenario_name})",
              f"${frames[year - 1]['annual_value'].sum():,.0f}")
    st.plotly_chart(
        px.bar(ben_3y, x="year", y="annual_value", color="benefit_line",
               title=f"Benefit breakdown by source — {scenario_name}",
               labels={"annual_value": "$/yr"}),
        width="stretch", key="benefit_stack",
    )

    # Claim vs realistic for the selected year. The outer join keeps benefit
    # lines present on only one side; the missing side is filled with 0.
    claim = calculate_total_benefit(sites, scopes, scenario, year, params="claim")
    realistic = frames[year - 1]
    comp = pd.merge(
        claim[["benefit_line", "annual_value"]].rename(
            columns={"annual_value": "Genesys claim"}),
        realistic[["benefit_line", "annual_value"]].rename(
            columns={"annual_value": "Pressure-tested"}),
        on="benefit_line", how="outer",
    ).fillna(0)
    fig = go.Figure([
        go.Bar(name="Genesys claim", x=comp.benefit_line, y=comp["Genesys claim"]),
        go.Bar(name="Pressure-tested realistic", x=comp.benefit_line,
               y=comp["Pressure-tested"]),
    ])
    fig.update_layout(barmode="group", yaxis_tickformat="$,.0f",
                      title=f"Genesys claim vs pressure-tested — Year {year}")
    st.plotly_chart(fig, width="stretch", key="claim_vs_real")

# ── Page 5: Business Case ────────────────────────────────────────────

elif page == "5. Business Case":
    st.header("Business Case")
    st.session_state.implementation_cost = st.number_input(
        "One-off implementation cost (amortized over 3 years)",
        value=float(st.session_state.implementation_cost), min_value=0.0,
        step=50_000.0,
    )
    case = _case(scenario_name)

    pb = case["payback_period_years"]
    c1, c2, c3 = st.columns(3)
    c1.metric("NPV @ 8%", f"${case['npv']:,.0f}")
    c2.metric("Payback", f"{pb:.2f} yrs" if pb is not None else "never")
    c3.metric("3-Year ROI", f"{case['roi_3yr']:.0%}" if case["roi_3yr"] else "n/a")

    pnl = pd.concat(
        [
            case["cost_by_year"].drop(columns="confidence"),
            case["takeouts_by_year"].drop(columns="confidence"),
            case["benefit_by_year"].drop(columns="confidence"),
            case["net_by_year"],
        ],
        ignore_index=True,
    )
    pnl["3-yr Total"] = pnl[["Y1", "Y2", "Y3"]].sum(axis=1)
    st.dataframe(
        pnl, width="stretch", hide_index=True,
        column_config={
            c: st.column_config.NumberColumn(c, format="$%,.0f")
            for c in ("Y1", "Y2", "Y3", "3-yr Total")
        },
    )

    fig = go.Figure()
    for name in ("floor", "realistic", "stretch"):
        c = _case(name)
        fig.add_scatter(
            x=c["cumulative_net"].year, y=c["cumulative_net"].cumulative_net,
            mode="lines+markers", name=name.capitalize(),
        )
    fig.update_layout(title="Cumulative net cash flow by scenario",
                      xaxis_title="Year", yaxis_tickformat="$,.0f")
    st.plotly_chart(fig, width="stretch", key="cum_net")

# ── Page 6: Sensitivity ──────────────────────────────────────────────

elif page == "6. Sensitivity Analysis":
    st.header("Sensitivity Analysis")
    base_npv = _case(scenario_name)["npv"]
    st.caption(f"Base 3-yr NPV ({scenario_name}): ${base_npv:,.0f}")

    def _npv_with(**overrides) -> float:
        """Return the NPV of the selected scenario with *overrides* applied.

        The overrides replace fields on a copy of the current scenario; all
        other inputs come from the session.  The call goes directly to
        ``build_business_case`` rather than through the cached ``_case``.
        """
        sc = dataclasses.replace(scenario, **overrides)
        return build_business_case(
            sites, scopes, meters, pricing, st.session_state.takeouts, sc,
            implementation_cost=st.session_state.implementation_cost,
            use_contracted=st.session_state.use_contracted,
        )["npv"]

    # Scenario fields swept by the tornado chart.
    drivers = [
        "voice_bot_deflection", "voice_bot_avg_minutes", "agentic_va_deflection",
        "voice_summarization_eligibility", "voice_knowledge_eligibility",
        "email_auto_respond_rate", "email_auto_suggest_acceptance",
    ]
    rows = []
    for d in drivers:
        base_v = getattr(scenario, d)
        # ±25% around the base value. Every driver except
        # voice_bot_avg_minutes is capped at 1.0.
        lo = base_v * 0.75 if d == "voice_bot_avg_minutes" else min(base_v * 0.75, 1.0)
        hi = base_v * 1.25 if d == "voice_bot_avg_minutes" else min(base_v * 1.25, 1.0)
        # Store each end as a delta against the base NPV.
        rows.append({"driver": d,
                     "low": _npv_with(**{d: lo}) - base_npv,
                     "high": _npv_with(**{d: hi}) - base_npv})
    torn = pd.DataFrame(rows)
    # Sort ascending by absolute swing between the two ends.
    torn["swing"] = (torn.high - torn.low).abs()
    torn = torn.sort_values("swing")
    fig = go.Figure([
        go.Bar(y=torn.driver, x=torn.low, orientation="h", name="-25%"),
        go.Bar(y=torn.driver, x=torn.high, orientation="h", name="+25%"),
    ])
    fig.update_layout(barmode="overlay", title="Tornado — NPV impact of ±25%",
                      xaxis_tickformat="$,.0f")
    st.plotly_chart(fig, width="stretch", key="tornado")

    st.subheader("Two-variable heatmap")
    # 6 × 6 grid: one NPV evaluation per (x, y) pair; rows of z follow ys.
    xs = np.linspace(0.0, 0.50, 6)   # Email Auto-Respond rate
    ys = np.linspace(0.0, 0.25, 6)   # Agentic VA deflection
    z = [[_npv_with(email_auto_respond_rate=float(x),
                    agentic_va_deflection=float(yv)) for x in xs] for yv in ys]
    fig = go.Figure(go.Heatmap(
        x=[f"{x:.0%}" for x in xs], y=[f"{yv:.0%}" for yv in ys], z=z,
        colorbar={"title": "3-yr NPV"},
    ))
    fig.update_layout(title="NPV: Email Auto-Respond rate × Agentic VA deflection",
                      xaxis_title="Email Auto-Respond rate",
                      yaxis_title="Agentic VA deflection")
    st.plotly_chart(fig, width="stretch", key="heatmap")

    st.subheader("Break-even finder")
    # Sweep the Auto-Respond rate over 26 evenly spaced points in 0–50%.
    rates = np.linspace(0.0, 0.50, 26)
    npvs = [_npv_with(email_auto_respond_rate=float(r)) for r in rates]
    # Break-even = first grid rate whose NPV is non-negative (None if none is).
    breakeven = next((r for r, v in zip(rates, npvs) if v >= 0), None)
    if npvs[0] >= 0:
        st.success(f"Case is NPV-positive even at 0% Auto-Respond "
                   f"(${npvs[0]:,.0f}).")
    elif breakeven is not None:
        st.info(f"Break-even at ~{breakeven:.0%} email Auto-Respond rate.")
    else:
        st.error("No break-even within 0–50% Auto-Respond.")
    st.plotly_chart(
        px.line(x=rates, y=npvs,
                labels={"x": "Email Auto-Respond rate", "y": "3-yr NPV ($)"}),
        width="stretch", key="breakeven",
    )

# ── Page 7: Export ───────────────────────────────────────────────────

elif page == "7. Export":
    st.header("Export")
    case = _case(scenario_name)
    cost_frames, ben_frames = [], []
    for y in YEARS:
        d = calculate_total_cost(sites, scopes, meters, pricing, scenario, y,
                                 use_contracted=st.session_state.use_contracted)
        d["year"] = f"Y{y}"
        cost_frames.append(d)
        b = calculate_total_benefit(sites, scopes, scenario, y)
        b["year"] = f"Y{y}"
        ben_frames.append(b)

    comparison = pd.DataFrame([
        {"scenario": n, "NPV": _case(n)["npv"],
         "payback_years": _case(n)["payback_period_years"],
         "roi_3yr": _case(n)["roi_3yr"]}
        for n in ("floor", "realistic", "stretch")
    ])

    pnl = pd.concat(
        [case["cost_by_year"].drop(columns="confidence"),
         case["takeouts_by_year"].drop(columns="confidence"),
         case["benefit_by_year"].drop(columns="confidence"),
         case["net_by_year"]],
        ignore_index=True,
    )

    buf = io.BytesIO()
    with pd.ExcelWriter(buf, engine="openpyxl") as writer:
        sites_dataframe(sites).to_excel(writer, sheet_name="Inputs", index=False)
        meters_dataframe(meters).to_excel(writer, sheet_name="Meters", index=False)
        pd.concat(cost_frames).to_excel(writer, sheet_name="Cost detail", index=False)
        pd.concat(ben_frames).to_excel(writer, sheet_name="Benefit detail", index=False)
        pnl.to_excel(writer, sheet_name="Business case", index=False)
        comparison.to_excel(writer, sheet_name="Scenario comparison", index=False)
    st.download_button(
        "⬇️ Download Excel workbook",
        buf.getvalue(),
        file_name=f"ctm_token_calculator_{scenario_name}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
    st.download_button(
        "⬇️ Download scenario JSON",
        scenario_state_to_json(sites, st.session_state.takeouts, scopes),
        file_name="ctm_scenario.json", mime="application/json",
    )
    st.dataframe(comparison, width="stretch", hide_index=True)