feat: add GenesysCX study and fix Streamlit chart key collisions

- Add 202512_GenesysCX TEI study (config, seed data, notebooks, README) with NPV $10.8M / ROI 266% including AI-token cost line
- Add explicit `key` parameter to all chart wrappers in app/components to prevent StreamlitDuplicateElementId errors when the same figure type renders across Summary/Benefits/Costs tabs
- Render benefits bar and cost pie charts on their respective tabs
- Add benefits_vs_costs_by_year chart wrapper
2026-06-10 14:26:49 -04:00
parent ecd164ee6d
commit 64fb83257d
34 changed files with 12902 additions and 39 deletions
--- a/studies/202512_GenesysCX/ctm-token-calculator/app/streamlit_app.py
+++ b/studies/202512_GenesysCX/ctm-token-calculator/app/streamlit_app.py
@@ -0,0 +1,576 @@
+"""
+NTT DATA — CTM Token Calculator (Streamlit).
+
+Run from the ctm-token-calculator root::
+
+    streamlit run app/streamlit_app.py
+
+Thin presentation layer over ``tokencalc`` — all math lives in the
+library, shared with the JupyterLab notebook.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import io
+import json
+import sys
+from pathlib import Path
+
+# Import tokencalc from the project root without install
+_ROOT = Path(__file__).resolve().parent.parent
+if str(_ROOT) not in sys.path:
+    sys.path.insert(0, str(_ROOT))
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import streamlit as st
+
+import tokencalc.scenarios as tc_scenarios
+from tokencalc import (
+    CONTRACTED_NAMED_USERS,
+    CTM_DEFAULT_FEATURE_SCOPES,
+    CTM_DEFAULT_SITES,
+    CTM_DEFAULT_TAKEOUTS,
+    DEFAULT_METERS,
+    DEFAULT_PRICING,
+    Confidence,
+    CostTakeout,
+    FeatureScope,
+    SiteInput,
+    build_business_case,
+    calculate_total_benefit,
+    calculate_total_cost,
+    export_excel,
+    get_scenario,
+    meters_dataframe,
+    scenario_state_from_json,
+    scenario_state_to_json,
+    sites_dataframe,
+)
+
+st.set_page_config(page_title="NTT DATA — CTM Token Calculator",
+                   page_icon="🧮", layout="wide")
+
+# Model years offered in the sidebar selector and iterated by every page.
+YEARS = (1, 2, 3)
+# Feature names, in the iteration order of DEFAULT_METERS.
+FEATURES = list(DEFAULT_METERS)
+
+
+@st.cache_resource(show_spinner=False)
+def _snapshot_default_realistic() -> dict:
+    """Capture the "realistic" benefit parameters once per server process.
+
+    This script is re-executed on every rerun, while ``tokencalc.scenarios``
+    stays imported and its ``BENEFIT_PARAMS`` mapping is mutated in place by
+    the Benefit Model sliders.  Recomputing the snapshot on each run would
+    therefore just copy the *current* slider values, leaving "reset" with
+    nothing to restore.  ``st.cache_resource`` keeps the first snapshot alive
+    across reruns and is not emptied by ``st.cache_data.clear()``.
+
+    Returns:
+        Mapping of benefit-parameter name to its "realistic" value as it
+        stood the first time this function ran.
+    """
+    return {k: v["realistic"] for k, v in tc_scenarios.BENEFIT_PARAMS.items()}
+
+
+# Baseline used by the reset path to undo slider edits.
+_DEFAULT_REALISTIC = _snapshot_default_realistic()
+
+
+# ── State ────────────────────────────────────────────────────────────
+
+def _init_state(force: bool = False) -> None:
+    """Seed ``st.session_state`` with the CTM defaults.
+
+    Does nothing when the session is already initialised (``"sites"`` is in
+    session state) unless ``force`` is true.
+
+    Args:
+        force: Re-apply the defaults even if state already exists.  Used by
+            the sidebar "Reset to CTM defaults" button.
+    """
+    if not force and "sites" in st.session_state:
+        return
+
+    if force:
+        # Keyed widgets keep their last value in session state and ignore
+        # their ``value=`` argument while that entry exists.  Without this
+        # purge the benefit sliders ("bp_*") and the per-region price inputs
+        # ("list_*", "con_*") would write their old values straight back on
+        # the next run, silently undoing the reset.
+        widget_prefixes = ("bp_", "list_", "con_")
+        stale_keys = [
+            name for name in st.session_state.keys()
+            if isinstance(name, str) and name.startswith(widget_prefixes)
+        ]
+        for name in stale_keys:
+            del st.session_state[name]
+
+    st.session_state.sites = list(CTM_DEFAULT_SITES)
+    st.session_state.takeouts = list(CTM_DEFAULT_TAKEOUTS)
+    # Scopes are copied one by one so the session never holds the library's
+    # own default instances.
+    st.session_state.scopes = [
+        dataclasses.replace(s) for s in CTM_DEFAULT_FEATURE_SCOPES
+    ]
+    st.session_state.meters = dict(DEFAULT_METERS)
+    st.session_state.pricing = dict(DEFAULT_PRICING)
+    st.session_state.use_contracted = False
+    st.session_state.implementation_cost = 0.0
+    for k, v in _DEFAULT_REALISTIC.items():  # reset benefit sliders
+        tc_scenarios.BENEFIT_PARAMS[k]["realistic"] = v
+
+
+# Seed defaults on the first run of a session; later reruns keep existing state.
+_init_state()
+
+
+def _state_key() -> str:
+    """Serialize every model input into one string for use as a cache key.
+
+    The result is the scenario JSON (sites, takeouts, scopes) followed by a
+    JSON object holding the realistic benefit parameters, the contracted-rate
+    flag, the implementation cost, each meter's tokens-per-unit and each
+    region's (list, contracted) rate pair.
+    """
+    state = st.session_state
+    scenario_json = scenario_state_to_json(
+        state.sites, state.takeouts, state.scopes
+    )
+
+    realistic_params = {
+        name: spec["realistic"]
+        for name, spec in tc_scenarios.BENEFIT_PARAMS.items()
+    }
+    meter_rates = {
+        feature_name: meter.tokens_per_unit
+        for feature_name, meter in state.meters.items()
+    }
+    region_rates = {
+        region_name: (price.list_rate_per_token, price.contracted_rate_per_token)
+        for region_name, price in state.pricing.items()
+    }
+
+    extras = {
+        "params": realistic_params,
+        "contracted": state.use_contracted,
+        "impl": state.implementation_cost,
+        "meters": meter_rates,
+        "pricing": region_rates,
+    }
+    return scenario_json + json.dumps(extras)
+
+
+@st.cache_data(show_spinner=False)
+def _cached_case(state_key: str, scenario: str) -> dict:
+    """Build the business case for ``scenario`` from the current session state.
+
+    ``state_key`` is not read in the body: it exists only so that
+    ``st.cache_data`` sees a different argument whenever the inputs change,
+    because the real inputs are pulled from ``st.session_state`` rather than
+    passed in.
+    """
+    state = st.session_state
+    return build_business_case(
+        state.sites,
+        state.scopes,
+        state.meters,
+        state.pricing,
+        state.takeouts,
+        scenario,
+        implementation_cost=state.implementation_cost,
+        use_contracted=state.use_contracted,
+    )
+
+
+def _case(scenario: str) -> dict:
+    """Return the (cached) business case for ``scenario`` under current inputs."""
+    current_inputs = _state_key()
+    return _cached_case(current_inputs, scenario)
+
+
+# ── Sidebar ──────────────────────────────────────────────────────────
+
+with st.sidebar:
+    st.title("NTT DATA — CTM Token Calculator")
+    # Page selector: the chosen label drives the if/elif chain below.
+    page = st.radio("Page", [
+        "1. Inputs", "2. Token Meters", "3. Cost Model", "4. Benefit Model",
+        "5. Business Case", "6. Sensitivity Analysis", "7. Export",
+    ])
+    st.divider()
+    scenario_name = st.radio(
+        "Scenario", ["floor", "realistic", "stretch"], index=1, horizontal=True
+    )
+    year = st.radio("Year", YEARS, horizontal=True)
+    # Reset: re-seed state, drop cached results, then restart the script run.
+    if st.button("Reset to CTM defaults"):
+        _init_state(force=True)
+        st.cache_data.clear()
+        st.rerun()
+    st.caption(
+        "⚠️ Planning tool — published list rates unless overridden; "
+        "not contractual pricing."
+    )
+
+# Short aliases for the session objects the pages below read repeatedly.
+sites: list[SiteInput] = st.session_state.sites
+scopes: list[FeatureScope] = st.session_state.scopes
+meters = st.session_state.meters
+pricing = st.session_state.pricing
+scenario = get_scenario(scenario_name)
+
+
+def _users_warning() -> None:
+    """Warn when the sites' named users do not add up to the contracted count."""
+    named_total = sum(site.named_users for site in sites)
+    if named_total == CONTRACTED_NAMED_USERS:
+        return
+    st.warning(
+        f"Named users across sites = {named_total:,} ≠ contracted licence "
+        f"count {CONTRACTED_NAMED_USERS:,}."
+    )
+
+
+# ── Page 1: Inputs ───────────────────────────────────────────────────
+
+if page == "1. Inputs":
+    st.header("Inputs")
+    st.caption("Site data outside NAM is **estimated — confirm with CTM data**.")
+    _users_warning()
+
+    # Site grid: edits stay in the editor until "Apply site changes" commits
+    # them, so a half-typed row never reaches the model.
+    df = sites_dataframe(sites)
+    edited = st.data_editor(df, num_rows="dynamic", key="sites_editor")
+    if st.button("Apply site changes"):
+        try:
+            st.session_state.sites = [
+                SiteInput(
+                    **{
+                        **row,
+                        # The grid holds languages as one comma-separated
+                        # string; split it back into a list.
+                        "languages": [
+                            x.strip() for x in str(row["languages"]).split(",") if x.strip()
+                        ],
+                    }
+                )
+                for row in edited.to_dict("records")
+            ]
+            st.cache_data.clear()
+            st.success("Sites updated.")
+            st.rerun()
+        except (ValueError, TypeError) as e:
+            st.error(f"Validation failed: {e}")
+
+    st.subheader("Cost takeouts")
+    tdf = pd.DataFrame(
+        [
+            {"name": t.name, "annual_cost": t.annual_cost,
+             "start_year": t.start_year, "confidence": t.confidence.value,
+             "notes": t.notes}
+            for t in st.session_state.takeouts
+        ]
+    )
+    tedit = st.data_editor(
+        tdf, num_rows="dynamic", key="takeouts_editor",
+        column_config={
+            # Restrict the confidence column to the enum's values.
+            "confidence": st.column_config.SelectboxColumn(
+                options=[c.value for c in Confidence]
+            )
+        },
+    )
+    if st.button("Apply takeout changes"):
+        try:
+            st.session_state.takeouts = [
+                CostTakeout(
+                    name=r["name"], annual_cost=float(r["annual_cost"] or 0),
+                    start_year=int(r["start_year"] or 1),
+                    confidence=Confidence(r["confidence"]), notes=r["notes"] or "",
+                )
+                for r in tedit.to_dict("records")
+            ]
+            st.cache_data.clear()
+            st.success("Takeouts updated.")
+            st.rerun()
+        except (ValueError, TypeError) as e:
+            st.error(f"Validation failed: {e}")
+
+    st.subheader("Save / load scenario")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.download_button(
+            "Download scenario JSON",
+            scenario_state_to_json(sites, st.session_state.takeouts, scopes),
+            file_name="ctm_scenario.json", mime="application/json",
+        )
+    with col2:
+        up = st.file_uploader("Load scenario JSON", type="json")
+        if up is not None and st.button("Load"):
+            try:
+                # getvalue() returns the whole upload regardless of the
+                # buffer's current read position.
+                s, t, sc = scenario_state_from_json(up.getvalue().decode())
+            except (ValueError, TypeError, KeyError) as e:
+                # Covers malformed JSON and bad encodings (both ValueError
+                # subclasses) as well as missing or mistyped fields, matching
+                # how the two "Apply" handlers above report bad input.
+                st.error(f"Could not load scenario: {e}")
+            else:
+                st.session_state.sites, st.session_state.takeouts = s, t
+                st.session_state.scopes = sc
+                st.cache_data.clear()
+                st.success("Scenario loaded.")
+                st.rerun()
+
+# ── Page 2: Token Meters ─────────────────────────────────────────────
+
+elif page == "2. Token Meters":
+    st.header("Token Meters")
+    st.dataframe(meters_dataframe(meters), width="stretch", hide_index=True)
+
+    st.subheader("Override a meter rate")
+    feature = st.selectbox("Feature", FEATURES)
+    m = meters[feature]
+    # One toggle per feature so each feature remembers its own on/off state.
+    override = st.toggle("Override default", key=f"ovr_{feature}")
+    if override:
+        new_rate = st.number_input(
+            "tokens per unit (per user/month for per-user meters)",
+            value=float(m.tokens_per_unit), min_value=0.0, step=0.005,
+            format="%.4f",
+        )
+        if st.button("Apply override"):
+            # Tag the notes only once: re-applying an override must not keep
+            # stacking the same marker onto the text.
+            override_tag = " [rate overridden by user]"
+            tagged_notes = (
+                m.notes if m.notes.endswith(override_tag) else m.notes + override_tag
+            )
+            meters[feature] = dataclasses.replace(
+                m,
+                tokens_per_unit=new_rate,
+                # Keep the inverse rate in step, but only for meters that had
+                # one; a zero rate has no inverse.
+                units_per_token=(1 / new_rate if new_rate and m.units_per_token else 0.0),
+                confidence=Confidence.ESTIMATED,
+                notes=tagged_notes,
+            )
+            st.cache_data.clear()
+            st.success(f"{feature} now {new_rate} tokens/unit (flagged estimated).")
+
+    st.subheader("Token pricing per region")
+    st.session_state.use_contracted = st.toggle(
+        "Apply contracted rate (if known) instead of list rate",
+        value=st.session_state.use_contracted,
+    )
+    # Replacing the value of an existing key while iterating is safe: the
+    # dict's size never changes.
+    for region, p in pricing.items():
+        c1, c2 = st.columns(2)
+        with c1:
+            lr = st.number_input(
+                f"{region} — list $/token", value=float(p.list_rate_per_token),
+                min_value=0.0, key=f"list_{region}",
+            )
+        with c2:
+            cr = st.number_input(
+                f"{region} — contracted $/token (0 = unknown)",
+                value=float(p.contracted_rate_per_token or 0.0),
+                min_value=0.0, key=f"con_{region}",
+            )
+        pricing[region] = dataclasses.replace(
+            p, list_rate_per_token=lr,
+            # 0 in the input means "unknown", stored as None.
+            contracted_rate_per_token=cr or None,
+        )
+
+# ── Page 3: Cost Model ───────────────────────────────────────────────
+
+elif page == "3. Cost Model":
+    st.header("Cost Model")
+    _users_warning()
+
+    st.subheader("Feature enablement & phasing")
+    st.caption("Phase = model year the feature switches on at that site; 0 = off.")
+    # Site × feature grid holding each cell's phase (0 where not enabled).
+    site_names = [s.site_name for s in sites]
+    matrix = pd.DataFrame(0, index=site_names, columns=FEATURES, dtype=int)
+    for sc in scopes:
+        for sn in sc.enabled_sites:
+            if sn in matrix.index:
+                matrix.loc[sn, sc.feature] = sc.phase
+    edited_matrix = st.data_editor(matrix, key="phasing_matrix")
+    if st.button("Apply phasing"):
+
+        def _phase_at(site_name: str, feature_name: str) -> int:
+            """Return the phase entered in one cell; a blank cell counts as 0 (off)."""
+            cell = edited_matrix.loc[site_name, feature_name]
+            return 0 if pd.isna(cell) else int(cell)
+
+        try:
+            new_scopes: list[FeatureScope] = []
+            for feature in FEATURES:
+                # The first existing scope for the feature supplies the
+                # settings the grid cannot express; looked up once per feature.
+                template = next(
+                    (s for s in scopes if s.feature == feature), None
+                )
+                for phase in (1, 2, 3):
+                    enabled = [sn for sn in site_names
+                               if _phase_at(sn, feature) == phase]
+                    if not enabled:
+                        continue
+                    new_scopes.append(
+                        FeatureScope(
+                            feature, enabled, phase=phase,
+                            adoption_curve=(
+                                template.adoption_curve if template else {}
+                            ),
+                            deflection_target=(
+                                template.deflection_target if template else None
+                            ),
+                            eligibility_pct=(
+                                template.eligibility_pct if template else None
+                            ),
+                        )
+                    )
+        except (ValueError, TypeError) as e:
+            # A non-numeric or otherwise unusable cell is reported instead of
+            # crashing the page, and the existing scopes are left untouched.
+            st.error(f"Validation failed: {e}")
+        else:
+            st.session_state.scopes = new_scopes
+            st.cache_data.clear()
+            st.success("Phasing updated.")
+            st.rerun()
+
+    # Cost detail for every model year, tagged with a "Y<n>" label.
+    frames = []
+    for y in YEARS:
+        d = calculate_total_cost(
+            sites, scopes, meters, pricing, scenario, y,
+            use_contracted=st.session_state.use_contracted,
+        )
+        d["year"] = f"Y{y}"
+        frames.append(d)
+    cost_3y = pd.concat(frames, ignore_index=True)
+
+    # YEARS is 1-based, the list of frames 0-based.
+    this_year = frames[year - 1]
+    total = this_year["annual_cost"].sum()
+    unknown = this_year[this_year["confidence"] == "unknown"]["annual_cost"].sum()
+    c1, c2 = st.columns(2)
+    c1.metric(f"Year {year} total cost ({scenario_name})", f"${total:,.0f}")
+    c2.metric("of which 🔴 unknown-rate features", f"${unknown:,.0f}",
+              help="Range driven by unsourced meter rates — total could move "
+                   "materially once these are confirmed.")
+
+    st.plotly_chart(
+        px.bar(cost_3y, x="year", y="annual_cost", color="cost_line",
+               title=f"Cost breakdown by feature — {scenario_name}",
+               labels={"annual_cost": "$/yr"}),
+        width="stretch", key="cost_stack",
+    )
+    # Prefix each confidence value with its icon for the detail table.
+    icon_map = {c.value: c.icon for c in Confidence}
+    show = this_year.copy()
+    show["confidence"] = show["confidence"].map(
+        lambda v: f"{icon_map.get(v, '')} {v}"
+    )
+    st.dataframe(show.sort_values("annual_cost", ascending=False),
+                 width="stretch", hide_index=True)
+
+# ── Page 4: Benefit Model ────────────────────────────────────────────
+
+elif page == "4. Benefit Model":
+    st.header("Benefit Model")
+    st.caption("Sliders adjust the pressure-tested (realistic) parameters; "
+               "the Genesys-claim figures stay fixed for comparison.")
+
+    # One slider per benefit parameter, dealt round-robin into three columns.
+    # NOTE(review): the chosen value is written into the module-level
+    # tc_scenarios.BENEFIT_PARAMS mapping, not into session state — confirm
+    # that sharing it beyond the current session is intended.
+    slider_columns = st.columns(3)
+    for position, (param_name, spec) in enumerate(tc_scenarios.BENEFIT_PARAMS.items()):
+        upper_bound = max(1.0, spec["claim"])
+        with slider_columns[position % 3]:
+            chosen = st.slider(
+                param_name.replace("_", " "),
+                0.0, upper_bound,
+                value=float(spec["realistic"]), step=0.005, format="%.3f",
+                key=f"bp_{param_name}",
+            )
+        tc_scenarios.BENEFIT_PARAMS[param_name]["realistic"] = chosen
+
+    # Realistic-parameter benefit detail for each model year.
+    yearly_benefits = []
+    for model_year in YEARS:
+        benefit_df = calculate_total_benefit(
+            sites, scopes, scenario, model_year, params="realistic"
+        )
+        benefit_df["year"] = f"Y{model_year}"
+        yearly_benefits.append(benefit_df)
+    all_years = pd.concat(yearly_benefits, ignore_index=True)
+    selected_year_df = yearly_benefits[year - 1]  # YEARS is 1-based
+
+    selected_total = selected_year_df["annual_value"].sum()
+    st.metric(f"Year {year} total benefit ({scenario_name})",
+              f"${selected_total:,.0f}")
+
+    stacked = px.bar(
+        all_years, x="year", y="annual_value", color="benefit_line",
+        title=f"Benefit breakdown by source — {scenario_name}",
+        labels={"annual_value": "$/yr"},
+    )
+    st.plotly_chart(stacked, width="stretch", key="benefit_stack")
+
+    # Side-by-side comparison of claim vs realistic parameters for the
+    # selected year; a line missing from one side is shown as 0.
+    claim_df = calculate_total_benefit(sites, scopes, scenario, year, params="claim")
+    claim_side = claim_df[["benefit_line", "annual_value"]].rename(
+        columns={"annual_value": "Genesys claim"})
+    realistic_side = selected_year_df[["benefit_line", "annual_value"]].rename(
+        columns={"annual_value": "Pressure-tested"})
+    comparison = pd.merge(
+        claim_side, realistic_side, on="benefit_line", how="outer"
+    ).fillna(0)
+
+    claim_bar = go.Bar(name="Genesys claim", x=comparison["benefit_line"],
+                       y=comparison["Genesys claim"])
+    realistic_bar = go.Bar(name="Pressure-tested realistic",
+                           x=comparison["benefit_line"],
+                           y=comparison["Pressure-tested"])
+    grouped = go.Figure([claim_bar, realistic_bar])
+    grouped.update_layout(barmode="group", yaxis_tickformat="$,.0f",
+                          title=f"Genesys claim vs pressure-tested — Year {year}")
+    st.plotly_chart(grouped, width="stretch", key="claim_vs_real")
+
+# ── Page 5: Business Case ────────────────────────────────────────────
+
+elif page == "5. Business Case":
+    st.header("Business Case")
+    st.session_state.implementation_cost = st.number_input(
+        "One-off implementation cost (amortized over 3 years)",
+        value=float(st.session_state.implementation_cost), min_value=0.0,
+        step=50_000.0,
+    )
+    case = _case(scenario_name)
+
+    pb = case["payback_period_years"]
+    roi = case["roi_3yr"]
+    c1, c2, c3 = st.columns(3)
+    c1.metric("NPV @ 8%", f"${case['npv']:,.0f}")
+    c2.metric("Payback", f"{pb:.2f} yrs" if pb is not None else "never")
+    # Test against None rather than truthiness so that a genuine 0% ROI is
+    # displayed as "0%" instead of being reported as unavailable.
+    c3.metric("3-Year ROI", f"{roi:.0%}" if roi is not None else "n/a")
+
+    # Stack cost, takeout, benefit and net rows into one P&L table; the
+    # confidence column is dropped from the three frames that carry it.
+    pnl = pd.concat(
+        [
+            case["cost_by_year"].drop(columns="confidence"),
+            case["takeouts_by_year"].drop(columns="confidence"),
+            case["benefit_by_year"].drop(columns="confidence"),
+            case["net_by_year"],
+        ],
+        ignore_index=True,
+    )
+    pnl["3-yr Total"] = pnl[["Y1", "Y2", "Y3"]].sum(axis=1)
+    st.dataframe(
+        pnl, width="stretch", hide_index=True,
+        column_config={
+            # Column formats are printf-style strings or named presets; the
+            # Python-only "," grouping flag is not a valid printf placeholder,
+            # so the "dollar" preset supplies the currency formatting.
+            c: st.column_config.NumberColumn(c, format="dollar")
+            for c in ("Y1", "Y2", "Y3", "3-yr Total")
+        },
+    )
+
+    # One cumulative-net line per scenario, regardless of the sidebar choice.
+    fig = go.Figure()
+    for name in ("floor", "realistic", "stretch"):
+        c = _case(name)
+        fig.add_scatter(
+            x=c["cumulative_net"].year, y=c["cumulative_net"].cumulative_net,
+            mode="lines+markers", name=name.capitalize(),
+        )
+    fig.update_layout(title="Cumulative net cash flow by scenario",
+                      xaxis_title="Year", yaxis_tickformat="$,.0f")
+    st.plotly_chart(fig, width="stretch", key="cum_net")
+
+# ── Page 6: Sensitivity ──────────────────────────────────────────────
+
+elif page == "6. Sensitivity Analysis":
+    st.header("Sensitivity Analysis")
+    base_npv = _case(scenario_name)["npv"]
+    st.caption(f"Base 3-yr NPV ({scenario_name}): ${base_npv:,.0f}")
+
+    def _npv_with(**overrides) -> float:
+        """Return the NPV of the current inputs with scenario fields overridden."""
+        tweaked = dataclasses.replace(scenario, **overrides)
+        outcome = build_business_case(
+            sites, scopes, meters, pricing, st.session_state.takeouts, tweaked,
+            implementation_cost=st.session_state.implementation_cost,
+            use_contracted=st.session_state.use_contracted,
+        )
+        return outcome["npv"]
+
+    def _scaled(driver_name: str, factor: float) -> float:
+        """Scale a driver's base value; all but the minutes driver cap at 1.0."""
+        scaled_value = getattr(scenario, driver_name) * factor
+        if driver_name == "voice_bot_avg_minutes":
+            return scaled_value
+        return min(scaled_value, 1.0)
+
+    drivers = [
+        "voice_bot_deflection", "voice_bot_avg_minutes", "agentic_va_deflection",
+        "voice_summarization_eligibility", "voice_knowledge_eligibility",
+        "email_auto_respond_rate", "email_auto_suggest_acceptance",
+    ]
+    # NPV change relative to the base case at 75% and 125% of each driver.
+    tornado_rows = []
+    for driver in drivers:
+        low_delta = _npv_with(**{driver: _scaled(driver, 0.75)}) - base_npv
+        high_delta = _npv_with(**{driver: _scaled(driver, 1.25)}) - base_npv
+        tornado_rows.append(
+            {"driver": driver, "low": low_delta, "high": high_delta}
+        )
+    tornado = pd.DataFrame(tornado_rows)
+    tornado["swing"] = (tornado["high"] - tornado["low"]).abs()
+    # Ascending sort, so the largest swing is the last row.
+    tornado = tornado.sort_values("swing")
+
+    low_bar = go.Bar(y=tornado["driver"], x=tornado["low"],
+                     orientation="h", name="-25%")
+    high_bar = go.Bar(y=tornado["driver"], x=tornado["high"],
+                      orientation="h", name="+25%")
+    tornado_fig = go.Figure([low_bar, high_bar])
+    tornado_fig.update_layout(barmode="overlay",
+                              title="Tornado — NPV impact of ±25%",
+                              xaxis_tickformat="$,.0f")
+    st.plotly_chart(tornado_fig, width="stretch", key="tornado")
+
+    st.subheader("Two-variable heatmap")
+    respond_rates = np.linspace(0.0, 0.50, 6)   # Email Auto-Respond rate
+    deflections = np.linspace(0.0, 0.25, 6)     # Agentic VA deflection
+    # Rows follow the deflection axis, columns the auto-respond axis.
+    npv_grid = []
+    for deflection in deflections:
+        grid_row = [
+            _npv_with(email_auto_respond_rate=float(rate),
+                      agentic_va_deflection=float(deflection))
+            for rate in respond_rates
+        ]
+        npv_grid.append(grid_row)
+    heatmap_fig = go.Figure(go.Heatmap(
+        x=[f"{rate:.0%}" for rate in respond_rates],
+        y=[f"{deflection:.0%}" for deflection in deflections],
+        z=npv_grid,
+        colorbar={"title": "3-yr NPV"},
+    ))
+    heatmap_fig.update_layout(
+        title="NPV: Email Auto-Respond rate × Agentic VA deflection",
+        xaxis_title="Email Auto-Respond rate",
+        yaxis_title="Agentic VA deflection",
+    )
+    st.plotly_chart(heatmap_fig, width="stretch", key="heatmap")
+
+    st.subheader("Break-even finder")
+    sweep_rates = np.linspace(0.0, 0.50, 26)
+    sweep_npvs = [
+        _npv_with(email_auto_respond_rate=float(rate)) for rate in sweep_rates
+    ]
+    # First swept rate at which NPV is non-negative, if there is one.
+    breakeven = None
+    for rate, value in zip(sweep_rates, sweep_npvs):
+        if value >= 0:
+            breakeven = rate
+            break
+    if sweep_npvs[0] >= 0:
+        st.success(f"Case is NPV-positive even at 0% Auto-Respond "
+                   f"(${sweep_npvs[0]:,.0f}).")
+    elif breakeven is not None:
+        st.info(f"Break-even at ~{breakeven:.0%} email Auto-Respond rate.")
+    else:
+        st.error("No break-even within 0–50% Auto-Respond.")
+    sweep_fig = px.line(
+        x=sweep_rates, y=sweep_npvs,
+        labels={"x": "Email Auto-Respond rate", "y": "3-yr NPV ($)"},
+    )
+    st.plotly_chart(sweep_fig, width="stretch", key="breakeven")
+
+# ── Page 7: Export ───────────────────────────────────────────────────
+
+elif page == "7. Export":
+    st.header("Export")
+    case = _case(scenario_name)
+
+    # Per-year cost and benefit detail, each frame tagged with a "Y<n>" label.
+    cost_frames, ben_frames = [], []
+    for y in YEARS:
+        d = calculate_total_cost(sites, scopes, meters, pricing, scenario, y,
+                                 use_contracted=st.session_state.use_contracted)
+        d["year"] = f"Y{y}"
+        cost_frames.append(d)
+        b = calculate_total_benefit(sites, scopes, scenario, y)
+        b["year"] = f"Y{y}"
+        ben_frames.append(b)
+
+    # Fetch each scenario's case once; every _case() call re-serialises the
+    # whole input state to build its cache key.
+    comparison_rows = []
+    for n in ("floor", "realistic", "stretch"):
+        scenario_case = _case(n)
+        comparison_rows.append(
+            {"scenario": n, "NPV": scenario_case["npv"],
+             "payback_years": scenario_case["payback_period_years"],
+             "roi_3yr": scenario_case["roi_3yr"]}
+        )
+    comparison = pd.DataFrame(comparison_rows)
+
+    # P&L table for the selected scenario; the confidence column is dropped
+    # from the three frames that carry it.
+    pnl = pd.concat(
+        [case["cost_by_year"].drop(columns="confidence"),
+         case["takeouts_by_year"].drop(columns="confidence"),
+         case["benefit_by_year"].drop(columns="confidence"),
+         case["net_by_year"]],
+        ignore_index=True,
+    )
+
+    # Build the workbook in memory so it can be handed to the download button.
+    buf = io.BytesIO()
+    with pd.ExcelWriter(buf, engine="openpyxl") as writer:
+        sites_dataframe(sites).to_excel(writer, sheet_name="Inputs", index=False)
+        meters_dataframe(meters).to_excel(writer, sheet_name="Meters", index=False)
+        pd.concat(cost_frames).to_excel(writer, sheet_name="Cost detail", index=False)
+        pd.concat(ben_frames).to_excel(writer, sheet_name="Benefit detail", index=False)
+        pnl.to_excel(writer, sheet_name="Business case", index=False)
+        comparison.to_excel(writer, sheet_name="Scenario comparison", index=False)
+    st.download_button(
+        "⬇️ Download Excel workbook",
+        buf.getvalue(),
+        file_name=f"ctm_token_calculator_{scenario_name}.xlsx",
+        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    )
+    st.download_button(
+        "⬇️ Download scenario JSON",
+        scenario_state_to_json(sites, st.session_state.takeouts, scopes),
+        file_name="ctm_scenario.json", mime="application/json",
+    )
+    st.dataframe(comparison, width="stretch", hide_index=True)