# palladium/core/export/report_data.py

"""
Build the structured JSON consumed by the report pipeline.

The Athena ``GET /tools/{public_id}/export/`` endpoint already returns most
of what we need; this module:

1. Calls the export endpoint.
2. Optionally augments it with locally computed scenario analysis
   (conservative / moderate / aggressive).
3. Stamps Palladium metadata (export timestamp, study slug, generator).
4. Serializes to a stable JSON file that html2docx / Peitho can consume.
"""

from __future__ import annotations

import json
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

from core import __version__
from core.calculations import (
    apply_scenario,
    npv,
    payback_months,
    risk_adjust_benefit,
    risk_adjust_cost,
    roi_percentage,
)


def _split_by_table(values: list[dict]) -> tuple[list[dict], list[dict]]:
    """Partition value rows into ``(benefits, costs)`` by their ``"table"`` key.

    Rows whose ``"table"`` is missing or is neither ``"benefits"`` nor
    ``"costs"`` are dropped. Input order is preserved within each list.
    """

    def _rows_for(table_name: str) -> list[dict]:
        # One pass per table keeps each list in the original row order.
        return [row for row in values if row.get("table") == table_name]

    return _rows_for("benefits"), _rows_for("costs")


def _yearly_totals(items: list[dict], analysis_years: int) -> list[float]:
    """Sum every item's ``year_values`` into one total per analysis year.

    Returns a list of length ``analysis_years`` where index ``0`` holds the
    total for year 1. Year keys that cannot be parsed as an integer, or that
    fall outside ``1..analysis_years``, are ignored. Falsy amounts (``None``,
    ``0``, ``""``) contribute ``0``.
    """
    per_year = [0.0] * analysis_years
    for item in items:
        year_map = item.get("year_values") or {}
        for raw_year, amount in year_map.items():
            try:
                year_no = int(raw_year)
            except (TypeError, ValueError):
                # Non-numeric key: not a year column, skip it.
                continue
            if year_no < 1 or year_no > analysis_years:
                continue
            per_year[year_no - 1] += float(amount or 0)
    return per_year


def _initial_total(items: list[dict]) -> float:
    """Return the sum of every item's ``"initial"`` amount.

    A missing or falsy ``"initial"`` counts as ``0``; other values are
    converted with ``float``.
    """
    upfront_amounts = [float(entry.get("initial") or 0) for entry in items]
    return sum(upfront_amounts)


def _risk_adjusted(items: list[dict], for_table: str) -> list[dict]:
    """Return shallow copies of ``items`` with risk-adjusted amounts.

    For each item the ``"risk_adjustment"`` factor (``0.0`` when missing or
    falsy) is applied to every entry of ``"year_values"``:
    ``risk_adjust_benefit`` when ``for_table == "benefits"``, otherwise
    ``risk_adjust_cost``. Year keys are stringified in the result.

    A non-``None`` ``"initial"`` is converted to ``float``; it is passed
    through ``risk_adjust_cost`` only when ``for_table == "costs"``.

    The input dicts are not mutated.
    """
    treat_as_benefit = for_table == "benefits"
    treat_as_cost = for_table == "costs"

    adjusted_items: list[dict] = []
    for item in items:
        factor = float(item.get("risk_adjustment") or 0.0)

        scaled_years: dict[str, float] = {}
        for year_key, raw_amount in (item.get("year_values") or {}).items():
            amount = float(raw_amount or 0)
            if treat_as_benefit:
                scaled = risk_adjust_benefit(amount, factor)
            else:
                scaled = risk_adjust_cost(amount, factor)
            scaled_years[str(year_key)] = scaled

        # Copy so the caller's row keeps its raw (unadjusted) figures.
        clone = {**item, "year_values": scaled_years}

        upfront = clone.get("initial")
        if upfront is not None:
            upfront = float(upfront)
            if treat_as_cost:
                upfront = risk_adjust_cost(upfront, factor)
            clone["initial"] = upfront

        adjusted_items.append(clone)
    return adjusted_items


def _compute_summary(
    benefits: list[dict],
    costs: list[dict],
    discount_rate: float,
    analysis_years: int,
) -> dict[str, Any]:
    """Compute the financial summary for one set of benefit and cost rows.

    Both row lists are risk-adjusted first, then rolled up into per-year
    totals. Present values come from ``npv`` (costs include the initial
    outlay via ``initial=``), ROI from ``roi_percentage`` and payback from
    ``payback_months``.

    Returns:
        A dict with the inputs (``discount_rate``, ``analysis_years``),
        nominal and present-value totals, ``npv``, ``roi_pct``,
        ``payback_months``, a per-year ``yearly_breakdown`` and
        ``initial_costs``.
    """
    adjusted_benefits = _risk_adjusted(benefits, "benefits")
    adjusted_costs = _risk_adjusted(costs, "costs")

    yearly_benefits = _yearly_totals(adjusted_benefits, analysis_years)
    yearly_costs = _yearly_totals(adjusted_costs, analysis_years)
    upfront_costs = _initial_total(adjusted_costs)

    pv_benefits = npv(yearly_benefits, discount_rate)
    pv_costs = npv(yearly_costs, discount_rate, initial=upfront_costs)

    total_benefits = sum(yearly_benefits)
    total_costs = sum(yearly_costs) + upfront_costs

    yearly_net = [
        gained - spent
        for gained, spent in zip(yearly_benefits, yearly_costs, strict=False)
    ]
    payback = payback_months(upfront_costs, yearly_net)

    summary: dict[str, Any] = {
        "discount_rate": discount_rate,
        "analysis_years": analysis_years,
        "total_benefits_nominal": total_benefits,
        "total_benefits_pv": pv_benefits,
        "total_costs_nominal": total_costs,
        "total_costs_pv": pv_costs,
        "npv": pv_benefits - pv_costs,
        "roi_pct": roi_percentage(pv_benefits, pv_costs),
        "payback_months": payback,
    }

    breakdown: list[dict[str, Any]] = []
    for year in range(1, analysis_years + 1):
        slot = year - 1
        breakdown.append(
            {
                "year": year,
                "benefits": yearly_benefits[slot],
                "costs": yearly_costs[slot],
                "net": yearly_net[slot],
                # Running net position after this year, net of the initial outlay.
                "cumulative_net": sum(yearly_net[:year]) - upfront_costs,
            }
        )

    summary["yearly_breakdown"] = breakdown
    summary["initial_costs"] = upfront_costs
    return summary


def build_report_data(
    client,
    public_id: str,
    *,
    include_scenarios: bool = True,
    study_slug: str | None = None,
) -> dict[str, Any]:
    """
    Build the full export envelope for a TEI tool.

    Args:
        client: a :class:`core.tei_client.TEIClient` instance.
        public_id: the TEI tool's public_id.
        include_scenarios: if True, locally compute conservative / moderate /
            aggressive summaries and attach them under ``scenarios``.
        study_slug: optional human-friendly study identifier (e.g.
            ``"202602_AmazonConnect"``) — written into ``metadata``.

    Returns:
        A dict with keys::

            {
                "metadata": {...},      # tool / report identifiers, study slug, generator
                "report": {...},        # report template echo
                "fields": ...,          # field definitions from the tool bundle
                "values": {benefits, costs},
                "summary": {...},       # locally recomputed
                "athena_export": {...}, # raw export payload, or {"error": "..."}
                "scenarios": {...}      # only when include_scenarios is True
            }

    Notes:
        ``discount_rate`` falls back to ``0.10`` only when the report does
        not set one (``None`` or ``""``); an explicit ``0`` is honoured.
        ``analysis_period_years`` falls back to ``3`` when missing or falsy.
    """
    # Pull everything we need
    bundle = client.get_tool_with_data(public_id)
    tool = bundle["tool"]
    fields = bundle["fields"]
    values = bundle["values"]

    # The tool references its report either by id (str) or embeds it (dict).
    report_obj = tool.get("report")
    if isinstance(report_obj, str):
        report = client.get_report(report_obj)
    elif isinstance(report_obj, dict):
        report = report_obj
    else:
        report = {}

    # Do not use ``value or default`` here: a numeric 0 is falsy and would be
    # silently replaced by 10%, whereas the string "0" would be kept.
    raw_rate = report.get("discount_rate")
    if raw_rate is None or raw_rate == "":
        discount_rate = 0.10
    else:
        discount_rate = float(raw_rate)
    # A zero-length analysis period is not meaningful, so falsy -> default.
    analysis_years = int(report.get("analysis_period_years") or 3)

    benefits, costs = _split_by_table(values)
    summary = _compute_summary(benefits, costs, discount_rate, analysis_years)

    # The raw export is attached best-effort; a failure is recorded in the
    # envelope instead of aborting the whole build.
    try:
        athena_export = client.export(public_id)
    except Exception as e:  # pragma: no cover  – best effort
        athena_export = {"error": str(e)}

    envelope: dict[str, Any] = {
        "metadata": {
            "study_slug": study_slug or "",
            "tool_public_id": public_id,
            "tool_name": tool.get("name", ""),
            "report_name": report.get("name", ""),
            "report_vendor": report.get("vendor", ""),
            "report_version": report.get("version", ""),
            "report_public_id": report.get("id", ""),
            "proposal": tool.get("proposal") or tool.get("opportunity"),
            "engagement": tool.get("engagement"),
            "generated_at": datetime.now(UTC).isoformat(),
            "generator": f"palladium core {__version__}",
        },
        "report": report,
        "fields": fields,
        "values": {"benefits": benefits, "costs": costs},
        "summary": summary,
        "athena_export": athena_export,
    }

    if include_scenarios:
        scenario_results: dict[str, Any] = {}
        for scenario_name in ("conservative", "moderate", "aggressive"):
            sb = apply_scenario(benefits, scenario_name, table="benefits")
            sc = apply_scenario(costs, scenario_name, table="costs")
            scenario_results[scenario_name] = _compute_summary(
                sb, sc, discount_rate, analysis_years
            )
        envelope["scenarios"] = scenario_results

    return envelope


def write_report_data(
    envelope: dict[str, Any],
    output_path: str | Path,
    *,
    indent: int = 2,
) -> Path:
    """Serialize ``envelope`` to ``output_path`` and return the Path."""
    path = Path(output_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(envelope, indent=indent, default=str))
    return path