refactor: restructure repo into core/app modules with per-study folders

Reorganize Palladium codebase into a modular architecture with `core/`
shared logic and `app/` Streamlit UI, separating per-study assets into
`studies/YYYYMM_<Vendor>/` folders containing notebooks, seed data, and
configuration. Update README to reflect new structure, add `.gitignore`
entries for `.env` and study exports, and refresh component documentation.
This commit is contained in:
2026-05-20 22:28:12 -04:00
parent a6f3ee3676
commit a2420ed692
52 changed files with 35300 additions and 105 deletions

5
core/export/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
"""Export utilities — build the LLM-ready JSON envelope."""
from core.export.report_data import build_report_data, write_report_data

# Names re-exported from ``core.export.report_data`` as this package's public API.
__all__ = ["build_report_data", "write_report_data"]

224
core/export/report_data.py Normal file
View File

@@ -0,0 +1,224 @@
"""
Build the structured JSON consumed by the report pipeline.
The Athena ``GET /tools/{public_id}/export/`` endpoint already returns most
of what we need; this module:
1. Calls the export endpoint.
2. Optionally augments it with locally computed scenario analysis
(conservative / moderate / aggressive).
3. Stamps Palladium metadata (export timestamp, study slug, generator).
4. Serializes to a stable JSON file that html2docx / Peitho can consume.
"""
from __future__ import annotations
import json
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from core import __version__
from core.calculations import (
apply_scenario,
npv,
payback_months,
risk_adjust_benefit,
risk_adjust_cost,
roi_percentage,
)
def _split_by_table(values: list[dict]) -> tuple[list[dict], list[dict]]:
    """Partition value rows into ``(benefits, costs)`` by their ``"table"`` key.

    Rows whose ``"table"`` entry is neither ``"benefits"`` nor ``"costs"``
    (or is missing) appear in neither list. Input order is preserved.
    """

    def rows_for(table_name: str) -> list[dict]:
        # One filtering pass per table name, each keeping the original order.
        return [row for row in values if row.get("table") == table_name]

    return rows_for("benefits"), rows_for("costs")
def _yearly_totals(items: list[dict], analysis_years: int) -> list[float]:
    """Sum every item's ``year_values`` into one total per analysis year.

    Args:
        items: rows that may carry a ``"year_values"`` mapping of
            year key -> amount; a missing or falsy mapping contributes nothing.
        analysis_years: number of years in the result.

    Returns:
        A list of length ``analysis_years`` where index 0 holds year 1.
        Keys that cannot be converted with ``int()`` and years outside
        ``1..analysis_years`` are ignored; falsy amounts count as 0.
    """
    per_year = [0.0 for _ in range(analysis_years)]
    for entry in items:
        year_map = entry.get("year_values") or {}
        for raw_year, amount in year_map.items():
            try:
                year_no = int(raw_year)
            except (TypeError, ValueError):
                # Non-numeric year key: skip it rather than fail the export.
                continue
            if year_no < 1 or year_no > analysis_years:
                continue
            per_year[year_no - 1] += float(amount or 0)
    return per_year
def _initial_total(items: list[dict]) -> float:
    """Return the sum of every item's ``"initial"`` amount.

    A missing or falsy ``"initial"`` counts as 0; other values are
    converted with ``float()``.
    """
    initial_amounts = [float(entry.get("initial") or 0) for entry in items]
    return sum(initial_amounts)
def _risk_adjusted(items: list[dict], for_table: str) -> list[dict]:
    """Return shallow copies of ``items`` with risk-adjusted amounts.

    Each item's ``"risk_adjustment"`` (missing/falsy -> 0.0) is the factor
    passed to the adjustment helpers.

    Args:
        items: rows carrying optional ``"year_values"``, ``"initial"`` and
            ``"risk_adjustment"`` entries. The input dicts are not modified.
        for_table: ``"benefits"`` routes yearly amounts through
            ``risk_adjust_benefit``; any other value uses ``risk_adjust_cost``.

    Returns:
        A new list of dicts. ``"year_values"`` is always present in the
        copies, with string keys. A non-``None`` ``"initial"`` is adjusted with
        ``risk_adjust_cost`` only when ``for_table == "costs"``; otherwise it
        is just converted to ``float``.
    """
    is_benefit_table = for_table == "benefits"
    is_cost_table = for_table == "costs"

    adjusted_items: list[dict] = []
    for item in items:
        factor = float(item.get("risk_adjustment") or 0.0)

        yearly: dict[str, float] = {}
        for year_key, raw_amount in (item.get("year_values") or {}).items():
            amount = float(raw_amount or 0)
            if is_benefit_table:
                yearly[str(year_key)] = risk_adjust_benefit(amount, factor)
            else:
                yearly[str(year_key)] = risk_adjust_cost(amount, factor)

        # Shallow copy so the caller's row keeps its raw values.
        clone = {**item, "year_values": yearly}

        initial = clone.get("initial")
        if initial is not None:
            if is_cost_table:
                clone["initial"] = risk_adjust_cost(float(initial), factor)
            else:
                clone["initial"] = float(initial)

        adjusted_items.append(clone)
    return adjusted_items
def _compute_summary(
    benefits: list[dict],
    costs: list[dict],
    discount_rate: float,
    analysis_years: int,
) -> dict[str, Any]:
    """Compute the financial summary for one set of benefit and cost rows.

    Rows are risk-adjusted first, rolled up into per-year totals, and then
    fed to the ``core.calculations`` helpers (``npv``, ``payback_months``,
    ``roi_percentage``).

    Args:
        benefits: benefit rows.
        costs: cost rows; their ``"initial"`` amounts form the initial cost.
        discount_rate: rate passed through to ``npv``.
        analysis_years: number of years covered by the yearly breakdown.

    Returns:
        A dict with nominal and present-value totals, NPV, ROI, payback, a
        per-year ``yearly_breakdown`` list and ``initial_costs``.
    """
    adjusted_benefits = _risk_adjusted(benefits, "benefits")
    adjusted_costs = _risk_adjusted(costs, "costs")

    yearly_benefits = _yearly_totals(adjusted_benefits, analysis_years)
    yearly_costs = _yearly_totals(adjusted_costs, analysis_years)
    upfront_costs = _initial_total(adjusted_costs)

    pv_benefits = npv(yearly_benefits, discount_rate)
    pv_costs = npv(yearly_costs, discount_rate, initial=upfront_costs)

    yearly_net = [
        gained - spent for gained, spent in zip(yearly_benefits, yearly_costs)
    ]
    payback = payback_months(upfront_costs, yearly_net)

    breakdown: list[dict[str, Any]] = []
    for year in range(1, analysis_years + 1):
        position = year - 1
        breakdown.append(
            {
                "year": year,
                "benefits": yearly_benefits[position],
                "costs": yearly_costs[position],
                "net": yearly_net[position],
                # Running net through this year, less the initial costs.
                "cumulative_net": sum(yearly_net[:year]) - upfront_costs,
            }
        )

    return {
        "discount_rate": discount_rate,
        "analysis_years": analysis_years,
        "total_benefits_nominal": sum(yearly_benefits),
        "total_benefits_pv": pv_benefits,
        "total_costs_nominal": sum(yearly_costs) + upfront_costs,
        "total_costs_pv": pv_costs,
        "npv": pv_benefits - pv_costs,
        "roi_pct": roi_percentage(pv_benefits, pv_costs),
        "payback_months": payback,
        "yearly_breakdown": breakdown,
        "initial_costs": upfront_costs,
    }
def build_report_data(
    client,
    public_id: str,
    *,
    include_scenarios: bool = True,
    study_slug: str | None = None,
) -> dict[str, Any]:
    """
    Build the full export envelope for a TEI tool.

    Args:
        client: a :class:`core.tei_client.TEIClient` instance.
        public_id: the TEI tool's public_id.
        include_scenarios: if True, locally compute conservative / moderate /
            aggressive summaries and attach them under ``scenarios``.
        study_slug: optional human-friendly study identifier (e.g.
            ``"202602_AmazonConnect"``) — written into ``metadata``.

    Returns:
        A dict with keys::

            {
              "metadata": {...},       # client / opportunity / study / generator
              "report": {...},         # report template echo
              "fields": ...,           # ``fields`` from the tool bundle, as-is
              "values": {"benefits": [...], "costs": [...]},
              "summary": {...},        # locally recomputed (mirrors Athena)
              "athena_export": {...},  # raw payload from Athena, or
                                       # {"error": "<message>"} if the call failed
              "scenarios": {...}       # only when include_scenarios is True
            }
    """
    # Pull everything we need
    bundle = client.get_tool_with_data(public_id)
    tool = bundle["tool"]
    fields = bundle["fields"]
    values = bundle["values"]

    # The tool may carry the report either as a string reference (resolved
    # through the client) or as an already-embedded dict.
    report_obj = tool.get("report")
    if isinstance(report_obj, str):
        # Fall back to an empty report if the lookup yields nothing, so the
        # ``report.get`` calls below cannot fail on ``None``.
        report = client.get_report(report_obj) or {}
    elif isinstance(report_obj, dict):
        report = report_obj
    else:
        report = {}

    # Only a missing/blank discount rate falls back to 10%. An explicit 0 is
    # a real rate and must not be replaced (``0 or 0.10`` would do that).
    raw_discount_rate = report.get("discount_rate")
    if raw_discount_rate is None or raw_discount_rate == "":
        discount_rate = 0.10
    else:
        discount_rate = float(raw_discount_rate)
    # A missing or zero analysis period falls back to 3 years.
    analysis_years = int(report.get("analysis_period_years") or 3)

    benefits, costs = _split_by_table(values)
    summary = _compute_summary(benefits, costs, discount_rate, analysis_years)

    # Best effort: a failing export call must not block the locally computed
    # envelope, so the error text is recorded in place of the payload.
    try:
        athena_export = client.export(public_id)
    except Exception as e:  # pragma: no cover best effort
        athena_export = {"error": str(e)}

    envelope: dict[str, Any] = {
        "metadata": {
            "study_slug": study_slug or "",
            "tool_public_id": public_id,
            "tool_name": tool.get("name", ""),
            "report_name": report.get("name", ""),
            "report_vendor": report.get("vendor", ""),
            "report_version": report.get("version", ""),
            "report_public_id": report.get("id", ""),
            "proposal": tool.get("proposal") or tool.get("opportunity"),
            "engagement": tool.get("engagement"),
            "generated_at": datetime.now(UTC).isoformat(),
            "generator": f"palladium core {__version__}",
        },
        "report": report,
        "fields": fields,
        "values": {"benefits": benefits, "costs": costs},
        "summary": summary,
        "athena_export": athena_export,
    }

    if include_scenarios:
        scenario_results: dict[str, Any] = {}
        for scenario_name in ("conservative", "moderate", "aggressive"):
            sb = apply_scenario(benefits, scenario_name, table="benefits")
            sc = apply_scenario(costs, scenario_name, table="costs")
            scenario_results[scenario_name] = _compute_summary(
                sb, sc, discount_rate, analysis_years
            )
        envelope["scenarios"] = scenario_results

    return envelope
def write_report_data(
envelope: dict[str, Any],
output_path: str | Path,
*,
indent: int = 2,
) -> Path:
"""Serialize ``envelope`` to ``output_path`` and return the Path."""
path = Path(output_path)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(envelope, indent=indent, default=str))
return path