Files
palladium/core/bootstrap.py

185 lines
5.8 KiB
Python

"""
Palladium notebook bootstrap — one import to set everything up.
From *any* notebook in the repo (root, ``studies/<slug>/notebooks/``, …)::
from core.bootstrap import init
pal = init() # loads .env, builds client, tests it
pal.client.list_reports()
or, for a study notebook::
pal = init(study="202602_AmazonConnect")
pal.config.STUDY_SLUG, pal.seed_data.BENEFITS
If ``core`` itself can't be imported (fresh kernel, notebook cwd deep in the
tree), put this two-liner first — it is the only path juggling left anywhere::
    import sys, pathlib
    sys.path.insert(0, str(next(p for p in (pathlib.Path.cwd(), *pathlib.Path.cwd().parents)
                                if (p / "pyproject.toml").exists())))
Better: ``pip install -e .`` once (``make setup`` does this) and even that
two-liner is unnecessary.
Credentials live in ``<repo root>/.env`` (gitignored)::
ATHENA_BASE_URL=https://athena.ouranos.helu.ca
ATHENA_API_KEY=...
``save_credentials()`` writes that file for you — 00_setup.ipynb uses it with
``getpass`` so the key never lands in notebook output.
"""
from __future__ import annotations
import importlib
import os
import sys
from dataclasses import dataclass, field
from pathlib import Path
from types import ModuleType
from typing import Any
from dotenv import load_dotenv
__all__ = ["init", "find_root", "save_credentials", "update_env", "Palladium"]
_ROOT_MARKERS = ("pyproject.toml", ".git")
def find_root(start: Path | None = None) -> Path:
"""Locate the repo root by walking up until pyproject.toml/.git is found."""
candidates = [start] if start else [Path.cwd(), Path(__file__).resolve().parent]
for cand in candidates:
for p in [cand, *cand.parents]:
if any((p / m).exists() for m in _ROOT_MARKERS):
return p
return Path.cwd() # pragma: no cover — degenerate fallback
def _ensure_importable(root: Path) -> None:
    """Put ``root`` at the front of ``sys.path`` unless it is already listed."""
    entry = str(root)
    if entry in sys.path:
        return
    sys.path.insert(0, entry)
def update_env(root: Path | None = None, **pairs: str) -> Path:
"""
Write (or update) keys in ``<root>/.env``, preserving all other lines.
Also updates ``os.environ`` so the values take effect in the running
kernel immediately. Returns the path to the .env file.
"""
root = root or find_root()
env_path = root / ".env"
updates = {k: str(v) for k, v in pairs.items()}
lines: list[str] = []
if env_path.exists():
lines = env_path.read_text().splitlines()
seen: set[str] = set()
for i, line in enumerate(lines):
key = line.split("=", 1)[0].strip().lstrip("# ").strip()
if key in updates:
lines[i] = f"{key}={updates[key]}"
seen.add(key)
for key, val in updates.items():
if key not in seen:
lines.append(f"{key}={val}")
env_path.write_text("\n".join(lines) + "\n")
os.environ.update(updates)
return env_path
def save_credentials(
    api_key: str,
    base_url: str = "https://athena.ouranos.helu.ca",
    root: Path | None = None,
) -> Path:
    """
    Store the Athena credentials in ``<root>/.env``.

    Trailing slashes are removed from ``base_url``; ``api_key`` is stored as
    given. The actual writing is delegated to ``update_env``, whose return
    value (the path of the .env file) is passed through.
    """
    credentials = {
        "ATHENA_BASE_URL": base_url.rstrip("/"),
        "ATHENA_API_KEY": api_key,
    }
    return update_env(root, **credentials)
@dataclass
class Palladium:
"""Everything a notebook session needs, in one object."""
root: Path
client: Any = None
config: ModuleType | None = None
seed_data: ModuleType | None = None
connection: dict = field(default_factory=dict)
def __repr__(self) -> str:
status = self.connection.get("status", "not tested")
study = getattr(self.config, "STUDY_SLUG", None)
return (
f"Palladium(root={self.root.name!r}, athena={status!r}"
+ (f", study={study!r}" if study else "")
+ ")"
)
def _print_connection_status(connection: dict) -> None:
    """Print a short human-readable summary of a connection-test result."""
    status = connection.get("status")
    if status == "ok":
        print(
            f"✅ Athena connected — {connection.get('base_url', '?')} "
            f"({connection.get('reports_found', '?')} report templates visible)"
        )
    elif status == "unconfigured":
        print(
            "⚠️ No credentials. Run 00_setup.ipynb, or:\n"
            " from core.bootstrap import save_credentials\n"
            " save_credentials(api_key='')"
        )
    else:
        print(
            f"❌ Athena connection failed "
            f"({connection.get('error_code')}): {connection.get('detail')}"
        )


def init(
    study: str | None = None,
    connect: bool = True,
    quiet: bool = False,
) -> Palladium:
    """
    One-call notebook setup.

    1. Finds the repo root and makes ``core``/``studies`` importable.
    2. Loads ``<root>/.env``.
    3. Optionally imports a study's ``config`` and ``seed_data`` modules
       (``study="<slug>"``). ``config`` is required; ``seed_data`` is
       optional and left as ``None`` when the study has no such module.
    4. Builds a :class:`TEIClient` and tests the connection (``connect=True``).

    With ``quiet=False`` a status line for the connection (and the study, if
    any) is printed.

    Returns a :class:`Palladium` namespace: ``pal.client``, ``pal.config``,
    ``pal.seed_data``, ``pal.root``, ``pal.connection``.

    Raises ``ModuleNotFoundError`` when the study's ``config`` module cannot
    be imported, or when an existing ``seed_data`` module itself imports
    something that is not installed.
    """
    root = find_root()
    _ensure_importable(root)
    load_dotenv(root / ".env")
    pal = Palladium(root=root)

    if study:
        pal.config = importlib.import_module(f"studies.{study}.config")
        seed_module = f"studies.{study}.seed_data"
        try:
            pal.seed_data = importlib.import_module(seed_module)
        except ModuleNotFoundError as exc:
            # Only a missing seed_data module is tolerated. If seed_data
            # exists but one of *its* imports is missing, surface the error
            # instead of silently pretending the study has no seed data.
            if exc.name != seed_module:
                raise
            pal.seed_data = None

    if connect:
        # Imported lazily so that init(connect=False) does not load the client.
        from core.tei_client import TEIClient

        try:
            pal.client = TEIClient()
            pal.connection = pal.client.test_connection()
        except ValueError as e:  # missing credentials
            pal.connection = {"status": "unconfigured", "detail": str(e)}
        if not quiet:
            _print_connection_status(pal.connection)

    if not quiet and study and pal.config is not None:
        print(f"📁 Study: {study}")
    return pal