From ac4af942ab15eba20fb8404391642c64ef828d0e Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Tue, 5 May 2026 22:46:14 -0400 Subject: [PATCH] log: route third-party + traceback records through the pallas log The existing setup only attached the file/stderr handlers to the 'pallas' namespace, so every record emitted by fast-agent, fastmcp, the MCP SDK, Anthropic, uvicorn etc. disappeared into Rich's progress display and never hit pallas.log. When one of those libraries raised and logged 'something failed' via logger.error(..., exc_info=True), we ended up grepping a Rich-overwritten TTY for a traceback that was already long gone -- exactly the situation blocking the current Mnemosyne debug. This patch: * Extends _JSONFormatter to serialise exc_info/stack_info as a 'traceback' field when present, so Loki/grep sees the full stack. * Attaches the same file+stderr handlers to the *root* logger so every library's records (and any uncaught logger.error tracebacks) land in pallas.log with the stack attached. * Keeps the 'pallas' logger's own handlers (propagate=False) so our records are unaffected by any later root-handler manipulation. * Tags our handlers with _pallas_attached so repeated setup_logging() calls are idempotent -- important because uvicorn workers and fast-agent subagent subprocesses each reinitialise logging. httpx/httpcore stay at WARNING so we don't flood the log with per- request body traces on a DEBUG deployment. Demote third-party namespaces further in a follow-up if needed. --- pallas/log.py | 148 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 43 deletions(-) diff --git a/pallas/log.py b/pallas/log.py index a52151d..6e74854 100644 --- a/pallas/log.py +++ b/pallas/log.py @@ -26,19 +26,39 @@ from pathlib import Path class _JSONFormatter(logging.Formatter): - """Single-line JSON formatter compatible with Alloy's ``| json`` pipeline.""" + """Single-line JSON formatter compatible with Alloy's ``| json`` pipeline. + + Captures ``exc_info`` when present so callers that do + ``logger.error("...", exc_info=True)`` (or let an unhandled exception + bubble into a log handler) get the full traceback preserved as a + ``traceback`` field. Without this, every ``logger.error`` in + fast-agent / fastmcp / the MCP SDK loses its stack trace and we end + up guessing from the single-line message — which is exactly the + rabbit hole we spent hours in during the bearer-forwarding debug. + + Also pulls in ``record.exc_text`` if the formatter was already + populated upstream (e.g. by another handler), avoiding duplicate + ``traceback.format_exception`` work. + """ def format(self, record: logging.LogRecord) -> str: - return json.dumps( - { - "time": datetime.fromtimestamp(record.created, tz=timezone.utc).strftime( - "%Y-%m-%dT%H:%M:%SZ" - ), - "level": record.levelname, - "logger": record.name, - "message": record.getMessage(), - } - ) + payload: dict[str, object] = { + "time": datetime.fromtimestamp(record.created, tz=timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + if record.exc_info: + if not record.exc_text: + record.exc_text = self.formatException(record.exc_info) + payload["traceback"] = record.exc_text + elif record.exc_text: + payload["traceback"] = record.exc_text + if record.stack_info: + payload["stack"] = self.formatStack(record.stack_info) + return json.dumps(payload) class _HealthAccessFilter(logging.Filter): @@ -131,13 +151,26 @@ def setup_logging() -> None: ``pallas.auth`` loggers — essential when troubleshooting Mnemosyne / workspace-scoped agent calls. + Scope of handlers: + + * The ``pallas`` logger gets its own copies (``propagate=False``) so our + own records never double-log if the root handlers are ever changed. + * The **root** logger also gets the same file+stderr handlers. This is + what captures everything emitted by fast-agent / fastmcp / the MCP SDK + / Anthropic / uvicorn — critically including their tracebacks via + ``_JSONFormatter``'s ``exc_info`` support. Without this, a + ``logger.error("send_request failed", exc_info=True)`` inside + fast-agent disappears into Rich's progress display and we're left + guessing from a single-line message. + Handlers attached: * ``RotatingFileHandler`` → ``PALLAS_LOG_FILE`` (or ``~/.local/state/pallas/pallas.log``). Durable capture that survives Rich's console takeover — fast-agent's ``progress_display`` hijacks stdout and swallows plain ``print`` / stderr writes mid-render, so a - file sink is the only guaranteed way to see DEBUG diagnostics. + file sink is the only guaranteed way to see DEBUG diagnostics and + tracebacks. * ``StreamHandler`` → ``sys.__stderr__``. Uses the *original* stderr file descriptor captured before Rich installed its Live display, so records bypass the progress renderer and land in the terminal or @@ -155,43 +188,72 @@ def setup_logging() -> None: ).upper() level = getattr(logging, level_name, logging.INFO) + # Build the shared handlers once — they get attached to both the + # ``pallas`` logger (with ``propagate=False``) *and* the root logger, + # so third-party libraries' records land in the same file sink. + handlers: list[logging.Handler] = [] + log_file: Path | None = None + try: + log_file = _resolve_log_file() + log_file.parent.mkdir(parents=True, exist_ok=True) + file_handler = logging.handlers.RotatingFileHandler( + log_file, + maxBytes=10 * 1024 * 1024, + backupCount=5, + encoding="utf-8", + ) + file_handler.setFormatter(formatter) + handlers.append(file_handler) + except Exception as exc: # pragma: no cover — never fatal + logging.getLogger(__name__).warning( + "could not attach file log handler: %s", exc + ) + + # Stderr sink — writes to the *original* FD Rich captured at import + # time, so our records skip the progress display and hit the TTY + # (or systemd journal) directly. + stream_handler = logging.StreamHandler(stream=sys.__stderr__) + stream_handler.setFormatter(formatter) + handlers.append(stream_handler) + + # Root logger carries everything from libraries we do NOT own. + # We set the level low enough to accept our configured level; each + # noisy namespace is dialled back below. Handlers are attached here, + # not on child loggers, so records propagate to a single sink. + root = logging.getLogger() + root.setLevel(level) + # Avoid duplicate handlers across re-entrant setup_logging() calls + # (e.g. if the agent subprocess re-imports pallas with an altered + # config). We tag ours so a safe idempotent re-attach is possible. + for h in list(root.handlers): + if getattr(h, "_pallas_attached", False): + root.removeHandler(h) + for h in handlers: + setattr(h, "_pallas_attached", True) + root.addHandler(h) + pallas_logger = logging.getLogger("pallas") pallas_logger.setLevel(level) + # Keep propagate=False so our own records don't appear twice (once via + # the dedicated handlers below, once via the root logger). We give + # ``pallas`` its own copies of the handlers instead of relying on + # propagation — this keeps its behaviour stable even if some other + # bootstrap code clears the root handlers later. pallas_logger.propagate = False + for h in list(pallas_logger.handlers): + if getattr(h, "_pallas_attached", False): + pallas_logger.removeHandler(h) + for h in handlers: + pallas_logger.addHandler(h) - if not pallas_logger.handlers: - # File sink — always present so DEBUG output is never lost to a - # Rich-owned terminal. Rotate at 10 MiB, keep 5 archives (≈50 MiB - # max per agent) which easily spans a multi-day debugging session. - try: - log_file = _resolve_log_file() - log_file.parent.mkdir(parents=True, exist_ok=True) - file_handler = logging.handlers.RotatingFileHandler( - log_file, - maxBytes=10 * 1024 * 1024, - backupCount=5, - encoding="utf-8", - ) - file_handler.setFormatter(formatter) - pallas_logger.addHandler(file_handler) - # Announce the sink so operators can tail the right path without - # guessing — emitted at INFO so even a WARNING-only deployment - # won't see it; at DEBUG it's easy to grep for. - pallas_logger.info("log file: %s", log_file) - except Exception as exc: # pragma: no cover — never fatal - # Fall back silently; the stderr handler below still works. - logging.getLogger(__name__).warning( - "could not attach file log handler: %s", exc - ) - - # Stderr sink — writes to the *original* FD Rich captured at import - # time, so our records skip the progress display and hit the TTY - # (or systemd journal) directly. - stream_handler = logging.StreamHandler(stream=sys.__stderr__) - stream_handler.setFormatter(formatter) - pallas_logger.addHandler(stream_handler) + if log_file is not None: + # Announce the sink so operators can tail the right path without + # guessing — emitted at INFO so even a WARNING-only deployment + # won't see it; at DEBUG it's easy to grep for. + pallas_logger.info("log file: %s", log_file) # Silence noisy HTTP client internals — only surface warnings and above. + # Applied *after* root-level configuration so these win. for noisy in ("httpx", "httpcore"): logging.getLogger(noisy).setLevel(logging.WARNING)