feat(log): add service/project/component labels for Loki

Inject stable JSON fields (`service`, `project`, `component`) on every
log record via a `logging.Filter` so Alloy-forwarded logs share the
same label shape as Mnemosyne/Daedalus. The `project` label is set
once from `agents.yaml`, and `component` is stored in a ContextVar
so each agent's asyncio task carries its own value without leaking
across sibling agents.

Also switch the opt-in console sink from `sys.__stderr__` to
`sys.__stdout__` (`PALLAS_LOG_STDOUT=1`) for cleaner journald capture
in systemd deployments.
This commit is contained in:
2026-05-11 13:52:25 -04:00
parent 2759c8428e
commit f92f3fc710
2 changed files with 143 additions and 32 deletions

View File

@@ -15,6 +15,7 @@ Level conventions (Ouranos Lab — Python services use UPPERCASE):
DEBUG — per-request detail; never enabled in production by default
"""
import contextvars
import json
import logging
import logging.handlers
@@ -25,6 +26,70 @@ from datetime import datetime, timezone
from pathlib import Path
# ── Static / contextual fields ────────────────────────────────────────────────
#
# Every log line Alloy forwards to Loki gets a small, stable set of JSON
# fields so operators can pivot between services with the same LogQL label
# shape used by Mnemosyne and Daedalus:
#
# service — always "pallas"
# project — the calling project's ``agents.yaml`` ``name:`` (e.g. "kottos",
# "mentor", "iolaus"), set once from ``server.main()``
# component — the agent currently executing (e.g. "harper", "scotty",
# "research"). Stored in a ContextVar so each asyncio task
# started by ``_start_agent`` carries its own value without
# leaking across agents.
#
# These are added by a ``logging.Filter`` rather than being baked into the
# formatter so DEBUG messages from fast-agent, fastmcp, the MCP SDK and
# anthropic/openai clients all get the same attribution — exactly what Loki
# query filters depend on.
_PROJECT: str = "pallas" # overridden once by set_project()
_COMPONENT_CTX: contextvars.ContextVar[str] = contextvars.ContextVar(
"pallas_component", default="runtime"
)
def set_project(name: str) -> None:
"""Set the project label on all subsequent log records.
Called once from ``pallas.server.main()`` after reading ``agents.yaml``.
"""
global _PROJECT
if name:
_PROJECT = name
def set_agent_component(name: str) -> None:
"""Bind the current asyncio task's component label to ``name``.
``pallas.server._start_agent`` calls this immediately before entering
``fast_instance.run()`` so every log line emitted by that agent's
task — including records that bubble up from fast-agent / fastmcp /
the MCP SDK — is tagged with the agent's name. ContextVar semantics
mean sibling agent tasks started from the same ``asyncio.gather`` call
each carry their own value without interference.
"""
_COMPONENT_CTX.set(name)
class _StaticFieldsFilter(logging.Filter):
"""Inject ``service`` / ``project`` / ``component`` onto every record.
Runs before the JSON formatter so the fields appear as top-level keys
(not nested under ``extra``). The filter is attached to every handler
— both on the root logger and on ``pallas.*`` — so third-party library
records pick up the same labels that Pallas's own records carry.
"""
def filter(self, record: logging.LogRecord) -> bool:
record.service = "pallas"
record.project = _PROJECT
record.component = _COMPONENT_CTX.get()
return True
class _JSONFormatter(logging.Formatter):
"""Single-line JSON formatter compatible with Alloy's ``| json`` pipeline.
@@ -51,6 +116,12 @@ class _JSONFormatter(logging.Formatter):
"level": record.levelname,
"logger": record.name,
"message": record.getMessage(),
# Set by _StaticFieldsFilter; fall back to module-level defaults
# in case the filter is somehow bypassed (e.g. third-party handler
# that doesn't inherit our filters).
"service": getattr(record, "service", "pallas"),
"project": getattr(record, "project", _PROJECT),
"component": getattr(record, "component", _COMPONENT_CTX.get()),
}
if record.exc_info:
if not record.exc_text:
@@ -173,20 +244,26 @@ def setup_logging() -> None:
stdout and swallows plain ``print`` / stderr writes mid-render, so a
file sink is the only guaranteed way to see DEBUG diagnostics and
tracebacks.
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in). Uses the *original*
stderr FD captured before Rich installed its Live display, so records
would bypass the progress renderer and land in the terminal or journal
verbatim. In interactive ``fast-agent go`` sessions this still
smashes the Rich UI (Rich redraws on top but our JSON lines leak
through every repaint), so we only attach it when ``PALLAS_LOG_STDERR``
is set to a truthy value — production systemd deployments that want
journal capture should set ``PALLAS_LOG_STDERR=1``; interactive users
get a clean TUI and rely on the rotating file sink.
* ``StreamHandler`` → ``sys.__stdout__`` (opt-in via ``PALLAS_LOG_STDOUT=1``).
The preferred sink for a systemd-managed deployment: stdout becomes a
journald entry with ``PRIORITY=6`` (INFO) and SyslogIdentifier matching
the unit, which Alloy's ``loki.source.journal`` then relabels into
Loki. Stdout is also what Docker's ``json-file`` driver captures, so
this same flag works if Pallas is ever containerised.
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in via ``PALLAS_LOG_STDERR=1``).
Uses the *original* stderr FD captured before Rich installed its Live
display, so records would bypass the progress renderer and land in the
terminal or journal verbatim. In interactive ``fast-agent go`` sessions
this still smashes the Rich UI (Rich redraws on top but our JSON lines
leak through every repaint), so we only attach it when the operator
explicitly opts in. Retained for backwards compatibility; new
deployments should prefer ``PALLAS_LOG_STDOUT=1``.
- ``httpx`` / ``httpcore``: WARNING (prevent request-level debug flooding)
- ``uvicorn.access``: health path filter applied
"""
formatter = _JSONFormatter()
static_fields = _StaticFieldsFilter()
# Pallas's own diagnostics level — drives the ``pallas.*`` loggers and
# the file sink verbosity. Unchanged behaviour for operators.
@@ -218,32 +295,51 @@ def setup_logging() -> None:
# so third-party libraries' records land in the same file sink.
handlers: list[logging.Handler] = []
log_file: Path | None = None
try:
log_file = _resolve_log_file()
log_file.parent.mkdir(parents=True, exist_ok=True)
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=10 * 1024 * 1024,
backupCount=5,
encoding="utf-8",
)
file_handler.setFormatter(formatter)
handlers.append(file_handler)
except Exception as exc: # pragma: no cover — never fatal
logging.getLogger(__name__).warning(
"could not attach file log handler: %s", exc
)
# Rotating file sink. Disable by pointing ``PALLAS_LOG_FILE`` at
# ``/dev/null`` — systemd deployments that rely on journal capture can do
# this to avoid writing a second copy of every record to disk.
log_path_override = os.environ.get("PALLAS_LOG_FILE", "")
if log_path_override == "/dev/null":
log_file = None
else:
try:
log_file = _resolve_log_file()
log_file.parent.mkdir(parents=True, exist_ok=True)
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=10 * 1024 * 1024,
backupCount=5,
encoding="utf-8",
)
file_handler.setFormatter(formatter)
file_handler.addFilter(static_fields)
handlers.append(file_handler)
except Exception as exc: # pragma: no cover — never fatal
logging.getLogger(__name__).warning(
"could not attach file log handler: %s", exc
)
# Stderr sink — *opt-in* only. Interactive ``fast-agent go`` sessions
# install Rich's Live display which owns the terminal; emitting JSON
# log records to stderr during that session corrupts the TUI on every
# redraw and made Pallas effectively unusable in dev. Operators who
# want systemd/journal capture can re-enable by exporting
# ``PALLAS_LOG_STDERR=1`` before launching. The rotating file sink
# above is the always-on durable capture.
# Stdout sink — preferred for systemd + journald deployments (PPLG path:
# journald → Alloy journal source → Loki). Emits the same JSON as the
# rotating file sink so nothing is lost when operators disable the file.
if os.environ.get("PALLAS_LOG_STDOUT", "").lower() in ("1", "true", "yes"):
stdout_handler = logging.StreamHandler(stream=sys.__stdout__)
stdout_handler.setFormatter(formatter)
stdout_handler.addFilter(static_fields)
handlers.append(stdout_handler)
# Stderr sink — *opt-in* only, retained for backwards compatibility.
# Interactive ``fast-agent go`` sessions install Rich's Live display
# which owns the terminal; emitting JSON log records to stderr during
# that session corrupts the TUI on every redraw and made Pallas
# effectively unusable in dev. New systemd deployments should prefer
# ``PALLAS_LOG_STDOUT=1`` — stderr remains available when operators
# want to keep stdout reserved for non-log output (rare, but cheap
# to support).
if os.environ.get("PALLAS_LOG_STDERR", "").lower() in ("1", "true", "yes"):
stream_handler = logging.StreamHandler(stream=sys.__stderr__)
stream_handler.setFormatter(formatter)
stream_handler.addFilter(static_fields)
handlers.append(stream_handler)
# Root logger carries everything from libraries we do NOT own. We

View File

@@ -18,7 +18,7 @@ from pathlib import Path
import yaml
from pallas.log import setup_logging
from pallas.log import set_agent_component, set_project, setup_logging
from pallas.multimodal_server import MultimodalAgentMCPServer
logger = logging.getLogger(__name__)
@@ -198,6 +198,14 @@ async def _preflight(deployment_config: dict) -> None:
async def _start_agent(name: str, agents: dict[str, dict]) -> None:
from pallas.health import register_health_tool
# Tag every log record emitted from this asyncio task with the agent
# name (``component`` field). ContextVar scope is per-task, so each
# agent started from ``asyncio.gather`` carries its own value and the
# registry's own task sees ``component="runtime"`` by default. Fast-
# agent, fastmcp, anthropic and openai records all inherit this tag
# because the _StaticFieldsFilter is attached to every handler.
set_agent_component(name)
entry = agents[name]
module_path = entry["module"]
port = entry["port"]
@@ -337,6 +345,13 @@ def main() -> None:
)
args = parser.parse_args()
# Project label (e.g. "kottos", "mentor", "iolaus") is read from
# ``agents.yaml`` and stamped onto every log record via
# ``pallas.log._StaticFieldsFilter``. Must be set before
# ``setup_logging()`` so the "log file: …" bootstrap record already
# carries the right project label.
set_project(deploy_name)
setup_logging()
if args.agent: