feat(log): add service/project/component labels for Loki
Inject stable JSON fields (`service`, `project`, `component`) on every log record via a `logging.Filter` so Alloy-forwarded logs share the same label shape as Mnemosyne/Daedalus. The `project` label is set once from `agents.yaml`, and `component` is stored in a ContextVar so each agent's asyncio task carries its own value without leaking across sibling agents. Also switch the opt-in console sink from `sys.__stderr__` to `sys.__stdout__` (`PALLAS_LOG_STDOUT=1`) for cleaner journald capture in systemd deployments.
This commit is contained in:
158
pallas/log.py
158
pallas/log.py
@@ -15,6 +15,7 @@ Level conventions (Ouranos Lab — Python services use UPPERCASE):
|
||||
DEBUG — per-request detail; never enabled in production by default
|
||||
"""
|
||||
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import logging.handlers
|
||||
@@ -25,6 +26,70 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ── Static / contextual fields ────────────────────────────────────────────────
|
||||
#
|
||||
# Every log line Alloy forwards to Loki gets a small, stable set of JSON
|
||||
# fields so operators can pivot between services with the same LogQL label
|
||||
# shape used by Mnemosyne and Daedalus:
|
||||
#
|
||||
# service — always "pallas"
|
||||
# project — the calling project's ``agents.yaml`` ``name:`` (e.g. "kottos",
|
||||
# "mentor", "iolaus"), set once from ``server.main()``
|
||||
# component — the agent currently executing (e.g. "harper", "scotty",
|
||||
# "research"). Stored in a ContextVar so each asyncio task
|
||||
# started by ``_start_agent`` carries its own value without
|
||||
# leaking across agents.
|
||||
#
|
||||
# These are added by a ``logging.Filter`` rather than being baked into the
|
||||
# formatter so DEBUG messages from fast-agent, fastmcp, the MCP SDK and
|
||||
# anthropic/openai clients all get the same attribution — exactly what Loki
|
||||
# query filters depend on.
|
||||
|
||||
_PROJECT: str = "pallas" # overridden once by set_project()
|
||||
_COMPONENT_CTX: contextvars.ContextVar[str] = contextvars.ContextVar(
|
||||
"pallas_component", default="runtime"
|
||||
)
|
||||
|
||||
|
||||
def set_project(name: str) -> None:
|
||||
"""Set the project label on all subsequent log records.
|
||||
|
||||
Called once from ``pallas.server.main()`` after reading ``agents.yaml``.
|
||||
"""
|
||||
global _PROJECT
|
||||
if name:
|
||||
_PROJECT = name
|
||||
|
||||
|
||||
def set_agent_component(name: str) -> None:
|
||||
"""Bind the current asyncio task's component label to ``name``.
|
||||
|
||||
``pallas.server._start_agent`` calls this immediately before entering
|
||||
``fast_instance.run()`` so every log line emitted by that agent's
|
||||
task — including records that bubble up from fast-agent / fastmcp /
|
||||
the MCP SDK — is tagged with the agent's name. ContextVar semantics
|
||||
mean sibling agent tasks started from the same ``asyncio.gather`` call
|
||||
each carry their own value without interference.
|
||||
"""
|
||||
_COMPONENT_CTX.set(name)
|
||||
|
||||
|
||||
class _StaticFieldsFilter(logging.Filter):
|
||||
"""Inject ``service`` / ``project`` / ``component`` onto every record.
|
||||
|
||||
Runs before the JSON formatter so the fields appear as top-level keys
|
||||
(not nested under ``extra``). The filter is attached to every handler
|
||||
— both on the root logger and on ``pallas.*`` — so third-party library
|
||||
records pick up the same labels that Pallas's own records carry.
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
record.service = "pallas"
|
||||
record.project = _PROJECT
|
||||
record.component = _COMPONENT_CTX.get()
|
||||
return True
|
||||
|
||||
|
||||
class _JSONFormatter(logging.Formatter):
|
||||
"""Single-line JSON formatter compatible with Alloy's ``| json`` pipeline.
|
||||
|
||||
@@ -51,6 +116,12 @@ class _JSONFormatter(logging.Formatter):
|
||||
"level": record.levelname,
|
||||
"logger": record.name,
|
||||
"message": record.getMessage(),
|
||||
# Set by _StaticFieldsFilter; fall back to module-level defaults
|
||||
# in case the filter is somehow bypassed (e.g. third-party handler
|
||||
# that doesn't inherit our filters).
|
||||
"service": getattr(record, "service", "pallas"),
|
||||
"project": getattr(record, "project", _PROJECT),
|
||||
"component": getattr(record, "component", _COMPONENT_CTX.get()),
|
||||
}
|
||||
if record.exc_info:
|
||||
if not record.exc_text:
|
||||
@@ -173,20 +244,26 @@ def setup_logging() -> None:
|
||||
stdout and swallows plain ``print`` / stderr writes mid-render, so a
|
||||
file sink is the only guaranteed way to see DEBUG diagnostics and
|
||||
tracebacks.
|
||||
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in). Uses the *original*
|
||||
stderr FD captured before Rich installed its Live display, so records
|
||||
would bypass the progress renderer and land in the terminal or journal
|
||||
verbatim. In interactive ``fast-agent go`` sessions this still
|
||||
smashes the Rich UI (Rich redraws on top but our JSON lines leak
|
||||
through every repaint), so we only attach it when ``PALLAS_LOG_STDERR``
|
||||
is set to a truthy value — production systemd deployments that want
|
||||
journal capture should set ``PALLAS_LOG_STDERR=1``; interactive users
|
||||
get a clean TUI and rely on the rotating file sink.
|
||||
* ``StreamHandler`` → ``sys.__stdout__`` (opt-in via ``PALLAS_LOG_STDOUT=1``).
|
||||
The preferred sink for a systemd-managed deployment: stdout becomes a
|
||||
journald entry with ``PRIORITY=6`` (INFO) and SyslogIdentifier matching
|
||||
the unit, which Alloy's ``loki.source.journal`` then relabels into
|
||||
Loki. Stdout is also what Docker's ``json-file`` driver captures, so
|
||||
this same flag works if Pallas is ever containerised.
|
||||
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in via ``PALLAS_LOG_STDERR=1``).
|
||||
Uses the *original* stderr FD captured before Rich installed its Live
|
||||
display, so records would bypass the progress renderer and land in the
|
||||
terminal or journal verbatim. In interactive ``fast-agent go`` sessions
|
||||
this still smashes the Rich UI (Rich redraws on top but our JSON lines
|
||||
leak through every repaint), so we only attach it when the operator
|
||||
explicitly opts in. Retained for backwards compatibility; new
|
||||
deployments should prefer ``PALLAS_LOG_STDOUT=1``.
|
||||
|
||||
- ``httpx`` / ``httpcore``: WARNING (prevent request-level debug flooding)
|
||||
- ``uvicorn.access``: health path filter applied
|
||||
"""
|
||||
formatter = _JSONFormatter()
|
||||
static_fields = _StaticFieldsFilter()
|
||||
|
||||
# Pallas's own diagnostics level — drives the ``pallas.*`` loggers and
|
||||
# the file sink verbosity. Unchanged behaviour for operators.
|
||||
@@ -218,32 +295,51 @@ def setup_logging() -> None:
|
||||
# so third-party libraries' records land in the same file sink.
|
||||
handlers: list[logging.Handler] = []
|
||||
log_file: Path | None = None
|
||||
try:
|
||||
log_file = _resolve_log_file()
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
file_handler = logging.handlers.RotatingFileHandler(
|
||||
log_file,
|
||||
maxBytes=10 * 1024 * 1024,
|
||||
backupCount=5,
|
||||
encoding="utf-8",
|
||||
)
|
||||
file_handler.setFormatter(formatter)
|
||||
handlers.append(file_handler)
|
||||
except Exception as exc: # pragma: no cover — never fatal
|
||||
logging.getLogger(__name__).warning(
|
||||
"could not attach file log handler: %s", exc
|
||||
)
|
||||
# Rotating file sink. Disable by pointing ``PALLAS_LOG_FILE`` at
|
||||
# ``/dev/null`` — systemd deployments that rely on journal capture can do
|
||||
# this to avoid writing a second copy of every record to disk.
|
||||
log_path_override = os.environ.get("PALLAS_LOG_FILE", "")
|
||||
if log_path_override == "/dev/null":
|
||||
log_file = None
|
||||
else:
|
||||
try:
|
||||
log_file = _resolve_log_file()
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
file_handler = logging.handlers.RotatingFileHandler(
|
||||
log_file,
|
||||
maxBytes=10 * 1024 * 1024,
|
||||
backupCount=5,
|
||||
encoding="utf-8",
|
||||
)
|
||||
file_handler.setFormatter(formatter)
|
||||
file_handler.addFilter(static_fields)
|
||||
handlers.append(file_handler)
|
||||
except Exception as exc: # pragma: no cover — never fatal
|
||||
logging.getLogger(__name__).warning(
|
||||
"could not attach file log handler: %s", exc
|
||||
)
|
||||
|
||||
# Stderr sink — *opt-in* only. Interactive ``fast-agent go`` sessions
|
||||
# install Rich's Live display which owns the terminal; emitting JSON
|
||||
# log records to stderr during that session corrupts the TUI on every
|
||||
# redraw and made Pallas effectively unusable in dev. Operators who
|
||||
# want systemd/journal capture can re-enable by exporting
|
||||
# ``PALLAS_LOG_STDERR=1`` before launching. The rotating file sink
|
||||
# above is the always-on durable capture.
|
||||
# Stdout sink — preferred for systemd + journald deployments (PPLG path:
|
||||
# journald → Alloy journal source → Loki). Emits the same JSON as the
|
||||
# rotating file sink so nothing is lost when operators disable the file.
|
||||
if os.environ.get("PALLAS_LOG_STDOUT", "").lower() in ("1", "true", "yes"):
|
||||
stdout_handler = logging.StreamHandler(stream=sys.__stdout__)
|
||||
stdout_handler.setFormatter(formatter)
|
||||
stdout_handler.addFilter(static_fields)
|
||||
handlers.append(stdout_handler)
|
||||
|
||||
# Stderr sink — *opt-in* only, retained for backwards compatibility.
|
||||
# Interactive ``fast-agent go`` sessions install Rich's Live display
|
||||
# which owns the terminal; emitting JSON log records to stderr during
|
||||
# that session corrupts the TUI on every redraw and made Pallas
|
||||
# effectively unusable in dev. New systemd deployments should prefer
|
||||
# ``PALLAS_LOG_STDOUT=1`` — stderr remains available when operators
|
||||
# want to keep stdout reserved for non-log output (rare, but cheap
|
||||
# to support).
|
||||
if os.environ.get("PALLAS_LOG_STDERR", "").lower() in ("1", "true", "yes"):
|
||||
stream_handler = logging.StreamHandler(stream=sys.__stderr__)
|
||||
stream_handler.setFormatter(formatter)
|
||||
stream_handler.addFilter(static_fields)
|
||||
handlers.append(stream_handler)
|
||||
|
||||
# Root logger carries everything from libraries we do NOT own. We
|
||||
|
||||
@@ -18,7 +18,7 @@ from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from pallas.log import setup_logging
|
||||
from pallas.log import set_agent_component, set_project, setup_logging
|
||||
from pallas.multimodal_server import MultimodalAgentMCPServer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -198,6 +198,14 @@ async def _preflight(deployment_config: dict) -> None:
|
||||
async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
||||
from pallas.health import register_health_tool
|
||||
|
||||
# Tag every log record emitted from this asyncio task with the agent
|
||||
# name (``component`` field). ContextVar scope is per-task, so each
|
||||
# agent started from ``asyncio.gather`` carries its own value and the
|
||||
# registry's own task sees ``component="runtime"`` by default. Fast-
|
||||
# agent, fastmcp, anthropic and openai records all inherit this tag
|
||||
# because the _StaticFieldsFilter is attached to every handler.
|
||||
set_agent_component(name)
|
||||
|
||||
entry = agents[name]
|
||||
module_path = entry["module"]
|
||||
port = entry["port"]
|
||||
@@ -337,6 +345,13 @@ def main() -> None:
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Project label (e.g. "kottos", "mentor", "iolaus") is read from
|
||||
# ``agents.yaml`` and stamped onto every log record via
|
||||
# ``pallas.log._StaticFieldsFilter``. Must be set before
|
||||
# ``setup_logging()`` so the "log file: …" bootstrap record already
|
||||
# carries the right project label.
|
||||
set_project(deploy_name)
|
||||
|
||||
setup_logging()
|
||||
|
||||
if args.agent:
|
||||
|
||||
Reference in New Issue
Block a user