feat(log): add service/project/component labels for Loki
Inject stable JSON fields (`service`, `project`, `component`) on every log record via a `logging.Filter` so Alloy-forwarded logs share the same label shape as Mnemosyne/Daedalus. The `project` label is set once from `agents.yaml`, and `component` is stored in a ContextVar so each agent's asyncio task carries its own value without leaking across sibling agents. Also switch the opt-in console sink from `sys.__stderr__` to `sys.__stdout__` (`PALLAS_LOG_STDOUT=1`) for cleaner journald capture in systemd deployments.
This commit is contained in:
158
pallas/log.py
158
pallas/log.py
@@ -15,6 +15,7 @@ Level conventions (Ouranos Lab — Python services use UPPERCASE):
|
|||||||
DEBUG — per-request detail; never enabled in production by default
|
DEBUG — per-request detail; never enabled in production by default
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import contextvars
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
@@ -25,6 +26,70 @@ from datetime import datetime, timezone
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# ── Static / contextual fields ────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Every log line Alloy forwards to Loki gets a small, stable set of JSON
|
||||||
|
# fields so operators can pivot between services with the same LogQL label
|
||||||
|
# shape used by Mnemosyne and Daedalus:
|
||||||
|
#
|
||||||
|
# service — always "pallas"
|
||||||
|
# project — the calling project's ``agents.yaml`` ``name:`` (e.g. "kottos",
|
||||||
|
# "mentor", "iolaus"), set once from ``server.main()``
|
||||||
|
# component — the agent currently executing (e.g. "harper", "scotty",
|
||||||
|
# "research"). Stored in a ContextVar so each asyncio task
|
||||||
|
# started by ``_start_agent`` carries its own value without
|
||||||
|
# leaking across agents.
|
||||||
|
#
|
||||||
|
# These are added by a ``logging.Filter`` rather than being baked into the
|
||||||
|
# formatter so DEBUG messages from fast-agent, fastmcp, the MCP SDK and
|
||||||
|
# anthropic/openai clients all get the same attribution — exactly what Loki
|
||||||
|
# query filters depend on.
|
||||||
|
|
||||||
|
_PROJECT: str = "pallas" # overridden once by set_project()
|
||||||
|
_COMPONENT_CTX: contextvars.ContextVar[str] = contextvars.ContextVar(
|
||||||
|
"pallas_component", default="runtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_project(name: str) -> None:
|
||||||
|
"""Set the project label on all subsequent log records.
|
||||||
|
|
||||||
|
Called once from ``pallas.server.main()`` after reading ``agents.yaml``.
|
||||||
|
"""
|
||||||
|
global _PROJECT
|
||||||
|
if name:
|
||||||
|
_PROJECT = name
|
||||||
|
|
||||||
|
|
||||||
|
def set_agent_component(name: str) -> None:
|
||||||
|
"""Bind the current asyncio task's component label to ``name``.
|
||||||
|
|
||||||
|
``pallas.server._start_agent`` calls this immediately before entering
|
||||||
|
``fast_instance.run()`` so every log line emitted by that agent's
|
||||||
|
task — including records that bubble up from fast-agent / fastmcp /
|
||||||
|
the MCP SDK — is tagged with the agent's name. ContextVar semantics
|
||||||
|
mean sibling agent tasks started from the same ``asyncio.gather`` call
|
||||||
|
each carry their own value without interference.
|
||||||
|
"""
|
||||||
|
_COMPONENT_CTX.set(name)
|
||||||
|
|
||||||
|
|
||||||
|
class _StaticFieldsFilter(logging.Filter):
|
||||||
|
"""Inject ``service`` / ``project`` / ``component`` onto every record.
|
||||||
|
|
||||||
|
Runs before the JSON formatter so the fields appear as top-level keys
|
||||||
|
(not nested under ``extra``). The filter is attached to every handler
|
||||||
|
— both on the root logger and on ``pallas.*`` — so third-party library
|
||||||
|
records pick up the same labels that Pallas's own records carry.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
|
record.service = "pallas"
|
||||||
|
record.project = _PROJECT
|
||||||
|
record.component = _COMPONENT_CTX.get()
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class _JSONFormatter(logging.Formatter):
|
class _JSONFormatter(logging.Formatter):
|
||||||
"""Single-line JSON formatter compatible with Alloy's ``| json`` pipeline.
|
"""Single-line JSON formatter compatible with Alloy's ``| json`` pipeline.
|
||||||
|
|
||||||
@@ -51,6 +116,12 @@ class _JSONFormatter(logging.Formatter):
|
|||||||
"level": record.levelname,
|
"level": record.levelname,
|
||||||
"logger": record.name,
|
"logger": record.name,
|
||||||
"message": record.getMessage(),
|
"message": record.getMessage(),
|
||||||
|
# Set by _StaticFieldsFilter; fall back to module-level defaults
|
||||||
|
# in case the filter is somehow bypassed (e.g. third-party handler
|
||||||
|
# that doesn't inherit our filters).
|
||||||
|
"service": getattr(record, "service", "pallas"),
|
||||||
|
"project": getattr(record, "project", _PROJECT),
|
||||||
|
"component": getattr(record, "component", _COMPONENT_CTX.get()),
|
||||||
}
|
}
|
||||||
if record.exc_info:
|
if record.exc_info:
|
||||||
if not record.exc_text:
|
if not record.exc_text:
|
||||||
@@ -173,20 +244,26 @@ def setup_logging() -> None:
|
|||||||
stdout and swallows plain ``print`` / stderr writes mid-render, so a
|
stdout and swallows plain ``print`` / stderr writes mid-render, so a
|
||||||
file sink is the only guaranteed way to see DEBUG diagnostics and
|
file sink is the only guaranteed way to see DEBUG diagnostics and
|
||||||
tracebacks.
|
tracebacks.
|
||||||
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in). Uses the *original*
|
* ``StreamHandler`` → ``sys.__stdout__`` (opt-in via ``PALLAS_LOG_STDOUT=1``).
|
||||||
stderr FD captured before Rich installed its Live display, so records
|
The preferred sink for a systemd-managed deployment: stdout becomes a
|
||||||
would bypass the progress renderer and land in the terminal or journal
|
journald entry with ``PRIORITY=6`` (INFO) and SyslogIdentifier matching
|
||||||
verbatim. In interactive ``fast-agent go`` sessions this still
|
the unit, which Alloy's ``loki.source.journal`` then relabels into
|
||||||
smashes the Rich UI (Rich redraws on top but our JSON lines leak
|
Loki. Stdout is also what Docker's ``json-file`` driver captures, so
|
||||||
through every repaint), so we only attach it when ``PALLAS_LOG_STDERR``
|
this same flag works if Pallas is ever containerised.
|
||||||
is set to a truthy value — production systemd deployments that want
|
* ``StreamHandler`` → ``sys.__stderr__`` (opt-in via ``PALLAS_LOG_STDERR=1``).
|
||||||
journal capture should set ``PALLAS_LOG_STDERR=1``; interactive users
|
Uses the *original* stderr FD captured before Rich installed its Live
|
||||||
get a clean TUI and rely on the rotating file sink.
|
display, so records would bypass the progress renderer and land in the
|
||||||
|
terminal or journal verbatim. In interactive ``fast-agent go`` sessions
|
||||||
|
this still smashes the Rich UI (Rich redraws on top but our JSON lines
|
||||||
|
leak through every repaint), so we only attach it when the operator
|
||||||
|
explicitly opts in. Retained for backwards compatibility; new
|
||||||
|
deployments should prefer ``PALLAS_LOG_STDOUT=1``.
|
||||||
|
|
||||||
- ``httpx`` / ``httpcore``: WARNING (prevent request-level debug flooding)
|
- ``httpx`` / ``httpcore``: WARNING (prevent request-level debug flooding)
|
||||||
- ``uvicorn.access``: health path filter applied
|
- ``uvicorn.access``: health path filter applied
|
||||||
"""
|
"""
|
||||||
formatter = _JSONFormatter()
|
formatter = _JSONFormatter()
|
||||||
|
static_fields = _StaticFieldsFilter()
|
||||||
|
|
||||||
# Pallas's own diagnostics level — drives the ``pallas.*`` loggers and
|
# Pallas's own diagnostics level — drives the ``pallas.*`` loggers and
|
||||||
# the file sink verbosity. Unchanged behaviour for operators.
|
# the file sink verbosity. Unchanged behaviour for operators.
|
||||||
@@ -218,32 +295,51 @@ def setup_logging() -> None:
|
|||||||
# so third-party libraries' records land in the same file sink.
|
# so third-party libraries' records land in the same file sink.
|
||||||
handlers: list[logging.Handler] = []
|
handlers: list[logging.Handler] = []
|
||||||
log_file: Path | None = None
|
log_file: Path | None = None
|
||||||
try:
|
# Rotating file sink. Disable by pointing ``PALLAS_LOG_FILE`` at
|
||||||
log_file = _resolve_log_file()
|
# ``/dev/null`` — systemd deployments that rely on journal capture can do
|
||||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
# this to avoid writing a second copy of every record to disk.
|
||||||
file_handler = logging.handlers.RotatingFileHandler(
|
log_path_override = os.environ.get("PALLAS_LOG_FILE", "")
|
||||||
log_file,
|
if log_path_override == "/dev/null":
|
||||||
maxBytes=10 * 1024 * 1024,
|
log_file = None
|
||||||
backupCount=5,
|
else:
|
||||||
encoding="utf-8",
|
try:
|
||||||
)
|
log_file = _resolve_log_file()
|
||||||
file_handler.setFormatter(formatter)
|
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
handlers.append(file_handler)
|
file_handler = logging.handlers.RotatingFileHandler(
|
||||||
except Exception as exc: # pragma: no cover — never fatal
|
log_file,
|
||||||
logging.getLogger(__name__).warning(
|
maxBytes=10 * 1024 * 1024,
|
||||||
"could not attach file log handler: %s", exc
|
backupCount=5,
|
||||||
)
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
file_handler.setFormatter(formatter)
|
||||||
|
file_handler.addFilter(static_fields)
|
||||||
|
handlers.append(file_handler)
|
||||||
|
except Exception as exc: # pragma: no cover — never fatal
|
||||||
|
logging.getLogger(__name__).warning(
|
||||||
|
"could not attach file log handler: %s", exc
|
||||||
|
)
|
||||||
|
|
||||||
# Stderr sink — *opt-in* only. Interactive ``fast-agent go`` sessions
|
# Stdout sink — preferred for systemd + journald deployments (PPLG path:
|
||||||
# install Rich's Live display which owns the terminal; emitting JSON
|
# journald → Alloy journal source → Loki). Emits the same JSON as the
|
||||||
# log records to stderr during that session corrupts the TUI on every
|
# rotating file sink so nothing is lost when operators disable the file.
|
||||||
# redraw and made Pallas effectively unusable in dev. Operators who
|
if os.environ.get("PALLAS_LOG_STDOUT", "").lower() in ("1", "true", "yes"):
|
||||||
# want systemd/journal capture can re-enable by exporting
|
stdout_handler = logging.StreamHandler(stream=sys.__stdout__)
|
||||||
# ``PALLAS_LOG_STDERR=1`` before launching. The rotating file sink
|
stdout_handler.setFormatter(formatter)
|
||||||
# above is the always-on durable capture.
|
stdout_handler.addFilter(static_fields)
|
||||||
|
handlers.append(stdout_handler)
|
||||||
|
|
||||||
|
# Stderr sink — *opt-in* only, retained for backwards compatibility.
|
||||||
|
# Interactive ``fast-agent go`` sessions install Rich's Live display
|
||||||
|
# which owns the terminal; emitting JSON log records to stderr during
|
||||||
|
# that session corrupts the TUI on every redraw and made Pallas
|
||||||
|
# effectively unusable in dev. New systemd deployments should prefer
|
||||||
|
# ``PALLAS_LOG_STDOUT=1`` — stderr remains available when operators
|
||||||
|
# want to keep stdout reserved for non-log output (rare, but cheap
|
||||||
|
# to support).
|
||||||
if os.environ.get("PALLAS_LOG_STDERR", "").lower() in ("1", "true", "yes"):
|
if os.environ.get("PALLAS_LOG_STDERR", "").lower() in ("1", "true", "yes"):
|
||||||
stream_handler = logging.StreamHandler(stream=sys.__stderr__)
|
stream_handler = logging.StreamHandler(stream=sys.__stderr__)
|
||||||
stream_handler.setFormatter(formatter)
|
stream_handler.setFormatter(formatter)
|
||||||
|
stream_handler.addFilter(static_fields)
|
||||||
handlers.append(stream_handler)
|
handlers.append(stream_handler)
|
||||||
|
|
||||||
# Root logger carries everything from libraries we do NOT own. We
|
# Root logger carries everything from libraries we do NOT own. We
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from pallas.log import setup_logging
|
from pallas.log import set_agent_component, set_project, setup_logging
|
||||||
from pallas.multimodal_server import MultimodalAgentMCPServer
|
from pallas.multimodal_server import MultimodalAgentMCPServer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -198,6 +198,14 @@ async def _preflight(deployment_config: dict) -> None:
|
|||||||
async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
||||||
from pallas.health import register_health_tool
|
from pallas.health import register_health_tool
|
||||||
|
|
||||||
|
# Tag every log record emitted from this asyncio task with the agent
|
||||||
|
# name (``component`` field). ContextVar scope is per-task, so each
|
||||||
|
# agent started from ``asyncio.gather`` carries its own value and the
|
||||||
|
# registry's own task sees ``component="runtime"`` by default. Fast-
|
||||||
|
# agent, fastmcp, anthropic and openai records all inherit this tag
|
||||||
|
# because the _StaticFieldsFilter is attached to every handler.
|
||||||
|
set_agent_component(name)
|
||||||
|
|
||||||
entry = agents[name]
|
entry = agents[name]
|
||||||
module_path = entry["module"]
|
module_path = entry["module"]
|
||||||
port = entry["port"]
|
port = entry["port"]
|
||||||
@@ -337,6 +345,13 @@ def main() -> None:
|
|||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Project label (e.g. "kottos", "mentor", "iolaus") is read from
|
||||||
|
# ``agents.yaml`` and stamped onto every log record via
|
||||||
|
# ``pallas.log._StaticFieldsFilter``. Must be set before
|
||||||
|
# ``setup_logging()`` so the "log file: …" bootstrap record already
|
||||||
|
# carries the right project label.
|
||||||
|
set_project(deploy_name)
|
||||||
|
|
||||||
setup_logging()
|
setup_logging()
|
||||||
|
|
||||||
if args.agent:
|
if args.agent:
|
||||||
|
|||||||
Reference in New Issue
Block a user