chore(compose): add shared json-file logging config and component labels
Introduce x-logging anchor with json-file driver, size/file caps, and container name tagging so Alloy on puck can reliably tail every service through the Docker socket. Apply to all services and inject MNEMOSYNE_COMPONENT env vars (init/app/mcp/worker) for consistent log attribution both
This commit is contained in:
67
mnemosyne/mnemosyne/log_filters.py
Normal file
67
mnemosyne/mnemosyne/log_filters.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""Logging filters shared across Mnemosyne processes.
|
||||
|
||||
These are project-level (not tied to a Django app) so Celery workers and
|
||||
the FastMCP ASGI app can reuse them without importing app modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
# Paths that should not show up in INFO when the response is a success.
# Anything >= 400 still flows through — a failing probe is a real signal.
# The filter below also retries the lookup with trailing slashes stripped,
# so each endpoint only needs to be listed once to match both spellings.
_SUPPRESS_PATHS = frozenset(
    {
        "/live/",
        "/live",
        "/ready/",
        "/ready",
        "/healthz",
        "/metrics",
    }
)


class SuppressHealthAccessFilter(logging.Filter):
    """Drop successful access-log records for health endpoints.

    Applied to ``django.server`` (runserver) and ``gunicorn.access`` via
    the ``access`` handler in :data:`mnemosyne.settings.LOGGING`. The filter
    returns ``False`` (the record is discarded, not re-emitted at a lower
    level) only when the request path is a health endpoint AND the HTTP
    status is below 400. Any failure on ``/ready/`` or ``/live/`` still
    propagates so an operator sees readiness flaps.

    The two access loggers may deliver the request line either inside the
    format string or through ``record.args``. The filter therefore parses
    the fully rendered message (``record.getMessage()``), which looks like
    ``'"GET /live/ HTTP/1.1" 200 0'`` in both cases. That keeps the filter
    portable across Mnemosyne containers (which run gunicorn) and local
    dev (``runserver``).

    Records that do not look like an access-log line (no quoted request
    line or no status code after a closing quote) are always passed through.
    """

    # Captures the path portion of the quoted request line, e.g. the
    # ``/live/?x=1`` in ``"GET /live/?x=1 HTTP/1.1"``. The query string is
    # removed later, in ``filter``.
    _REQUEST_RE = re.compile(r'"\s*(?:GET|POST|HEAD|OPTIONS|PUT|PATCH|DELETE)\s+(\S+)')
    # Captures the first three-digit number that follows a double quote and
    # whitespace — the status code right after the closing quote of the
    # request line.
    _STATUS_RE = re.compile(r'"\s+(\d{3})\b')

    def filter(self, record: logging.LogRecord) -> bool:
        """Decide whether *record* should be emitted.

        Args:
            record: The log record produced by an access logger.

        Returns:
            ``False`` when the record describes a request to a health
            endpoint that finished with a status below 400; ``True`` for
            everything else, including records that cannot be parsed.
        """
        msg = record.getMessage()
        path_match = self._REQUEST_RE.search(msg)
        status_match = self._STATUS_RE.search(msg)
        if not path_match or not status_match:
            # Not an access-log line we understand: never hide it.
            return True

        # Ignore the query string; probes sometimes append cache-busters.
        path = path_match.group(1).split("?", 1)[0]
        status = int(status_match.group(1))

        # Only suppress successful probes; surface any 4xx/5xx on a
        # health endpoint so operators see readiness flaps.
        if status >= 400:
            return True

        # Exact match first (keeps prior behaviour), then a slash-insensitive
        # match so "/healthz/" and "/metrics/" are treated like their
        # slash-less spellings.
        is_health_path = (
            path in _SUPPRESS_PATHS or path.rstrip("/") in _SUPPRESS_PATHS
        )
        return not is_health_path
|
||||
@@ -278,35 +278,75 @@ THEMIS_NOTIFICATION_POLL_INTERVAL = 60
|
||||
THEMIS_NOTIFICATION_MAX_AGE_DAYS = 90
|
||||
|
||||
# --- Structured Logging ---
|
||||
# All log output is line-delimited JSON on stdout, one record per line.
|
||||
# Alloy (running on the host / container sidecar) tails the container's
|
||||
# stdout stream and ships to Loki. No log files, no syslog — a single,
|
||||
# uniform transport across every service on this host.
|
||||
#
|
||||
# Labels attached by Alloy (NOT embedded here): service, component,
|
||||
# environment, hostname. "component" is injected by the formatter as
|
||||
# a static field based on the MNEMOSYNE_COMPONENT env var set per
|
||||
# docker-compose service (init | app | mcp | worker). This keeps the label
|
||||
# shape consistent with Pallas and future services.
|
||||
#
|
||||
# Level policy (Ouranos Lab standard):
|
||||
# ERROR — broken; requires human attention
|
||||
# WARNING — degraded but self-recovering; retries, skipped items
|
||||
# INFO — lifecycle events and failures; no 200 OK health probes
|
||||
# DEBUG — health-probe success, per-request detail, verbose traces
|
||||
LOGGING_LEVEL = env("LOGGING_LEVEL", default="INFO")
|
||||
CELERY_LOGGING_LEVEL = env("CELERY_LOGGING_LEVEL", default="INFO")
|
||||
DJANGO_LOGGING_LEVEL = env("DJANGO_LOGGING_LEVEL", default="WARNING")
|
||||
MNEMOSYNE_COMPONENT = env("MNEMOSYNE_COMPONENT", default="app")
|
||||
|
||||
LOGGING = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"formatters": {
|
||||
"structured": {
|
||||
"format": (
|
||||
"[%(levelname)s] %(asctime)s "
|
||||
"service=mnemosyne "
|
||||
"module=%(name)s "
|
||||
"func=%(funcName)s "
|
||||
"line=%(lineno)d "
|
||||
"%(message)s"
|
||||
# JSON formatter — one line of JSON per record. The ``| json`` parser
|
||||
# stage in LogQL will parse these fields into queryable columns
|
||||
# (level, logger, funcName, lineno, message, plus anything passed
|
||||
# via ``logger.info("...", extra={...})``).
|
||||
"json": {
|
||||
"()": "pythonjsonlogger.json.JsonFormatter",
|
||||
"fmt": (
|
||||
"%(asctime)s %(levelname)s %(name)s "
|
||||
"%(funcName)s %(lineno)d %(message)s"
|
||||
),
|
||||
"datefmt": "%Y-%m-%d %H:%M:%S",
|
||||
"rename_fields": {
|
||||
"asctime": "time",
|
||||
"levelname": "level",
|
||||
"name": "logger",
|
||||
},
|
||||
"static_fields": {
|
||||
"service": "mnemosyne",
|
||||
"component": MNEMOSYNE_COMPONENT,
|
||||
},
|
||||
},
|
||||
"simple": {
|
||||
"format": "[%(levelname)s] %(name)s: %(message)s",
|
||||
},
|
||||
"filters": {
|
||||
# Drops successful health-probe access log lines (they are discarded,
|
||||
# not re-emitted) so production INFO output stays signal-only. Applied to
|
||||
# django.server and gunicorn.access; uvicorn does its own thing
|
||||
# via the structlog-style filter in mcp_server.
|
||||
"suppress_health_access": {
|
||||
"()": "mnemosyne.log_filters.SuppressHealthAccessFilter",
|
||||
},
|
||||
},
|
||||
"handlers": {
|
||||
"console": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "structured",
|
||||
"formatter": "json",
|
||||
"stream": "ext://sys.stdout",
|
||||
},
|
||||
# Separate handler for django/gunicorn access logs so we can apply
|
||||
# the health-path filter without affecting application loggers.
|
||||
"access": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "json",
|
||||
"stream": "ext://sys.stdout",
|
||||
"filters": ["suppress_health_access"],
|
||||
},
|
||||
},
|
||||
"loggers": {
|
||||
"library": {
|
||||
@@ -324,6 +364,11 @@ LOGGING = {
|
||||
"level": LOGGING_LEVEL,
|
||||
"propagate": False,
|
||||
},
|
||||
"mcp_server": {
|
||||
"handlers": ["console"],
|
||||
"level": LOGGING_LEVEL,
|
||||
"propagate": False,
|
||||
},
|
||||
"celery": {
|
||||
"handlers": ["console"],
|
||||
"level": CELERY_LOGGING_LEVEL,
|
||||
@@ -339,9 +384,31 @@ LOGGING = {
|
||||
"level": DJANGO_LOGGING_LEVEL,
|
||||
"propagate": False,
|
||||
},
|
||||
# Django's runserver / gunicorn access logs — successful health probes
|
||||
# are filtered out so "5xx on /ready/" is easy to spot in INFO.
|
||||
"django.server": {
|
||||
"handlers": ["access"],
|
||||
"level": DJANGO_LOGGING_LEVEL,
|
||||
"propagate": False,
|
||||
},
|
||||
"gunicorn.access": {
|
||||
"handlers": ["access"],
|
||||
"level": DJANGO_LOGGING_LEVEL,
|
||||
"propagate": False,
|
||||
},
|
||||
# Noisy library internals — pin to WARNING regardless of root level
|
||||
# so we don't drown in HTTP-client debug spam when LOGGING_LEVEL=DEBUG.
|
||||
"httpx": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"httpcore": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"openai": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"urllib3": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"botocore": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"boto3": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"s3transfer": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
"neo4j": {"handlers": ["console"], "level": "WARNING", "propagate": False},
|
||||
},
|
||||
"root": {
|
||||
"handlers": ["console"],
|
||||
"level": LOGGING_LEVEL,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user