diff --git a/docker-compose.yaml b/docker-compose.yaml index bdc52e1..f07e51f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -47,6 +47,20 @@ # and is the typical re-run target after embedding-model changes). # ============================================================================= +# ----------------------------------------------------------------------------- +# Shared logging config — JSON to stdout, picked up by Alloy via the Docker +# socket on the host and shipped to Loki. Pinning json-file (Docker's default) +# so Alloy's discovery.docker + loki.source.docker on puck sees a consistent +# driver across every service, and bounding log retention per container so a +# misbehaving service can't fill the disk between Alloy tails. +# ----------------------------------------------------------------------------- +x-logging: &default-logging + driver: json-file + options: + tag: "{{.Name}}" + max-size: "10m" + max-file: "5" + services: # ── Static-file seeder: copies /app/staticfiles into the shared volume on @@ -60,6 +74,7 @@ services: volumes: - mnemosyne-static:/shared-static restart: "no" + logging: *default-logging # ── Init sidecar: one-shot Postgres migrate + library-type seed. Runs on # every `up` and exits. 
Long-running services below depend on @@ -92,10 +107,16 @@ services: - DB_PORT=${DB_PORT} # Neo4j (load_library_types writes Library defaults into the graph) - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL} - # Logging + # Logging (MNEMOSYNE_COMPONENT is injected by settings.py into every + # log line as a static JSON field; Alloy on puck reads the compose + # service name directly off the Docker label and uses that as the + # Loki `component` label, but we still set it here so operators + # tail-ing ``docker logs`` see the same attribution) + - MNEMOSYNE_COMPONENT=init - LOGGING_LEVEL=${LOGGING_LEVEL} - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} restart: "no" + logging: *default-logging # ── App: Django REST API + admin ────────────────────────────────────────── @@ -154,9 +175,11 @@ services: - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES} - RERANKER_TIMEOUT=${RERANKER_TIMEOUT} # Logging + - MNEMOSYNE_COMPONENT=app - LOGGING_LEVEL=${LOGGING_LEVEL} - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} restart: unless-stopped + logging: *default-logging depends_on: static-init: condition: service_completed_successfully @@ -228,9 +251,11 @@ services: - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES} - RERANKER_TIMEOUT=${RERANKER_TIMEOUT} # Logging + - MNEMOSYNE_COMPONENT=mcp - LOGGING_LEVEL=${LOGGING_LEVEL} - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} restart: unless-stopped + logging: *default-logging depends_on: init: condition: service_completed_successfully @@ -303,9 +328,11 @@ services: - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE} - EMBEDDING_TIMEOUT=${EMBEDDING_TIMEOUT} # Logging + - MNEMOSYNE_COMPONENT=worker - LOGGING_LEVEL=${LOGGING_LEVEL} - CELERY_LOGGING_LEVEL=${CELERY_LOGGING_LEVEL} restart: unless-stopped + logging: *default-logging depends_on: app: condition: service_healthy @@ -324,6 +351,7 @@ services: web: image: nginx:alpine restart: unless-stopped + logging: *default-logging depends_on: app: condition: service_healthy diff --git 
a/docs/mnemosyne_integration.md b/docs/mnemosyne_integration.md index c17e5b2..5fae8ea 100644 --- a/docs/mnemosyne_integration.md +++ b/docs/mnemosyne_integration.md @@ -9,7 +9,7 @@ This document describes Mnemosyne's role in the Daedalus + Pallas architecture a Mnemosyne exposes two interfaces for the wider Ouranos ecosystem: 1. **REST API** (`/library/api/*`) — consumed by the Daedalus backend (HTTP Basic auth, service account `daedalus-service`) for workspace lifecycle and asynchronous file ingestion. Phase 1, **implemented**. -2. **MCP Server** (port 22091 internal, `/mcp/` via nginx on 23090) — exposes search, browse, and retrieval tools. Phase 5 of Mnemosyne's own roadmap, **implemented** with workspace_id scoping and per-turn JWT access control. Consumed by Pallas FastAgents in production (Daedalus integration Phase 2, **implemented** — see [Phase 3 of this doc](#3-phase-3-per-turn-token-access-control-for-daedalus-integration)). +2. **MCP Server** (port 22091 internal, `/mcp/` via nginx on 23090) — exposes search, browse, and retrieval tools. Phase 5 of Mnemosyne's own roadmap, **implemented** with workspace-scoped access control via long-lived team JWTs. Consumed by Pallas FastAgents in production (Daedalus integration Phase 2, **implemented** — see [Phase 3 of this doc](#3-phase-3-long-lived-team-jwt-access-control-for-pallas-instances)). ### Phase status @@ -17,7 +17,7 @@ Mnemosyne exposes two interfaces for the wider Ouranos ecosystem: |-------|------|--------| | 1. REST workspace + ingest API for Daedalus | `POST /workspaces/`, `DELETE /workspaces/{id}/`, `POST /ingest/`, `GET /jobs/{id}/` | **Implemented** | | 2. MCP Server (Mnemosyne roadmap Phase 5) | `search`, `get_chunk`, `list_libraries`, `list_collections`, `list_items`, `get_health` | **Implemented** (workspace_id scoping enforced in Cypher) | -| 3. 
Per-turn signed-token access control for Daedalus integration | Daedalus mints HS256 JWTs carrying `{ws, libs}` claims; Mnemosyne validates via `MCPSigningKey` and scopes search via `_scope_from_claims` | **Implemented** | +| 3. Long-lived team JWT access control for Pallas instances | Mnemosyne mints a 10-year HS256 JWT per Pallas instance (Team); Daedalus stores it encrypted and the operator pastes the plaintext into `fastagent.secrets.yaml`. Mnemosyne scopes search to the team's assigned workspaces via `TeamWorkspaceAssignment`. | **Implemented** | --- @@ -367,40 +367,65 @@ mnemosyne_s3_operations_total{operation,status} counter - [x] ASGI mount + uvicorn deployment on port 22091; nginx proxies via `/mcp/` on 23090 - [x] Prometheus metrics (`mnemosyne_mcp_*`) -### Phase 3 — Per-turn token access control for Daedalus integration ✅ Implemented +### Phase 3 — Long-lived team JWT access control for Pallas instances ✅ Implemented -Daedalus mints a short-lived HS256 JWT per chat turn and sends it as `Authorization: Bearer` to Pallas. Pallas forwards the token to outgoing Mnemosyne MCP calls (via `pallas/_fastagent_patch`). Mnemosyne validates the JWT and scopes every search to the workspace indicated by the `ws` claim. +Each Pallas instance registered in Daedalus is mirrored as a Mnemosyne **Team**. Mnemosyne mints a long-lived (10-year) HS256 JWT for the team; the operator pastes the plaintext into the Pallas instance's `fastagent.secrets.yaml`. Every MCP call from that Pallas instance carries the team JWT as a static `Authorization: Bearer` header. Mnemosyne validates the JWT and scopes search to the workspaces assigned to that team. **Mnemosyne-side components:** -- [x] `MCPSigningKey` model — stores active HS256 secrets keyed by `kid`. Managed via `manage.py seed_signing_key --kid `. -- [x] `resolve_mcp_jwt(token_string)` in `mcp_server/auth.py` — validates signature, `exp`, `iss`, `jti` replay; returns claims dict. 
-- [x] `MCPAuthMiddleware.on_call_tool` — detects JWT shape (three dot-separated segments), routes to `resolve_mcp_jwt`, stores claims in FastMCP context state via `STATE_KEY_CLAIMS`. -- [x] `_scope_from_claims(claims, arg_workspace_id)` — claims trump tool args; returns `(ws, allowed_libraries)`. -- [x] `allowed_libraries` on `SearchRequest` — extends `_WORKSPACE_SCOPE_CLAUSE` to include user-managed libraries in addition to the workspace's own. +- [x] `MCPSigningKey` model — stores active HS256 secrets keyed by `kid`. Managed via `manage.py seed_signing_key --kid `. The hex stays in Mnemosyne's DB; Daedalus never sees it. +- [x] `Team` model — one row per Pallas instance. `id` = `PallasInstance.id` on the Daedalus side (stable UUID). `active_jti` identifies the single currently-valid JWT; rotation changes this field, immediately invalidating the old token. +- [x] `TeamWorkspaceAssignment` model — maps a `Team` to a set of Daedalus workspace UUIDs. Updated by Daedalus via `PUT /mcp_server/api/teams/{id}/workspaces/` whenever workspace attachments change. +- [x] `resolve_mcp_jwt(token_string)` in `mcp_server/auth.py` — validates signature, `exp`, `iss`. For team JWTs (`iss=mnemosyne`, `typ=team`): parses `sub=team:` → `claims["team_id"]`; bypasses the per-turn JTI replay cache (team tokens are intentionally reused). +- [x] `_libraries_for_team(team_id, jti)` — looks up the `Team` row, verifies `active=True` and `active_jti == jti`, then translates `TeamWorkspaceAssignment` rows into Library UIDs via a single Cypher query. +- [x] `MCPAuthMiddleware.on_call_tool` — routes team JWTs through `_libraries_for_team`; routes legacy per-turn JWTs through `_scope_from_claims` (backward-compatible). +- [x] REST control plane at `/mcp_server/api/teams/`: + - `POST /` — create team by UUID; mints JWT, returns plaintext once. + - `GET /{id}/` — team state (workspace_ids, active status). + - `DELETE /{id}/` — soft-delete (`active=False`); all JWTs immediately invalid. 
+ - `PUT /{id}/workspaces/` — replace workspace assignment list (idempotent). + - `POST /{id}/rotate/` — mint new JWT with new `active_jti`; returns plaintext once. -**Token format (HS256):** +**Team JWT format (HS256):** ```json { - "iss": "daedalus", - "sub": "chat", - "ws": "", - "libs": [], + "iss": "mnemosyne", + "aud": "mnemosyne", + "typ": "team", + "sub": "team:", "iat": 1746000000, - "exp": 1746000600, - "jti": "" + "exp": 2061360000, + "jti": "" } ``` -The `libs` claim is reserved for future user-managed library assignment (deferred). Currently always `[]`; the workspace's own library is always included via the `ws` claim. - -**Provisioning:** +**Provisioning (once per Pallas instance):** ```bash -# On Mnemosyne host, once: -docker compose exec app python manage.py seed_signing_key --kid daedalus-1 -# Copy the printed hex → DAEDALUS_MNEMOSYNE_SIGNING_SECRET in Daedalus .env +# 1. Seed the MCPSigningKey on Mnemosyne (once per deployment, not per instance): +docker compose exec app python manage.py seed_signing_key --kid daedalus-1 --retire-other +# The hex stays in Mnemosyne's DB — no operator action required. + +# 2. Register the Pallas instance in Daedalus admin UI (/admin/pallas/). +# Daedalus calls POST /mcp_server/api/teams/ automatically. +# The team JWT is minted and stored encrypted in Daedalus. + +# 3. Reveal the JWT via Daedalus admin UI (one-shot): +# GET /api/v1/pallas/{id}/team-jwt +# Copy the returned JWT string. + +# 4. Paste into fastagent.secrets.yaml on the Pallas host: +# mcp: +# servers: +# mnemosyne: +# headers: +# Authorization: "Bearer " + +# 5. Restart the Pallas agent processes. + +# 6. Attach workspaces in Daedalus workspace settings UI. +# Daedalus calls PUT /mcp_server/api/teams/{id}/workspaces/ automatically. ``` -See the Daedalus-side spec [§9](../../daedalus/docs/mnemosyne_integration.md#9-phase-2--workspace-scoped-mcp-search-implemented) for the full integration architecture. 
+See the Daedalus-side spec [§9](../../daedalus/docs/mnemosyne_integration.md#9-phase-2--workspace-scoped-mcp-search-implemented) for the full operator walkthrough including JWT rotation and disaster recovery.
diff --git a/mnemosyne/mnemosyne/log_filters.py b/mnemosyne/mnemosyne/log_filters.py
new file mode 100644
index 0000000..16db05a
--- /dev/null
+++ b/mnemosyne/mnemosyne/log_filters.py
@@ -0,0 +1,80 @@
+"""Logging filters shared across Mnemosyne processes.
+
+These are project-level (not tied to a Django app) so Celery workers and
+the FastMCP ASGI app can reuse them without importing app modules.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+
+
+# Paths whose successful (status < 400) access-log records are demoted to
+# DEBUG. Anything >= 400 still flows through — a failing probe is a real signal.
+_SUPPRESS_PATHS = frozenset(
+    {
+        "/live/",
+        "/live",
+        "/ready/",
+        "/ready",
+        "/healthz",
+        "/metrics",
+    }
+)
+
+
+class SuppressHealthAccessFilter(logging.Filter):
+    """Demote successful access-log records for health endpoints to DEBUG.
+
+    Applied to ``django.server`` (runserver) and ``gunicorn.access`` via
+    the ``access`` handler in :data:`mnemosyne.settings.LOGGING`. When the
+    request path is a health endpoint AND the HTTP status is 1xx/2xx/3xx,
+    the record is relabelled as DEBUG and kept only if the logger that
+    produced it is enabled for DEBUG; otherwise it is dropped. Any failure
+    on ``/ready/`` or ``/live/`` passes through unchanged so an operator
+    sees readiness flaps.
+
+    The two access loggers format their messages differently:
+
+    * ``django.server`` emits ``'"GET /live/ HTTP/1.1" 200 0'`` as the
+      message with no args.
+    * ``gunicorn.access`` typically has the path in ``record.args`` when
+      the access log format is configured, but many deployments fall
+      back to a pre-formatted message. We parse the final rendered
+      message in both cases to keep the filter portable across Mnemosyne
+      containers (which run gunicorn) and local dev (``runserver``).
+    """
+
+    # Matches the path portion of the quoted request line inside either
+    # format. Tolerant of missing trailing slashes and query strings.
+    _REQUEST_RE = re.compile(r'"\s*(?:GET|POST|HEAD|OPTIONS|PUT|PATCH|DELETE)\s+(\S+)')
+    # Matches the three-digit status code after a double quote and whitespace.
+    _STATUS_RE = re.compile(r'"\s+(\d{3})\b')
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Return ``True`` if ``record`` should be emitted.
+
+        Records whose rendered message has no quoted request line or no
+        status code are always kept. Successful health-probe records have
+        ``levelno``/``levelname`` rewritten to DEBUG in place.
+        """
+        msg = record.getMessage()
+        path_match = self._REQUEST_RE.search(msg)
+        status_match = self._STATUS_RE.search(msg)
+        if not path_match or not status_match:
+            return True
+
+        path = path_match.group(1).split("?", 1)[0]
+        status = int(status_match.group(1))
+
+        # Surface any 4xx/5xx on a health endpoint at its original level
+        # so operators see readiness flaps.
+        if path not in _SUPPRESS_PATHS or status >= 400:
+            return True
+
+        # Successful probe: relabel the record as DEBUG, then redo the
+        # level check the logger performed against the original level.
+        record.levelno = logging.DEBUG
+        record.levelname = logging.getLevelName(logging.DEBUG)
+        return logging.getLogger(record.name).isEnabledFor(logging.DEBUG)
diff --git a/mnemosyne/mnemosyne/settings.py b/mnemosyne/mnemosyne/settings.py
index cb52f33..ce8eebc 100644
--- a/mnemosyne/mnemosyne/settings.py
+++ b/mnemosyne/mnemosyne/settings.py
@@ -278,35 +278,75 @@ THEMIS_NOTIFICATION_POLL_INTERVAL = 60
 THEMIS_NOTIFICATION_MAX_AGE_DAYS = 90
 
 # --- Structured Logging ---
+# All log output is line-delimited JSON on stdout, one record per line.
+# Alloy (running on the host / container sidecar) tails the container's
+# stdout stream and ships to Loki. No log files, no syslog — a single,
+# uniform transport across every service on this host.
+#
+# Labels attached by Alloy (NOT embedded here): service, component,
+# environment, hostname. "component" is injected by the formatter as
+# a static field based on the MNEMOSYNE_COMPONENT env var set per
+# docker-compose service (app | mcp | worker). This keeps the label
+# shape consistent with Pallas and future services.
+# +# Level policy (Ouranos Lab standard): +# ERROR — broken; requires human attention +# WARNING — degraded but self-recovering; retries, skipped items +# INFO — lifecycle events and failures; no 200 OK health probes +# DEBUG — health-probe success, per-request detail, verbose traces LOGGING_LEVEL = env("LOGGING_LEVEL", default="INFO") CELERY_LOGGING_LEVEL = env("CELERY_LOGGING_LEVEL", default="INFO") DJANGO_LOGGING_LEVEL = env("DJANGO_LOGGING_LEVEL", default="WARNING") +MNEMOSYNE_COMPONENT = env("MNEMOSYNE_COMPONENT", default="app") LOGGING = { "version": 1, "disable_existing_loggers": False, "formatters": { - "structured": { - "format": ( - "[%(levelname)s] %(asctime)s " - "service=mnemosyne " - "module=%(name)s " - "func=%(funcName)s " - "line=%(lineno)d " - "%(message)s" + # JSON formatter — one line of JSON per record. Alloy's ``| json`` + # pipeline in LogQL will parse these fields into queryable columns + # (level, logger, funcName, lineno, message, plus anything passed + # via ``logger.info("...", extra={...})``). + "json": { + "()": "pythonjsonlogger.json.JsonFormatter", + "fmt": ( + "%(asctime)s %(levelname)s %(name)s " + "%(funcName)s %(lineno)d %(message)s" ), - "datefmt": "%Y-%m-%d %H:%M:%S", + "rename_fields": { + "asctime": "time", + "levelname": "level", + "name": "logger", + }, + "static_fields": { + "service": "mnemosyne", + "component": MNEMOSYNE_COMPONENT, + }, }, - "simple": { - "format": "[%(levelname)s] %(name)s: %(message)s", + }, + "filters": { + # Demotes successful health-probe access log lines from INFO to + # DEBUG so production INFO output stays signal-only. Applied to + # django.server and gunicorn.access; uvicorn does its own thing + # via the structlog-style filter in mcp_server. 
+ "suppress_health_access": { + "()": "mnemosyne.log_filters.SuppressHealthAccessFilter", }, }, "handlers": { "console": { "class": "logging.StreamHandler", - "formatter": "structured", + "formatter": "json", "stream": "ext://sys.stdout", }, + # Separate handler for django/gunicorn access logs so we can apply + # the health-path filter without affecting application loggers. + "access": { + "class": "logging.StreamHandler", + "formatter": "json", + "stream": "ext://sys.stdout", + "filters": ["suppress_health_access"], + }, }, "loggers": { "library": { @@ -324,6 +364,11 @@ LOGGING = { "level": LOGGING_LEVEL, "propagate": False, }, + "mcp_server": { + "handlers": ["console"], + "level": LOGGING_LEVEL, + "propagate": False, + }, "celery": { "handlers": ["console"], "level": CELERY_LOGGING_LEVEL, @@ -339,9 +384,31 @@ LOGGING = { "level": DJANGO_LOGGING_LEVEL, "propagate": False, }, + # Django's runserver / gunicorn access logs — demote health probes + # to DEBUG so "5xx on /ready/" is easy to spot in INFO. + "django.server": { + "handlers": ["access"], + "level": DJANGO_LOGGING_LEVEL, + "propagate": False, + }, + "gunicorn.access": { + "handlers": ["access"], + "level": DJANGO_LOGGING_LEVEL, + "propagate": False, + }, + # Noisy library internals — pin to WARNING regardless of root level + # so we don't drown in HTTP-client debug spam when LOGGING_LEVEL=DEBUG. 
+ "httpx": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "httpcore": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "openai": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "urllib3": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "botocore": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "boto3": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "s3transfer": {"handlers": ["console"], "level": "WARNING", "propagate": False}, + "neo4j": {"handlers": ["console"], "level": "WARNING", "propagate": False}, }, "root": { "handlers": ["console"], "level": LOGGING_LEVEL, }, -} \ No newline at end of file +} diff --git a/pyproject.toml b/pyproject.toml index 86ae9a8..b554049 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "pymemcache>=4.0,<5.0", "openai>=1.0,<2.0", "django-prometheus>=2.3,<3.0", + "python-json-logger>=3.0,<4.0", # Phase 2: Embedding Pipeline "PyMuPDF>=1.24,<2.0", "pymupdf4llm>=0.0.17,<1.0",