From c9328c58fc4b96a81dd5b2abc0adf18f30da40d1 Mon Sep 17 00:00:00 2001
From: Robert Helewka
Date: Mon, 4 May 2026 07:41:15 -0400
Subject: [PATCH] refactor(nginx): overhaul config with dynamic resolution and media serving

- Add Docker DNS resolver to prevent stale upstream IPs after container restarts
- Preserve X-Forwarded-Proto from HAProxy for correct HTTPS detection
- Mount mnemosyne-media volume for direct /media/ serving
- Add IP allowlisting for probe/metrics endpoints (RFC1918 + loopback)
- Keep the filtered access_log at server level, where it overrides the image
  default (an http-level `access_log off` next to it would silence all logging)
- Repeat the security headers in /static/ and /media/, whose own add_header
  would otherwise stop them being inherited
- Expand inline documentation covering routing model and conventions
---
 docker-compose.yaml  |   1 +
 nginx/mnemosyne.conf | 284 ++++++++++++++++++++++++++++++-------------
 2 files changed, 201 insertions(+), 84 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 63f4ae7..5c97c41 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -270,6 +270,7 @@ services:
     volumes:
       - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
       - mnemosyne-static:/var/www/static:ro
+      - mnemosyne-media:/var/www/media:ro
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost/live/"]
       interval: 30s
diff --git a/nginx/mnemosyne.conf b/nginx/mnemosyne.conf
index 05e7ba5..40bd624 100644
--- a/nginx/mnemosyne.conf
+++ b/nginx/mnemosyne.conf
@@ -1,127 +1,243 @@
-# Mnemosyne nginx — single virtual host that fronts the Django web app
-# and the FastMCP server. HAProxy on Titania terminates TLS and routes by
-# hostname; this nginx is plain HTTP on the internal network.
+# Mnemosyne nginx — single virtual host fronting the Django web app and the
+# FastMCP server. HAProxy on Titania terminates TLS; this nginx is plain HTTP
+# on the internal 10.10.0.0/24 network.
+#
+# Routing model
+# -------------
+# Everything proxies to the Django `app` container by default. The only paths
+# NOT sent to Django are:
+#   /mcp/          → FastMCP ASGI server (Streamable HTTP + SSE)
+#   /static/       → served directly from the shared volume
+#   /media/        → served directly from the shared media volume
+#   /healthz       → short-circuit proxy to FastMCP /mcp/health for HAProxy
+#   /nginx_status  → nginx stub_status (Prospero scrape)
+# Django returns its own themed 404 for anything it doesn't route.
+#
+# Conventions followed (Red Panda Standards + Athena reference config):
+#   * `resolver` + variable-based `proxy_pass` so container restarts don't
+#     leave nginx caching a dead IP and returning 502 until a full reload.
+#   * `$proxy_x_forwarded_proto` preserves HAProxy's `X-Forwarded-Proto: https`
+#     so `request.is_secure()`, secure cookies, and `build_absolute_uri()`
+#     work correctly behind TLS termination on Titania.
+#   * Probe paths are suppressed from the access log by the server-level
+#     `access_log … if=$loggable`. A server-level `access_log` replaces the
+#     one inherited from nginx:alpine's http block, so nothing else is
+#     needed; never pair it with `access_log off` on the same level — `off`
+#     cancels every access_log on that level and silences logging.
+#   * `/live/`, `/ready/`, `/healthz`, `/metrics`, `/nginx_status` are all
+#     IP-allowlisted to RFC1918 + loopback — defence in depth even though
+#     HAProxy is already on the internal network.
 
-# Suppress probe paths from the access log (health checks, Prometheus scrapes).
-# These fire every 15–30 s and would drown out real traffic in Loki.
+# Docker's embedded DNS — forces nginx to re-resolve upstream hostnames when
+# containers restart and get new IPs. Without this, nginx caches the first
+# resolution at startup and returns 502 after `docker compose restart app`.
+resolver 127.0.0.11 valid=10s;
+
+# Preserve X-Forwarded-Proto from the upstream reverse proxy (HAProxy TLS
+# termination on Titania); fall back to $scheme only if there's no upstream
+# header. Inside the compose network $scheme is always `http` because HAProxy
+# already terminated TLS, so we MUST honour the incoming header.
+map $http_x_forwarded_proto $proxy_x_forwarded_proto {
+    default $http_x_forwarded_proto;
+    "" $scheme;
+}
+
+# Probe-path access-log filter. Genuine 4xx/5xx on these paths still surface
+# via the error log and via the probe itself failing.
 map $request_uri $loggable {
     default 1;
     ~^/live(/|\?|$) 0;
     ~^/ready(/|\?|$) 0;
     ~^/metrics(/|\?|$) 0;
     ~^/healthz(/|\?|$) 0;
+    ~^/nginx_status(/|\?|$) 0;
     ~^/health 0;
     ~^/mcp/health(/|\?|$) 0;
     ~^/ping(/|\?|$) 0;
 }
 
-# Map of upstreams to give us readable proxy_pass targets and easy retries.
-upstream mnemosyne_app {
-    server app:8000 max_fails=3 fail_timeout=30s;
-}
-
-upstream mnemosyne_mcp {
-    server mcp:8001 max_fails=3 fail_timeout=30s;
-}
-
 server {
     listen 80 default_server;
     server_name _;
 
-    access_log /var/log/nginx/access.log combined if=$loggable;
+    # Filtered access log. Declaring access_log here REPLACES the directive
+    # inherited from nginx:alpine's http block (a level only inherits when it
+    # declares no access_log of its own), so probe paths are filtered without
+    # any `access_log off`. Do not add `off` beside this line: it cancels all
+    # access_log directives on the same level, including this one.
+    access_log /dev/stdout combined if=$loggable;
 
-    # Reasonable limits — file uploads to the ingest endpoint can be big,
-    # but the bulk path is S3-direct from Daedalus. 64 MB covers admin
-    # uploads and direct REST POST /library/api/items/upload.
+    # Reasonable limits — ingest can upload large content, but the bulk
+    # path is S3-direct from Daedalus. 64 MB covers admin uploads and
+    # direct REST POST /library/api/items/upload.
     client_max_body_size 64m;
     client_body_timeout 120s;
 
-    # Liveness probe — always 200 if the Django process is up.
-    # Use the trailing-slash form: /live/ returns 200 directly.
-    # /live (no slash) triggers Django's APPEND_SLASH 301 redirect, which
-    # will cause health check clients that don't follow redirects to fail.
-    location = /live/ {
-        proxy_pass http://mnemosyne_app;
-        proxy_http_version 1.1;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        access_log off;
-    }
+    # Variable-based upstreams make nginx resolve via the Docker DNS at
+    # request time (cached for `valid=10s`) instead of once at startup.
+    set $backend_app http://app:8000;
+    set $backend_mcp http://mcp:8001;
 
-    # Readiness probe — 200 only when PostgreSQL + Memcached are reachable.
-    # Same trailing-slash rule applies.
-    location = /ready/ {
-        proxy_pass http://mnemosyne_app;
-        proxy_http_version 1.1;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        access_log off;
-    }
+    # ── Security headers ──────────────────────────────────────────────────
+    # `always` so they apply to 4xx/5xx responses from upstream too.
+    # Stronger policies (CSP, Referrer-Policy, HSTS) should be set on
+    # HAProxy so they're consistent across all backends.
+    # NOTE: a location that declares any add_header of its own stops
+    # inheriting these and must repeat them (see /static/ and /media/).
+    add_header X-Frame-Options "SAMEORIGIN" always;
+    add_header X-Content-Type-Options "nosniff" always;
+    add_header X-XSS-Protection "1; mode=block" always;
 
-    # HAProxy liveness probe — proxies through to the MCP health endpoint.
+    # ── HAProxy liveness for the MCP server ───────────────────────────────
+    # Short-circuits to FastMCP's /mcp/health without hitting Django.
+    # HAProxy on Titania and any internal uptime monitor use this.
     location = /healthz {
-        proxy_pass http://mnemosyne_mcp/mcp/health;
-        access_log off;
-    }
+        allow 127.0.0.0/8;      # loopback
+        allow 10.0.0.0/8;       # RFC1918 — primary internal range (Incus, HAProxy)
+        allow 172.16.0.0/12;    # RFC1918 — Docker bridge networks
+        allow 192.168.0.0/16;   # RFC1918
+        deny all;
 
-    # Mnemosyne's REST API — Django REST Framework views + admin.
-    # Under /library/api/* per mnemosyne/urls.py and /admin/* per Django.
-    location /library/ {
-        proxy_pass http://mnemosyne_app;
-        proxy_http_version 1.1;
+        proxy_pass $backend_mcp/mcp/health;
         proxy_set_header Host $host;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 300s;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
     }
 
-    location /admin/ {
-        proxy_pass http://mnemosyne_app;
-        proxy_http_version 1.1;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 300s;
-    }
-
-    # FastMCP Streamable HTTP at /mcp/ and SSE at /mcp/sse/.
-    # Long-running streams need disabled buffering and a generous timeout.
-    location /mcp/ {
-        proxy_pass http://mnemosyne_mcp;
-        proxy_http_version 1.1;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_set_header Connection "";
-        proxy_buffering off;
-        proxy_cache off;
-        proxy_read_timeout 600s;
-    }
-
-    # Static files baked into the image at /app/staticfiles, mounted into
-    # this nginx via a named volume populated by the app service.
+    # ── Static files ──────────────────────────────────────────────────────
+    # Baked into the image at /app/staticfiles, seeded into the shared
+    # volume by the static-init one-shot service on every `up`.
     location /static/ {
         alias /var/www/static/;
         access_log off;
         expires 30d;
+        # The add_header below stops inheritance of the server-level
+        # security headers, so they are repeated here explicitly.
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+        add_header Cache-Control "public, immutable";
     }
 
-    # Prometheus scrape endpoint — internal networks only.
-    # Allows: loopback + all RFC1918 private ranges.
-    location /metrics {
+    # ── Media files ──────────────────────────────────────────────────────
+    # Empty in production (USE_LOCAL_STORAGE=False → S3Boto3Storage).
+    # Useful in staging / dev compose runs where FileSystemStorage writes
+    # into MEDIA_ROOT inside the shared volume.
+    location /media/ {
+        alias /var/www/media/;
+        access_log off;
+        expires 7d;
+        # Uploaded content: keep `nosniff` and friends. They must be
+        # repeated because this block declares its own add_header.
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+        add_header Cache-Control "public";
+    }
+
+    # ── FastMCP server ────────────────────────────────────────────────────
+    # Streamable HTTP at /mcp/ and SSE at /mcp/sse/. Long-running streams
+    # require disabled buffering, an unset Connection header (so HTTP/1.1
+    # keep-alive stays upstream), and a generous read timeout.
+    location /mcp/ {
+        proxy_pass $backend_mcp;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
+        proxy_set_header Authorization $http_authorization;
+        proxy_set_header Connection "";
+        proxy_redirect off;
+        proxy_buffering off;
+        proxy_cache off;
+        proxy_read_timeout 600s;
+        proxy_send_timeout 600s;
+    }
+
+    # ── Prometheus scrape — internal networks only ────────────────────────
+    # /metrics itself is owned by Django's django-prometheus middleware.
+    # Omitting 10.0.0.0/8 silently breaks scrapes from Prospero in the
+    # Incus network; omitting 172.16.0.0/12 breaks scrapes from a
+    # Prometheus container on the default Docker bridge.
+    location = /metrics {
         allow 127.0.0.0/8;
         allow 10.0.0.0/8;
         allow 172.16.0.0/12;
         allow 192.168.0.0/16;
         deny all;
-        proxy_pass http://mnemosyne_app;
+
+        proxy_pass $backend_app;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
         access_log off;
     }
+
+    # ── nginx stub_status — internal networks only ────────────────────────
+    # Cheap endpoint for Prospero to watch active connections + request rate.
+    location = /nginx_status {
+        allow 127.0.0.0/8;
+        allow 10.0.0.0/8;
+        allow 172.16.0.0/12;
+        allow 192.168.0.0/16;
+        deny all;
+
+        stub_status on;
+        access_log off;
+    }
+
+    # ── Health probes on the Django app ───────────────────────────────────
+    # Use the trailing-slash form: /live/ and /ready/ return 200 directly.
+    # The un-slashed forms trigger Django's APPEND_SLASH 301 redirect, so
+    # clients that don't follow redirects will report a failure even when
+    # the service is healthy.
+    location = /live/ {
+        allow 127.0.0.0/8;
+        allow 10.0.0.0/8;
+        allow 172.16.0.0/12;
+        allow 192.168.0.0/16;
+        deny all;
+
+        proxy_pass $backend_app;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
+        access_log off;
+    }
+
+    location = /ready/ {
+        allow 127.0.0.0/8;
+        allow 10.0.0.0/8;
+        allow 172.16.0.0/12;
+        allow 192.168.0.0/16;
+        deny all;
+
+        proxy_pass $backend_app;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
+        access_log off;
+    }
+
+    # ── Catch-all: everything else goes to Django ─────────────────────────
+    # Django owns `/`, `/dashboard/`, `/login/`, `/logout/`, `/profile/*`,
+    # `/notifications/*`, `/admin/*`, `/library/*`, `/llm/*`, `/api/v1/*`,
+    # and any future app URL. It returns its own themed 404 for anything
+    # unrouted — not nginx's default page.
+    location / {
+        proxy_pass $backend_app;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
+        proxy_redirect off;
+        proxy_read_timeout 300s;
+        proxy_send_timeout 300s;
+    }
 }