refactor(nginx): overhaul config with dynamic resolution and media serving
- Add Docker DNS resolver to prevent stale upstream IPs after container restarts
- Preserve X-Forwarded-Proto from HAProxy for correct HTTPS detection
- Mount mnemosyne-media volume for direct /media/ serving
- Add IP allowlisting for probe/metrics endpoints (RFC1918 + loopback)
- Fix access_log inheritance so probe paths are properly suppressed
- Expand inline documentation covering routing model and conventions
This commit is contained in:
@@ -1,127 +1,230 @@
|
||||
# Mnemosyne nginx — single virtual host that fronts the Django web app
|
||||
# and the FastMCP server. HAProxy on Titania terminates TLS and routes by
|
||||
# hostname; this nginx is plain HTTP on the internal network.
|
||||
# Mnemosyne nginx — single virtual host fronting the Django web app and the
|
||||
# FastMCP server. HAProxy on Titania terminates TLS; this nginx is plain HTTP
|
||||
# on the internal 10.10.0.0/24 network.
|
||||
#
|
||||
# Routing model
|
||||
# -------------
|
||||
# Everything proxies to the Django `app` container by default. The only paths
|
||||
# NOT sent to Django are:
|
||||
# /mcp/ → FastMCP ASGI server (Streamable HTTP + SSE)
|
||||
# /static/ → served directly from the shared volume
|
||||
# /media/ → served directly from the shared media volume
|
||||
# /healthz → short-circuit proxy to FastMCP /mcp/health for HAProxy
|
||||
# /nginx_status → nginx stub_status (Prospero scrape)
|
||||
# Django returns its own themed 404 for anything it doesn't route.
|
||||
#
|
||||
# Conventions followed (Red Panda Standards + Athena reference config):
|
||||
# * `resolver` + variable-based `proxy_pass` so container restarts don't
|
||||
# leave nginx caching a dead IP and returning 502 until a full reload.
|
||||
# * `$proxy_x_forwarded_proto` preserves HAProxy's `X-Forwarded-Proto: https`
|
||||
# so `request.is_secure()`, secure cookies, and `build_absolute_uri()`
|
||||
# work correctly behind TLS termination on Titania.
|
||||
# * Probe paths are suppressed from the access log via `if=$loggable`.
#   nginx does not merge `access_log` across configuration levels: a
#   server-level `access_log` replaces the default inherited from
#   nginx:alpine's http-block config, whereas `access_log off` disables
#   every access log declared at the same level — so `off` must never be
#   combined with another `access_log` in one context.
|
||||
# * `/live/`, `/ready/`, `/healthz`, `/metrics`, `/nginx_status` are all
|
||||
# IP-allowlisted to RFC1918 + loopback — defence in depth even though
|
||||
# HAProxy is already on the internal network.
|
||||
|
||||
# Suppress probe paths from the access log (health checks, Prometheus scrapes).
|
||||
# These fire every 15–30 s and would drown out real traffic in Loki.
|
||||
# Docker's embedded DNS — forces nginx to re-resolve upstream hostnames when
|
||||
# containers restart and get new IPs. Without this, nginx caches the first
|
||||
# resolution at startup and returns 502 after `docker compose restart app`.
|
||||
resolver 127.0.0.11 valid=10s;
|
||||
|
||||
# Preserve X-Forwarded-Proto from the upstream reverse proxy (HAProxy TLS
|
||||
# termination on Titania); fall back to $scheme only if there's no upstream
|
||||
# header. Inside the compose network $scheme is always `http` because HAProxy
|
||||
# already terminated TLS, so we MUST honour the incoming header.
|
||||
map $http_x_forwarded_proto $proxy_x_forwarded_proto {
|
||||
default $http_x_forwarded_proto;
|
||||
"" $scheme;
|
||||
}
|
||||
|
||||
# Probe-path access-log filter. Genuine 4xx/5xx on these paths still surface
|
||||
# via the error log and via the probe itself failing.
|
||||
# Access-log filter: $loggable is 0 for probe/scrape paths and 1 for
# everything else. It is consumed by `access_log ... if=$loggable`.
# $request_uri includes the query string, so every pattern is anchored with
# (/|\?|$) to match the exact path segment, with or without a trailing slash
# or query. Previously `/health` was left unanchored, which also hid any
# unrelated URI that merely starts with "/health" from the access log.
map $request_uri $loggable {
    default                       1;
    ~^/live(/|\?|$)               0;
    ~^/ready(/|\?|$)              0;
    ~^/metrics(/|\?|$)            0;
    ~^/healthz(/|\?|$)            0;
    ~^/nginx_status(/|\?|$)       0;
    ~^/health(/|\?|$)             0;
    ~^/mcp/health(/|\?|$)         0;
    ~^/ping(/|\?|$)               0;
}
|
||||
|
||||
# Map of upstreams to give us readable proxy_pass targets and easy retries.
|
||||
upstream mnemosyne_app {
|
||||
server app:8000 max_fails=3 fail_timeout=30s;
|
||||
}
|
||||
|
||||
upstream mnemosyne_mcp {
|
||||
server mcp:8001 max_fails=3 fail_timeout=30s;
|
||||
}
|
||||
# Access logging for this vhost is declared inside the server block, not at
# this (http) level.
#
# nginx does not merge access_log directives across levels: a level that
# declares its own access_log replaces everything inherited from the http
# block in nginx:alpine's /etc/nginx/nginx.conf, so the server-level
# `access_log ... if=$loggable` is the only log written for this vhost and
# the probe-path filter applies to it.
#
# Do NOT put `access_log off;` next to another access_log at the same level:
# `off` takes precedence and silently disables every access log declared at
# that level, filtered or not.
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
server_name _;
|
||||
|
||||
access_log /var/log/nginx/access.log combined if=$loggable;
|
||||
|
||||
# Reasonable limits — file uploads to the ingest endpoint can be big,
|
||||
# but the bulk path is S3-direct from Daedalus. 64 MB covers admin
|
||||
# uploads and direct REST POST /library/api/items/upload.
|
||||
# Reasonable limits — ingest can upload large content, but the bulk
|
||||
# path is S3-direct from Daedalus. 64 MB covers admin uploads and
|
||||
# direct REST POST /library/api/items/upload.
|
||||
client_max_body_size 64m;
|
||||
client_body_timeout 120s;
|
||||
|
||||
# Liveness probe — always 200 if the Django process is up.
|
||||
# Use the trailing-slash form: /live/ returns 200 directly.
|
||||
# /live (no slash) triggers Django's APPEND_SLASH 301 redirect, which
|
||||
# will cause health check clients that don't follow redirects to fail.
|
||||
location = /live/ {
|
||||
proxy_pass http://mnemosyne_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
access_log off;
|
||||
}
|
||||
# Variable-based upstreams force nginx to re-resolve via the Docker
|
||||
# DNS on each request rather than caching the IP at startup.
|
||||
set $backend_app http://app:8000;
|
||||
set $backend_mcp http://mcp:8001;
|
||||
|
||||
# Readiness probe — 200 only when PostgreSQL + Memcached are reachable.
|
||||
# Same trailing-slash rule applies.
|
||||
location = /ready/ {
|
||||
proxy_pass http://mnemosyne_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
access_log off;
|
||||
}
|
||||
# ── Security headers ──────────────────────────────────────────────────
|
||||
# `always` so they apply to 4xx/5xx responses from upstream too.
|
||||
# Stronger policies (CSP, Referrer-Policy, HSTS) should be set on
|
||||
# HAProxy so they're consistent across all backends.
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
|
||||
# HAProxy liveness probe — proxies through to the MCP health endpoint.
|
||||
# ── HAProxy liveness for the MCP server ───────────────────────────────
|
||||
# Short-circuits to FastMCP's /mcp/health without hitting Django.
|
||||
# HAProxy on Titania and any internal uptime monitor use this.
|
||||
location = /healthz {
|
||||
proxy_pass http://mnemosyne_mcp/mcp/health;
|
||||
access_log off;
|
||||
}
|
||||
allow 127.0.0.0/8; # loopback
|
||||
allow 10.0.0.0/8; # RFC1918 — primary internal range (Incus, HAProxy)
|
||||
allow 172.16.0.0/12; # RFC1918 — Docker bridge networks
|
||||
allow 192.168.0.0/16; # RFC1918
|
||||
deny all;
|
||||
|
||||
# Mnemosyne's REST API — Django REST Framework views + admin.
|
||||
# Under /library/api/* per mnemosyne/urls.py and /admin/* per Django.
|
||||
location /library/ {
|
||||
proxy_pass http://mnemosyne_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_pass $backend_mcp/mcp/health;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
}
|
||||
|
||||
location /admin/ {
|
||||
proxy_pass http://mnemosyne_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 300s;
|
||||
}
|
||||
|
||||
# FastMCP Streamable HTTP at /mcp/ and SSE at /mcp/sse/.
|
||||
# Long-running streams need disabled buffering and a generous timeout.
|
||||
location /mcp/ {
|
||||
proxy_pass http://mnemosyne_mcp;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection "";
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 600s;
|
||||
}
|
||||
|
||||
# Static files baked into the image at /app/staticfiles, mounted into
|
||||
# this nginx via a named volume populated by the app service.
|
||||
# ── Static files ──────────────────────────────────────────────────────
|
||||
# Baked into the image at /app/staticfiles, seeded into the shared
|
||||
# volume by the static-init one-shot service on every `up`.
|
||||
# Serves collected static assets straight from the shared volume with
# long-lived caching; requests are not written to the access log.
location /static/ {
    alias /var/www/static/;
    access_log off;
    expires 30d;
    add_header Cache-Control "public, immutable";

    # add_header directives are inherited from the server level only when
    # the location declares none of its own. Because Cache-Control is set
    # here, the server-level security headers must be repeated or they are
    # silently dropped from every /static/ response.
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection "1; mode=block" always;
}
|
||||
|
||||
# Prometheus scrape endpoint — internal networks only.
|
||||
# Allows: loopback + all RFC1918 private ranges.
|
||||
location /metrics {
|
||||
# ── Media files ──────────────────────────────────────────────────────
|
||||
# Empty in production (USE_LOCAL_STORAGE=False → S3Boto3Storage).
|
||||
# Useful in staging / dev compose runs where FileSystemStorage writes
|
||||
# into MEDIA_ROOT inside the shared volume.
|
||||
# Serves media files straight from the shared media volume with a 7-day
# cache lifetime; requests are not written to the access log.
location /media/ {
    alias /var/www/media/;
    access_log off;
    expires 7d;
    add_header Cache-Control "public";

    # add_header directives are inherited from the server level only when
    # the location declares none of its own. Because Cache-Control is set
    # here, the server-level security headers must be repeated; without
    # `nosniff` in particular, files in this directory could be
    # content-sniffed by browsers.
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection "1; mode=block" always;
}
|
||||
|
||||
# ── FastMCP server ────────────────────────────────────────────────────
|
||||
# Streamable HTTP at /mcp/ and SSE at /mcp/sse/. Long-running streams
|
||||
# require disabled buffering, an unset Connection header (so HTTP/1.1
|
||||
# keep-alive stays upstream), and a generous read timeout.
|
||||
location /mcp/ {
|
||||
proxy_pass $backend_mcp;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
proxy_set_header Authorization $http_authorization;
|
||||
proxy_set_header Connection "";
|
||||
proxy_redirect off;
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
}
|
||||
|
||||
# ── Prometheus scrape — internal networks only ────────────────────────
|
||||
# /metrics itself is owned by Django's django-prometheus middleware.
|
||||
# Omitting 10.0.0.0/8 silently breaks scrapes from Prospero in the
|
||||
# Incus network; omitting 172.16.0.0/12 breaks scrapes from a
|
||||
# Prometheus container on the default Docker bridge.
|
||||
# Exact-match scrape endpoint, proxied to the Django app and restricted to
# loopback + RFC1918 source addresses. Not written to the access log.
location = /metrics {
    allow 127.0.0.0/8;
    allow 10.0.0.0/8;
    allow 172.16.0.0/12;
    allow 192.168.0.0/16;
    deny all;

    # Exactly one proxy_pass per location: nginx rejects a duplicate
    # directive, and the static `http://mnemosyne_app` upstream form would
    # bypass the resolver-based re-resolution that $backend_app provides.
    proxy_pass $backend_app;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
    access_log off;
}
|
||||
|
||||
# ── nginx stub_status — internal networks only ────────────────────────
|
||||
# Cheap endpoint for Prospero to watch active connections + request rate.
|
||||
location = /nginx_status {
|
||||
allow 127.0.0.0/8;
|
||||
allow 10.0.0.0/8;
|
||||
allow 172.16.0.0/12;
|
||||
allow 192.168.0.0/16;
|
||||
deny all;
|
||||
|
||||
stub_status on;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# ── Health probes on the Django app ───────────────────────────────────
|
||||
# Use the trailing-slash form: /live/ and /ready/ return 200 directly.
|
||||
# The un-slashed forms trigger Django's APPEND_SLASH 301 redirect, so
|
||||
# clients that don't follow redirects will report a failure even when
|
||||
# the service is healthy.
|
||||
location = /live/ {
|
||||
allow 127.0.0.0/8;
|
||||
allow 10.0.0.0/8;
|
||||
allow 172.16.0.0/12;
|
||||
allow 192.168.0.0/16;
|
||||
deny all;
|
||||
|
||||
proxy_pass $backend_app;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
location = /ready/ {
|
||||
allow 127.0.0.0/8;
|
||||
allow 10.0.0.0/8;
|
||||
allow 172.16.0.0/12;
|
||||
allow 192.168.0.0/16;
|
||||
deny all;
|
||||
|
||||
proxy_pass $backend_app;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# ── Catch-all: everything else goes to Django ─────────────────────────
|
||||
# Django owns `/`, `/dashboard/`, `/login/`, `/logout/`, `/profile/*`,
|
||||
# `/notifications/*`, `/admin/*`, `/library/*`, `/llm/*`, `/api/v1/*`,
|
||||
# and any future app URL. It returns its own themed 404 for anything
|
||||
# unrouted — not nginx's default page.
|
||||
location / {
|
||||
proxy_pass $backend_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
proxy_redirect off;
|
||||
proxy_read_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user