Files
mnemosyne/docker-compose.yaml
Robert Helewka cbe7921938
All checks were successful
CVE Scan & Docker Build / security-scan (push) Successful in 1m9s
CVE Scan & Docker Build / build-and-push (push) Successful in 2m31s
fix(deploy): use /ready/ healthcheck and /srv/mnemosyne path
- Change app healthcheck from /live/ to /ready/ to verify full
  readiness including dependencies (DB, Neo4j, S3)
- Increase healthcheck timeout from 5s to 10s to accommodate
  dependency checks
- Add S3 bucket connectivity check to readiness probe
- Update deployment documentation to use /srv/mnemosyne instead
  of /opt/mnemosyne as the compose project directory
2026-05-04 09:23:36 -04:00

289 lines
12 KiB
YAML

# =============================================================================
# Mnemosyne — production deployment
# =============================================================================
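# Four long-running services. app, mcp and worker share the application
# image; web runs stock nginx:alpine. A fifth, one-shot service (static-init)
# seeds the static-files volume from the application image and exits.
#   app    — Django REST API + admin (gunicorn, port 8000)
#   mcp    — FastMCP server (uvicorn, port 22091)
#   worker — Celery worker (embedding/ingest/batch queues)
#   web    — reverse proxy, public port 23181 (nginx)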
#
# External services (NOT spun up here): Postgres on Portia, Neo4j on Umbriel,
# RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached on its own host, embedder
# and reranker on Nyx, smtp4dev on Oberon. All reached over the internal
# 10.10.0.0/24 network.
#
# Environment scoping
# -------------------
# Every service lists ONLY the environment variables it actually needs, with
# values interpolated from the shell (typically `.env` at the project root,
# which an Ansible role generates from a j2 template + vault secrets). No
# `env_file:` sharing — a compromised MCP container should not see the Celery
# broker creds or the LLM API encryption key, and the Celery worker has no
# business knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide
# which services need it and add it only to those `environment:` blocks.
#
# Run:
# docker compose up -d
# docker compose run --rm app migrate # one-shot DB migrate
# docker compose run --rm app setup # Neo4j indexes + library types
# =============================================================================
services:
  # ── Static-file seeder ─────────────────────────────────────────────────────
  # Copies /app/staticfiles into the shared volume on every `up`. Runs once
  # and exits. Without this, the named volume is only seeded the first time
  # it's empty, so static updates between deploys would not propagate to
  # nginx.
  static-init:
    image: git.helu.ca/r/mnemosyne:latest
    command: ["sh", "-c", "cp -a /app/staticfiles/. /shared-static/"]
    # uid/gid 0 — presumably so the copy can write into the volume whatever
    # its current ownership is; confirm before tightening.
    user: "0:0"
    volumes:
      - mnemosyne-static:/shared-static
    # One-shot job: must not be restarted after it exits.
    restart: "no"

  # ── App: Django REST API + admin ───────────────────────────────────────────
  # Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
  # Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
  # the producer, the worker is the consumer).
  app:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["web"]
    environment:
      # Django core
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - ALLOWED_HOSTS=${ALLOWED_HOSTS}
      - CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (Django ORM)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (knowledge graph + vectors)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached (readiness probe, theme/notification cache)
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 (Django storage backend — chunk text, item files)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # Celery (Django enqueues tasks; does NOT consume)
      - CELERY_BROKER_URL=${CELERY_BROKER_URL}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
      - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
      # LLM API secrets (admin + DRF pages decrypt stored provider API keys)
      - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
      # Email
      - EMAIL_HOST=${EMAIL_HOST}
      - EMAIL_PORT=${EMAIL_PORT}
      - EMAIL_USE_TLS=${EMAIL_USE_TLS}
      # Search & re-ranker (serves /library/api/search)
      - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
      - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
      - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
      - SEARCH_RRF_K=${SEARCH_RRF_K}
      - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
      - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
      - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
      # Logging
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
    depends_on:
      # Start only after the static bundle has been copied into the volume.
      static-init:
        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # Probes /ready/ rather than /live/. worker and web gate their start on
      # this service being healthy, so they wait for this probe to pass.
      test: ["CMD", "curl", "-f", "http://localhost:8000/ready/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
  # Read-only LLM-facing surface. Intentionally excluded:
  #   CELERY_BROKER_URL              — MCP must not enqueue tasks
  #   LLM_API_SECRETS_ENCRYPTION_KEY — MCP must not decrypt stored provider keys
  #   DAEDALUS_S3_*                  — MCP does not ingest
  #   CSRF_TRUSTED_ORIGINS           — MCP does not accept browser forms
  #   EMAIL_*                        — MCP does not send mail
  #   EMBEDDING_* (batch/timeout)    — MCP does not embed
  # S3 vars ARE passed so STORAGES initialises identically to the app container
  # (simpler to reason about than having mcp use FileSystemStorage while the
  # rest of the stack uses S3). MCP is read-only at the application layer so
  # the S3 key here only matters if someone exploits a write path in the
  # future — keep the credential scoped to read-only in your secret manager.
  mcp:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["mcp"]
    environment:
      # Django core (ASGI still imports settings)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - ALLOWED_HOSTS=${ALLOWED_HOSTS}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (McpToken lookup lives in Django ORM)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (search + get_chunk)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 (same block as app — STORAGES must initialise identically)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # MCP-specific
      - MCP_REQUIRE_AUTH=${MCP_REQUIRE_AUTH}
      # Search & re-ranker (the `search` MCP tool uses these)
      - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
      - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
      - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
      - SEARCH_RRF_K=${SEARCH_RRF_K}
      - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
      - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
      - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
      # Logging
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # NOTE(review): the file header lists the MCP server on port 22091 but
      # this probe targets 8001 — confirm which port the server binds to
      # inside the container and align the two.
      test: ["CMD", "curl", "-f", "http://localhost:8001/mcp/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s

  # ── Celery worker: embedding + ingest + batch queues ───────────────────────
  # Consumer side of the queue. Needs the full S3 block (reads Daedalus's
  # bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
  # vision models via stored provider keys), and both broker URL + result
  # backend. Does NOT need HTTP-layer settings (ALLOWED_HOSTS, CSRF, MCP auth)
  # or search tuning (the worker never serves queries).
  worker:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["worker"]
    environment:
      # Django core (Celery imports settings)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (graph writes during embed/ingest)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached (task progress cache)
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 — Mnemosyne's own bucket (chunk text writes, item file storage)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # Daedalus S3 — cross-bucket reads for ingest (worker-only)
      - DAEDALUS_S3_ENDPOINT_URL=${DAEDALUS_S3_ENDPOINT_URL}
      - DAEDALUS_S3_ACCESS_KEY_ID=${DAEDALUS_S3_ACCESS_KEY_ID}
      - DAEDALUS_S3_SECRET_ACCESS_KEY=${DAEDALUS_S3_SECRET_ACCESS_KEY}
      - DAEDALUS_S3_BUCKET_NAME=${DAEDALUS_S3_BUCKET_NAME}
      - DAEDALUS_S3_REGION_NAME=${DAEDALUS_S3_REGION_NAME}
      - DAEDALUS_S3_USE_SSL=${DAEDALUS_S3_USE_SSL}
      - DAEDALUS_S3_VERIFY=${DAEDALUS_S3_VERIFY}
      # Celery / RabbitMQ
      - CELERY_BROKER_URL=${CELERY_BROKER_URL}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
      - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
      # Worker tuning (entrypoint.sh reads these). CELERY_LOG_LEVEL is filled
      # from the shell's CELERY_LOGGING_LEVEL; the differing names look
      # intentional — confirm against entrypoint.sh before renaming either.
      - CELERY_QUEUES=${CELERY_QUEUES}
      - CELERY_CONCURRENCY=${CELERY_CONCURRENCY}
      - CELERY_LOG_LEVEL=${CELERY_LOGGING_LEVEL}
      # LLM API secrets (ingest vision pass decrypts stored provider keys)
      - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
      # Embedding pipeline
      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE}
      - EMBEDDING_TIMEOUT=${EMBEDDING_TIMEOUT}
      # Logging
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - CELERY_LOGGING_LEVEL=${CELERY_LOGGING_LEVEL}
    restart: unless-stopped
    depends_on:
      app:
        condition: service_healthy
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # CMD-SHELL, not CMD: the exec form runs the command without a shell,
      # so `$HOSTNAME` would reach celery as a literal string and the ping
      # would never match this worker's node name, leaving the container
      # permanently unhealthy. `$$` stops Compose from interpolating the
      # variable on the host, so the container's shell expands it instead.
      test: ["CMD-SHELL", "celery -A mnemosyne inspect ping -d celery@$$HOSTNAME"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 60s

  # ── Web: nginx reverse proxy, public port 23181 ────────────────────────────
  # No Django env — nginx only knows how to route. Public listener is
  # templated into the conf file by Ansible if the port ever needs to change.
  web:
    image: nginx:alpine
    restart: unless-stopped
    depends_on:
      app:
        condition: service_healthy
      mcp:
        condition: service_healthy
    ports:
      - "23181:80"
    volumes:
      - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
      - mnemosyne-static:/var/www/static:ro
      - mnemosyne-media:/var/www/media:ro
    healthcheck:
      # Requests /live/ through nginx itself on port 80.
      test: ["CMD", "curl", "-f", "http://localhost/live/"]
      interval: 30s
      timeout: 5s
      retries: 3

volumes:
  # Holds the static bundle that is baked into the image at /app/staticfiles.
  # static-init re-seeds it on every `up`, so nginx always serves the static
  # files belonging to the image currently deployed.
  mnemosyne-static: {}

  # Fallback for local FileSystemStorage. Production sets
  # USE_LOCAL_STORAGE=False, so this stays mostly empty; it is kept for
  # parity with dev and for any code path that writes to MEDIA_ROOT directly.
  mnemosyne-media: {}