Files
mnemosyne/docker-compose.yaml
Robert Helewka ec4f12d601 feat(ingest): source-bucket registry keyed on ingest source
Generalises the Daedalus-only cross-bucket fetch into a registry
(SOURCE_S3_BUCKETS) keyed on the IngestJob `source` field, so new
upstream sources (Spelunker) can ingest from their own buckets. The
ingest task now calls fetch_from_source(job.source, job.s3_key) and
falls back to "daedalus" for blank/unknown sources (backwards compatible).

Adds SPELUNKER_S3_* env vars and worker env scoping. Replaces
daedalus_s3.py with source_s3.py.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 22:30:08 -04:00

432 lines
18 KiB
YAML

# =============================================================================
# Mnemosyne — production deployment
# =============================================================================
# Six services:
# init — one-shot sidecar: migrate + collectstatic + load_library_types
# app — Django REST API + admin (gunicorn, port 8000)
# mcp — FastMCP server (uvicorn, port 8001)
# worker — Celery worker (embedding/ingest/batch queues)
# web — reverse proxy, public port 23181 (nginx)
# web-metrics — nginx-prometheus-exporter for `web`, public port 23191
#
# External services (NOT spun up here): Postgres on Despina, Neo4j on Naiad,
# RabbitMQ on Thalassa, S3/MinIO on Perseus, Memcached on host. All reached
# over the internal network.
#
# Environment scoping
# -------------------
# Every service lists ONLY the environment variables it actually needs, with
# values interpolated from the shell (the .env at the project root is
# generated by Ansible from a j2 template + vault secrets). No `env_file:`
# sharing — a compromised MCP container should not see the Celery broker
# creds or the LLM API encryption key, and the Celery worker has no business
# knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide which
# services need it and add it only to those `environment:` blocks.
#
# Static files
# ------------
# collectstatic is run by the `init` sidecar on every `up`. Static files are
# baked into the image at build time (/app/staticfiles by collectstatic in
# the Dockerfile builder stage), then copied to STATIC_ROOT (/mnt/static) by
# the init sidecar. nginx serves them directly from that bind-mounted path.
# --clear removes stale files from the previous deploy on each run.
#
# Run:
# docker compose up -d
#
# The `init` sidecar runs migrate + collectstatic + load_library_types on
# every `up`. Long-running services wait for it via
# `depends_on: init: service_completed_successfully` — so a failure there
# (unreachable DB, broken migration) blocks the stack.
#
# Neo4j vector-index creation is deliberately NOT bundled into `init`.
# `setup_neo4j_indexes` requires a system embedding model configured in
# the admin, which only exists after first boot — an operator has to land
# in /admin/, pick an embedding API + model, and set its vector_dimensions
# value. Bootstrap order is therefore:
#
# 1. docker compose up # init sidecar: migrate + collectstatic + load_library_types
# 2. browse to /admin/ → llm_manager → configure system embedding model
# 3. docker compose exec app python manage.py setup_neo4j_indexes
#
# Until step 3, vector search returns empty results. library/apps.py logs
# a readiness warning when indexes are missing, so this is visible.
# The standalone `migrate` / `setup` entrypoint commands remain available
# for ad-hoc ops work (`setup` runs setup_neo4j_indexes + load_library_types
# and is the typical re-run target after embedding-model changes).
# =============================================================================
# -----------------------------------------------------------------------------
# Shared logging config — JSON to stdout, picked up by Alloy via the Docker
# socket on the host and shipped to Loki. Pinning json-file (Docker's default)
# so Alloy's discovery.docker + loki.source.docker on puck sees a consistent
# driver across every service, and bounding log retention per container so a
# misbehaving service can't fill the disk between Alloy tails.
# -----------------------------------------------------------------------------
x-logging: &default-logging
  # Driver is named explicitly instead of relying on the daemon default, so
  # every service that aliases this anchor logs through the same driver.
  driver: json-file
  options:
    # Docker log-tag template: label each entry with the container name.
    tag: '{{.Name}}'
    # Rotation bounds per container: keep at most 5 files, each capped at 10m.
    max-file: '5'
    max-size: '10m'
# -----------------------------------------------------------------------------
# Shared build config — build the Mnemosyne image locally from ./Dockerfile
# instead of pulling from git.helu.ca. All four Mnemosyne services
# (init/app/mcp/worker) share `image: mnemosyne:local`, so Compose builds
# once and reuses the resulting image across them.
# -----------------------------------------------------------------------------
x-mnemosyne-build: &mnemosyne-build
  # Relative build context; the image is built from the local checkout.
  context: '.'
  # Dockerfile used for the shared mnemosyne:local image.
  dockerfile: 'Dockerfile'
services:
# ── Init sidecar: one-shot Postgres migrate + collectstatic + library-type seed. Runs on
# every `up` and exits. Long-running services below depend on
# `service_completed_successfully`, so a failure here (unreachable DB,
# broken migration) blocks `app`/`mcp`/`worker` from starting. All
# commands are idempotent.
#
# collectstatic copies static files baked into the image (/app/staticfiles)
# into STATIC_ROOT (/mnt/static) so nginx can serve them. --clear removes
# stale files from the previous deploy on each run.
#
# Neo4j vector-index setup is NOT run here — see the header comment for
# the operator bootstrap flow. Only library_type seeding touches Neo4j
# from this sidecar, and it does not depend on any embedding model.
#
# This sidecar only needs Postgres, Neo4j, static files, and logging env —
# no S3, no Celery, no LLM encryption key. Keep it that way.
init:
  image: mnemosyne:local
  build: *mnemosyne-build
  # Entrypoint subcommand for the one-shot bootstrap run.
  command:
    - init
  # One-shot container: it must not be restarted after it exits.
  restart: "no"
  logging: *default-logging
  volumes:
    # collectstatic target; the same named volume is mounted by app and web.
    - static:/mnt/static
  environment:
    # --- Django core (needed just to import settings) ---
    - DJANGO_SETTINGS_MODULE=mnemosyne.settings
    - SECRET_KEY=${SECRET_KEY}
    - DEBUG=${DEBUG}
    - TIME_ZONE=${TIME_ZONE}
    - LANGUAGE_CODE=${LANGUAGE_CODE}
    # --- Postgres (target of migrate) ---
    - APP_DB_NAME=${APP_DB_NAME}
    - APP_DB_USER=${APP_DB_USER}
    - APP_DB_PASSWORD=${APP_DB_PASSWORD}
    - DB_HOST=${DB_HOST}
    - DB_PORT=${DB_PORT}
    # --- Neo4j (load_library_types seeds Library defaults in the graph) ---
    - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
    # --- Static files ---
    # STATIC_ROOT is where collectstatic writes. USE_LOCAL_STORAGE is fixed
    # to True here because this sidecar is deliberately given no S3 settings.
    - STATIC_ROOT=/mnt/static
    - USE_LOCAL_STORAGE=True
    # --- Logging ---
    - MNEMOSYNE_COMPONENT=init
    - LOGGING_LEVEL=${LOGGING_LEVEL}
    - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
# ── App: Django REST API + admin ──────────────────────────────────────────
# Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
# Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
# the producer, the worker is the consumer).
app:
  image: mnemosyne:local
  build: *mnemosyne-build
  # Entrypoint subcommand that starts the Django web process.
  command:
    - web
  restart: unless-stopped
  # Do not start until the init sidecar has exited with status 0.
  depends_on:
    init:
      condition: service_completed_successfully
  logging: *default-logging
  volumes:
    - static:/mnt/static
    - media:/mnt/media
  # Readiness probe against the app's own /ready/ endpoint on port 8000.
  healthcheck:
    test:
      - 'CMD'
      - 'curl'
      - '-f'
      - 'http://localhost:8000/ready/'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 30s
  environment:
    # --- Django core ---
    - DJANGO_SETTINGS_MODULE=mnemosyne.settings
    - SECRET_KEY=${SECRET_KEY}
    - DEBUG=${DEBUG}
    - ALLOWED_HOSTS=${ALLOWED_HOSTS}
    - CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS}
    - TIME_ZONE=${TIME_ZONE}
    - LANGUAGE_CODE=${LANGUAGE_CODE}
    - STATIC_ROOT=/mnt/static
    - MEDIA_ROOT=/mnt/media
    # --- Postgres (Django ORM) ---
    - APP_DB_NAME=${APP_DB_NAME}
    - APP_DB_USER=${APP_DB_USER}
    - APP_DB_PASSWORD=${APP_DB_PASSWORD}
    - DB_HOST=${DB_HOST}
    - DB_PORT=${DB_PORT}
    # --- Neo4j (knowledge graph + vectors) ---
    - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
    # --- Memcached (readiness probe, theme/notification cache) ---
    - KVDB_LOCATION=${KVDB_LOCATION}
    - KVDB_PREFIX=${KVDB_PREFIX}
    # --- S3: Django storage backend (chunk text, item files) ---
    - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
    - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
    - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
    - AWS_S3_VERIFY=${AWS_S3_VERIFY}
    - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
    # --- Celery: this service only produces tasks, it never consumes ---
    - CELERY_BROKER_URL=${CELERY_BROKER_URL}
    - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
    - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
    # --- LLM API secrets (admin + DRF pages decrypt stored provider keys) ---
    - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
    # --- Email ---
    - EMAIL_HOST=${EMAIL_HOST}
    - EMAIL_PORT=${EMAIL_PORT}
    - EMAIL_USE_TLS=${EMAIL_USE_TLS}
    # --- Search & re-ranker (backs /library/api/search) ---
    - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
    - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
    - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
    - SEARCH_RRF_K=${SEARCH_RRF_K}
    - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
    - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
    - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
    # --- SSO / Casdoor ---
    # Scoped to this service alone: it is the one that renders the login
    # page and starts the OIDC flow; worker and mcp never touch OAuth.
    - CASDOOR_ENABLED=${CASDOOR_ENABLED}
    - CASDOOR_ORIGIN=${CASDOOR_ORIGIN}
    - CASDOOR_ORIGIN_FRONTEND=${CASDOOR_ORIGIN_FRONTEND}
    - CASDOOR_CLIENT_ID=${CASDOOR_CLIENT_ID}
    - CASDOOR_CLIENT_SECRET=${CASDOOR_CLIENT_SECRET}
    - CASDOOR_ORG_NAME=${CASDOOR_ORG_NAME}
    - CASDOOR_SSL_VERIFY=${CASDOOR_SSL_VERIFY}
    - ALLOW_LOCAL_LOGIN=${ALLOW_LOCAL_LOGIN}
    # --- Logging ---
    - MNEMOSYNE_COMPONENT=app
    - LOGGING_LEVEL=${LOGGING_LEVEL}
    - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
# Read-only LLM-facing surface. Intentionally excluded:
# CELERY_BROKER_URL — MCP must not enqueue tasks
# LLM_API_SECRETS_ENCRYPTION_KEY — MCP must not decrypt stored provider keys
# DAEDALUS_S3_* / SPELUNKER_S3_* — MCP does not ingest
# CSRF_TRUSTED_ORIGINS — MCP does not accept browser forms
# EMAIL_* — MCP does not send mail
# EMBEDDING_* (batch/timeout) — MCP does not embed
# S3 vars ARE passed so STORAGES initialises identically to the app container
# (simpler to reason about than having mcp use FileSystemStorage while the
# rest of the stack uses S3). MCP is read-only at the application layer so
# the S3 key here only matters if someone exploits a write path in the
# future — keep the credential scoped to read-only in your secret manager.
mcp:
  image: mnemosyne:local
  build: *mnemosyne-build
  # Entrypoint subcommand that starts the MCP server process.
  command:
    - mcp
  restart: unless-stopped
  # Do not start until the init sidecar has exited with status 0.
  depends_on:
    init:
      condition: service_completed_successfully
  logging: *default-logging
  volumes:
    - media:/mnt/media
  # Health probe against the server's own /mcp/health endpoint on port 8001.
  healthcheck:
    test:
      - 'CMD'
      - 'curl'
      - '-f'
      - 'http://localhost:8001/mcp/health'
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 30s
  environment:
    # --- Django core (the ASGI process still imports settings) ---
    - DJANGO_SETTINGS_MODULE=mnemosyne.settings
    - SECRET_KEY=${SECRET_KEY}
    - DEBUG=${DEBUG}
    - ALLOWED_HOSTS=${ALLOWED_HOSTS}
    - TIME_ZONE=${TIME_ZONE}
    - LANGUAGE_CODE=${LANGUAGE_CODE}
    - STATIC_ROOT=/mnt/static
    - MEDIA_ROOT=/mnt/media
    # --- Postgres (McpToken lookup goes through the Django ORM) ---
    - APP_DB_NAME=${APP_DB_NAME}
    - APP_DB_USER=${APP_DB_USER}
    - APP_DB_PASSWORD=${APP_DB_PASSWORD}
    - DB_HOST=${DB_HOST}
    - DB_PORT=${DB_PORT}
    # --- Neo4j (search + get_chunk) ---
    - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
    # --- Memcached ---
    - KVDB_LOCATION=${KVDB_LOCATION}
    - KVDB_PREFIX=${KVDB_PREFIX}
    # --- S3: mirrors the app's block so STORAGES initialises identically ---
    - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
    - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
    - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
    - AWS_S3_VERIFY=${AWS_S3_VERIFY}
    - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
    # --- MCP-specific ---
    - MCP_REQUIRE_AUTH=${MCP_REQUIRE_AUTH}
    # --- Search & re-ranker (consumed by the `search` MCP tool) ---
    - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
    - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
    - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
    - SEARCH_RRF_K=${SEARCH_RRF_K}
    - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
    - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
    - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
    # --- Logging ---
    - MNEMOSYNE_COMPONENT=mcp
    - LOGGING_LEVEL=${LOGGING_LEVEL}
    - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
# Consumer side of the queue. Needs the full S3 block (reads the upstream
# source buckets — Daedalus's and Spelunker's — and writes to Mnemosyne's),
# the LLM API encryption key (ingest calls vision models via stored provider
# keys), and both broker URL + result backend. Does NOT need HTTP-layer
# settings (ALLOWED_HOSTS, CSRF, MCP auth) or search tuning (the worker never
# serves queries).
worker:
  image: mnemosyne:local
  build: *mnemosyne-build
  # Entrypoint subcommand that starts the Celery worker process.
  command: ["worker"]
  environment:
    # Django core (Celery imports settings)
    - DJANGO_SETTINGS_MODULE=mnemosyne.settings
    - SECRET_KEY=${SECRET_KEY}
    - DEBUG=${DEBUG}
    - TIME_ZONE=${TIME_ZONE}
    - LANGUAGE_CODE=${LANGUAGE_CODE}
    - STATIC_ROOT=/mnt/static
    - MEDIA_ROOT=/mnt/media
    # Postgres
    - APP_DB_NAME=${APP_DB_NAME}
    - APP_DB_USER=${APP_DB_USER}
    - APP_DB_PASSWORD=${APP_DB_PASSWORD}
    - DB_HOST=${DB_HOST}
    - DB_PORT=${DB_PORT}
    # Neo4j (graph writes during embed/ingest)
    - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
    # Memcached (task progress cache)
    - KVDB_LOCATION=${KVDB_LOCATION}
    - KVDB_PREFIX=${KVDB_PREFIX}
    # S3 — Mnemosyne's own bucket (chunk text writes, item file storage)
    - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
    - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
    - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
    - AWS_S3_VERIFY=${AWS_S3_VERIFY}
    - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
    # Daedalus S3 — cross-bucket reads for ingest (worker-only)
    - DAEDALUS_S3_ENDPOINT_URL=${DAEDALUS_S3_ENDPOINT_URL}
    - DAEDALUS_S3_ACCESS_KEY_ID=${DAEDALUS_S3_ACCESS_KEY_ID}
    - DAEDALUS_S3_SECRET_ACCESS_KEY=${DAEDALUS_S3_SECRET_ACCESS_KEY}
    - DAEDALUS_S3_BUCKET_NAME=${DAEDALUS_S3_BUCKET_NAME}
    - DAEDALUS_S3_REGION_NAME=${DAEDALUS_S3_REGION_NAME}
    - DAEDALUS_S3_USE_SSL=${DAEDALUS_S3_USE_SSL}
    - DAEDALUS_S3_VERIFY=${DAEDALUS_S3_VERIFY}
    # Spelunker S3 — same variable set as Daedalus, for Spelunker's bucket
    # (worker-only)
    - SPELUNKER_S3_ENDPOINT_URL=${SPELUNKER_S3_ENDPOINT_URL}
    - SPELUNKER_S3_ACCESS_KEY_ID=${SPELUNKER_S3_ACCESS_KEY_ID}
    - SPELUNKER_S3_SECRET_ACCESS_KEY=${SPELUNKER_S3_SECRET_ACCESS_KEY}
    - SPELUNKER_S3_BUCKET_NAME=${SPELUNKER_S3_BUCKET_NAME}
    - SPELUNKER_S3_REGION_NAME=${SPELUNKER_S3_REGION_NAME}
    - SPELUNKER_S3_USE_SSL=${SPELUNKER_S3_USE_SSL}
    - SPELUNKER_S3_VERIFY=${SPELUNKER_S3_VERIFY}
    # Celery / RabbitMQ
    - CELERY_BROKER_URL=${CELERY_BROKER_URL}
    - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
    - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
    # Worker tuning (entrypoint.sh reads these). CELERY_LOG_LEVEL is fed from
    # the same CELERY_LOGGING_LEVEL value that is also passed through under
    # its own name in the logging group below.
    - CELERY_QUEUES=${CELERY_QUEUES}
    - CELERY_CONCURRENCY=${CELERY_CONCURRENCY}
    - CELERY_LOG_LEVEL=${CELERY_LOGGING_LEVEL}
    # LLM API secrets (ingest vision pass decrypts stored provider keys)
    - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
    # Embedding pipeline
    - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE}
    - EMBEDDING_TIMEOUT=${EMBEDDING_TIMEOUT}
    # Logging
    - MNEMOSYNE_COMPONENT=worker
    - LOGGING_LEVEL=${LOGGING_LEVEL}
    - CELERY_LOGGING_LEVEL=${CELERY_LOGGING_LEVEL}
  restart: unless-stopped
  logging: *default-logging
  # Starts only once `app` reports healthy.
  depends_on:
    app:
      condition: service_healthy
  volumes:
    - media:/mnt/media
  healthcheck:
    # CMD-SHELL, not exec-form CMD: exec form runs the argv without a shell,
    # so `$HOSTNAME` would reach celery as a literal string, no node would
    # answer the ping, and the container would be reported unhealthy forever.
    # `$$` is the Compose escape for a literal `$`, so the variable is
    # expanded by the shell inside the container rather than by Compose.
    test: ["CMD-SHELL", "celery -A mnemosyne inspect ping -d celery@$$HOSTNAME"]
    interval: 60s
    timeout: 10s
    retries: 3
    start_period: 60s
# ── Web: nginx reverse proxy, public port 23181 ────────────────────────────
# No Django env — nginx only knows how to route. Public listener is
# templated into the conf file by Ansible if the port ever needs to change.
web:
  image: nginx:alpine
  restart: unless-stopped
  logging: *default-logging
  # Public listener: host port 23181 maps to nginx's port 80 in the container.
  ports:
    - '23181:80'
  volumes:
    # Site config replaces the image's default server block; read-only.
    - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
    # Static and media volumes are mounted read-only.
    - static:/var/www/static:ro
    - media:/var/www/media:ro
  # Both upstream services must report healthy before nginx starts.
  depends_on:
    app:
      condition: service_healthy
    mcp:
      condition: service_healthy
  # Probes /live/ through nginx on port 80.
  healthcheck:
    test:
      - 'CMD'
      - 'curl'
      - '-f'
      - 'http://localhost/live/'
    interval: 30s
    timeout: 5s
    retries: 3
# ── Web metrics: nginx-prometheus-exporter ─────────────────────────────────
# Scrapes the `web` container's stub_status endpoint and re-exposes it in
# Prometheus format on 9113. Prospero (Sao) scrapes this; see
# virgo/ansible/pplg/prometheus.yml.j2 → job_name: 'mnemosyne'.
# The Django /metrics endpoint (django-prometheus + custom pipeline metrics
# in mcp_server/metrics.py and library/metrics.py) is reached separately
# via nginx at /metrics — no sidecar needed for that.
web-metrics:
  # NOTE(review): `latest` is a floating tag, so each pull may bring in a
  # different exporter release — consider pinning a specific version.
  image: nginx/nginx-prometheus-exporter:latest
  # Scrape target: the `web` service's nginx_status endpoint.
  command: ['--nginx.scrape-uri', 'http://web:80/nginx_status']
  restart: unless-stopped
  logging: *default-logging
  # Only waits for `web` to be started, not for it to be healthy.
  depends_on:
    web:
      condition: service_started
  # Exporter port 9113 published on host port 23191.
  ports:
    - '23191:9113'
volumes:
  # collectstatic output, rewritten by the init sidecar on every `up`.
  # Docker-managed named volume with default settings — no host path.
  static: {}
  # Media files. Production runs with USE_LOCAL_STORAGE=False (S3), so this
  # volume is effectively unused; it exists so the services' mount points
  # still resolve.
  media: {}