# =============================================================================
# Mnemosyne — production deployment
# =============================================================================
# Four long-running services (app, mcp and worker share the Mnemosyne image;
# web is a stock nginx image):
#   app     — Django REST API + admin (gunicorn, port 8000)
#   mcp     — FastMCP server (uvicorn, port 22091)
#   worker  — Celery worker (embedding/ingest/batch queues)
#   web     — reverse proxy, public port 23181 (nginx)
#
# External services (NOT spun up here): Postgres on Portia, Neo4j on Umbriel,
# RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached on its own host, embedder
# and reranker on Nyx, smtp4dev on Oberon. All reached over the internal
# 10.10.0.0/24 network.
#
# Environment scoping
# -------------------
# Every service lists ONLY the environment variables it actually needs, with
# values interpolated from the shell (typically `.env` at the project root,
# which an Ansible role generates from a j2 template + vault secrets). No
# `env_file:` sharing — a compromised MCP container should not see the Celery
# broker creds or the LLM API encryption key, and the Celery worker has no
# business knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide
# which services need it and add it only to those `environment:` blocks.
#
# Run:
#   docker compose up -d
#
# The `init` sidecar (below) runs Postgres migrations and library-type
# seeding on every `up`. Long-running services wait for it via
# `depends_on: init: service_completed_successfully` — so a failure there
# (unreachable DB, broken migration) blocks the stack.
#
# Neo4j vector-index creation is deliberately NOT bundled into `init`.
# `setup_neo4j_indexes` requires a system embedding model configured in
# the admin, which only exists after first boot — an operator has to land
# in /admin/, pick an embedding API + model, and set its vector_dimensions
# value. Bootstrap order is therefore:
#
#   1. docker compose up    # init sidecar: migrate + load_library_types
#   2. browse to /admin/ → llm_manager → configure system embedding model
#   3. docker compose exec app python manage.py setup_neo4j_indexes
#
# Until step 3, vector search returns empty results. library/apps.py logs
# a readiness warning when indexes are missing, so this is visible.
# The standalone `migrate` / `setup` entrypoint commands remain available
# for ad-hoc ops work (`setup` runs setup_neo4j_indexes + load_library_types
# and is the typical re-run target after embedding-model changes).
# =============================================================================

# -----------------------------------------------------------------------------
# Shared logging config — JSON to stdout, picked up by Alloy via the Docker
# socket on the host and shipped to Loki. Pinning json-file (Docker's default)
# so Alloy's discovery.docker + loki.source.docker on puck sees a consistent
# driver across every service, and bounding log retention per container so a
# misbehaving service can't fill the disk between Alloy tails.
# -----------------------------------------------------------------------------
x-logging: &default-logging
  driver: json-file
  options:
    tag: "{{.Name}}"
    max-size: "10m"
    max-file: "5"

services:
  # ── Static-file seeder: copies /app/staticfiles into the shared volume on
  # every `up`. Runs once and exits. Without this, the named volume is only
  # seeded the first time it's empty, so static updates between deploys
  # would not propagate to nginx.
  static-init:
    image: git.helu.ca/r/mnemosyne:latest
    command: ["sh", "-c", "cp -a /app/staticfiles/. /shared-static/"]
    user: "0:0"
    volumes:
      - mnemosyne-static:/shared-static
    restart: "no"
    logging: *default-logging

  # ── Init sidecar: one-shot Postgres migrate + library-type seed. Runs on
  # every `up` and exits. Long-running services below depend on
  # `service_completed_successfully`, so a failure here (unreachable DB,
  # broken migration) blocks `app`/`mcp`/`worker` from starting. Both
  # commands are idempotent.
  #
  # Neo4j vector-index setup is NOT run here — see the header comment for
  # the operator bootstrap flow. Only library_type seeding touches Neo4j
  # from this sidecar, and it does not depend on any embedding model.
  #
  # This sidecar only needs Postgres, Neo4j, and logging env — no S3, no
  # Celery, no LLM encryption key. Keep it that way.
  init:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["init"]
    environment:
      # Django core (settings import)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (migrate)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (load_library_types writes Library defaults into the graph)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Logging (MNEMOSYNE_COMPONENT is injected by settings.py into every
      # log line as a static JSON field; Alloy on puck reads the compose
      # service name directly off the Docker label and uses that as the
      # Loki `component` label, but we still set it here so operators
      # tailing `docker logs` see the same attribution)
      - MNEMOSYNE_COMPONENT=init
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: "no"
    logging: *default-logging

  # ── App: Django REST API + admin ──────────────────────────────────────────
  # Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
  # Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
  # the producer, the worker is the consumer).
  app:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["web"]
    environment:
      # Django core
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - ALLOWED_HOSTS=${ALLOWED_HOSTS}
      - CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (Django ORM)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (knowledge graph + vectors)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached (readiness probe, theme/notification cache)
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 (Django storage backend — chunk text, item files)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # Celery (Django enqueues tasks; does NOT consume)
      - CELERY_BROKER_URL=${CELERY_BROKER_URL}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
      - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
      # LLM API secrets (admin + DRF pages decrypt stored provider API keys)
      - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
      # Email
      - EMAIL_HOST=${EMAIL_HOST}
      - EMAIL_PORT=${EMAIL_PORT}
      - EMAIL_USE_TLS=${EMAIL_USE_TLS}
      # Search & re-ranker (serves /library/api/search)
      - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
      - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
      - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
      - SEARCH_RRF_K=${SEARCH_RRF_K}
      - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
      - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
      - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
      # SSO / Casdoor (app only — only this service renders the login page
      # and initiates the OIDC flow; worker and mcp never touch OAuth)
      - CASDOOR_ENABLED=${CASDOOR_ENABLED}
      - CASDOOR_ORIGIN=${CASDOOR_ORIGIN}
      - CASDOOR_ORIGIN_FRONTEND=${CASDOOR_ORIGIN_FRONTEND}
      - CASDOOR_CLIENT_ID=${CASDOOR_CLIENT_ID}
      - CASDOOR_CLIENT_SECRET=${CASDOOR_CLIENT_SECRET}
      - CASDOOR_ORG_NAME=${CASDOOR_ORG_NAME}
      - CASDOOR_SSL_VERIFY=${CASDOOR_SSL_VERIFY}
      - ALLOW_LOCAL_LOGIN=${ALLOW_LOCAL_LOGIN}
      # Logging
      - MNEMOSYNE_COMPONENT=app
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
    logging: *default-logging
    depends_on:
      static-init:
        condition: service_completed_successfully
      init:
        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/ready/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
  # Read-only LLM-facing surface. Intentionally excluded:
  #   CELERY_BROKER_URL              — MCP must not enqueue tasks
  #   LLM_API_SECRETS_ENCRYPTION_KEY — MCP must not decrypt stored provider keys
  #   DAEDALUS_S3_*                  — MCP does not ingest
  #   CSRF_TRUSTED_ORIGINS           — MCP does not accept browser forms
  #   EMAIL_*                        — MCP does not send mail
  #   EMBEDDING_* (batch/timeout)    — MCP does not embed
  # S3 vars ARE passed so STORAGES initialises identically to the app container
  # (simpler to reason about than having mcp use FileSystemStorage while the
  # rest of the stack uses S3). MCP is read-only at the application layer so
  # the S3 key here only matters if someone exploits a write path in the
  # future — keep the credential scoped to read-only in your secret manager.
  mcp:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["mcp"]
    environment:
      # Django core (ASGI still imports settings)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - ALLOWED_HOSTS=${ALLOWED_HOSTS}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (McpToken lookup lives in Django ORM)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (search + get_chunk)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 (same block as app — STORAGES must initialise identically)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # MCP-specific
      - MCP_REQUIRE_AUTH=${MCP_REQUIRE_AUTH}
      # Search & re-ranker (the `search` MCP tool uses these)
      - SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
      - SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
      - SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
      - SEARCH_RRF_K=${SEARCH_RRF_K}
      - SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
      - RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
      - RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
      # Logging
      - MNEMOSYNE_COMPONENT=mcp
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
    logging: *default-logging
    depends_on:
      init:
        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # NOTE(review): the file header lists the MCP server on port 22091, but
      # this probe targets 8001 — confirm which port the `mcp` entrypoint
      # binds and align the two. `web` waits on this check, so a wrong port
      # here would keep the proxy from starting.
      test: ["CMD", "curl", "-f", "http://localhost:8001/mcp/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s

  # ── Celery worker: embedding + ingest + batch queues ───────────────────────
  # Consumer side of the queue. Needs the full S3 block (reads Daedalus's
  # bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
  # vision models via stored provider keys), and both broker URL + result
  # backend. Does NOT need HTTP-layer settings (ALLOWED_HOSTS, CSRF, MCP auth)
  # or search tuning (the worker never serves queries).
  worker:
    image: git.helu.ca/r/mnemosyne:latest
    pull_policy: always
    command: ["worker"]
    environment:
      # Django core (Celery imports settings)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (graph writes during embed/ingest)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Memcached (task progress cache)
      - KVDB_LOCATION=${KVDB_LOCATION}
      - KVDB_PREFIX=${KVDB_PREFIX}
      # S3 — Mnemosyne's own bucket (chunk text writes, item file storage)
      - USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
      - AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
      - AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
      - AWS_S3_VERIFY=${AWS_S3_VERIFY}
      - AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
      # Daedalus S3 — cross-bucket reads for ingest (worker-only)
      - DAEDALUS_S3_ENDPOINT_URL=${DAEDALUS_S3_ENDPOINT_URL}
      - DAEDALUS_S3_ACCESS_KEY_ID=${DAEDALUS_S3_ACCESS_KEY_ID}
      - DAEDALUS_S3_SECRET_ACCESS_KEY=${DAEDALUS_S3_SECRET_ACCESS_KEY}
      - DAEDALUS_S3_BUCKET_NAME=${DAEDALUS_S3_BUCKET_NAME}
      - DAEDALUS_S3_REGION_NAME=${DAEDALUS_S3_REGION_NAME}
      - DAEDALUS_S3_USE_SSL=${DAEDALUS_S3_USE_SSL}
      - DAEDALUS_S3_VERIFY=${DAEDALUS_S3_VERIFY}
      # Celery / RabbitMQ
      - CELERY_BROKER_URL=${CELERY_BROKER_URL}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
      - CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
      # Worker tuning (entrypoint.sh reads these)
      - CELERY_QUEUES=${CELERY_QUEUES}
      - CELERY_CONCURRENCY=${CELERY_CONCURRENCY}
      - CELERY_LOG_LEVEL=${CELERY_LOGGING_LEVEL}
      # LLM API secrets (ingest vision pass decrypts stored provider keys)
      - LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
      # Embedding pipeline
      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE}
      - EMBEDDING_TIMEOUT=${EMBEDDING_TIMEOUT}
      # Logging
      - MNEMOSYNE_COMPONENT=worker
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - CELERY_LOGGING_LEVEL=${CELERY_LOGGING_LEVEL}
    restart: unless-stopped
    logging: *default-logging
    # The worker waits for a healthy `app`; it is gated on `init` only
    # transitively, through app's own depends_on.
    depends_on:
      app:
        condition: service_healthy
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # CMD-SHELL (not exec-form CMD): exec form runs no shell, so $HOSTNAME
      # would reach celery as a literal string and the ping would never match
      # this node. `$$` keeps Compose from interpolating the variable on the
      # host, leaving `$HOSTNAME` for the container's shell to expand.
      test: ["CMD-SHELL", "celery -A mnemosyne inspect ping -d celery@$$HOSTNAME"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 60s

  # ── Web: nginx reverse proxy, public port 23181 ────────────────────────────
  # No Django env — nginx only knows how to route. Public listener is
  # templated into the conf file by Ansible if the port ever needs to change.
  web:
    image: nginx:alpine
    restart: unless-stopped
    logging: *default-logging
    depends_on:
      app:
        condition: service_healthy
      mcp:
        condition: service_healthy
    ports:
      - "23181:80"
    volumes:
      - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
      - mnemosyne-static:/var/www/static:ro
      - mnemosyne-media:/var/www/media:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost/live/"]
      interval: 30s
      timeout: 5s
      retries: 3

volumes:
  # Static files baked into the image at /app/staticfiles. The static-init
  # service seeds this volume on every `up`, so nginx always serves the
  # current image's static bundle.
  mnemosyne-static: {}
  # Local FileSystemStorage fallback. Production uses USE_LOCAL_STORAGE=False
  # so this is mostly empty — kept for parity with dev and for any path
  # that writes to MEDIA_ROOT directly.
  mnemosyne-media: {}