diff --git a/docker-compose.yaml b/docker-compose.yaml index dd1554c..22b1a5d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,32 +1,40 @@ # ============================================================================= # Mnemosyne — production deployment # ============================================================================= -# Four services, all from the same image: +# Five services: +# init — one-shot sidecar: migrate + collectstatic + load_library_types # app — Django REST API + admin (gunicorn, port 8000) -# mcp — FastMCP server (uvicorn, port 22091) +# mcp — FastMCP server (uvicorn, port 8001) # worker — Celery worker (embedding/ingest/batch queues) -# web — reverse proxy, public port 23090 (nginx) +# web — reverse proxy, public port 23181 (nginx) # -# External services (NOT spun up here): Postgres on Portia, Neo4j on Umbriel, -# RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached on its own host, embedder -# and reranker on Nyx, smtp4dev on Oberon. All reached over the internal -# 10.10.0.0/24 network. +# External services (NOT spun up here): Postgres on Despina, Neo4j on Naiad, +# RabbitMQ on Thalassa, S3/MinIO on Perseus, Memcached on host. All reached +# over the internal network. # # Environment scoping # ------------------- # Every service lists ONLY the environment variables it actually needs, with -# values interpolated from the shell (typically `.env` at the project root, -# which an Ansible role generates from a j2 template + vault secrets). No -# `env_file:` sharing — a compromised MCP container should not see the Celery -# broker creds or the LLM API encryption key, and the Celery worker has no -# business knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide -# which services need it and add it only to those `environment:` blocks. +# values interpolated from the shell (the .env at the project root is +# generated by Ansible from a j2 template + vault secrets). 
No `env_file:` +# sharing — a compromised MCP container should not see the Celery broker +# creds or the LLM API encryption key, and the Celery worker has no business +# knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide which +# services need it and add it only to those `environment:` blocks. +# +# Static files +# ------------ +# The `init` sidecar runs `collectstatic --clear` on every `up`. Static files +# are baked into the image at build time (/app/staticfiles, produced by +# collectstatic in the Dockerfile builder stage), then copied to STATIC_ROOT +# (/mnt/static, a host bind mount) by the sidecar. nginx mounts that volume +# read-only at /var/www/static. --clear removes files left by the last deploy. # # Run: # docker compose up -d # -# The `init` sidecar (below) runs Postgres migrations and library-type -# seeding on every `up`. Long-running services wait for it via +# The `init` sidecar runs migrate + collectstatic + load_library_types on +# every `up`. Long-running services wait for it via # `depends_on: init: service_completed_successfully` — so a failure there # (unreachable DB, broken migration) blocks the stack. # @@ -36,7 +44,7 @@ # in /admin/, pick an embedding API + model, and set its vector_dimensions # value. Bootstrap order is therefore: # -# 1. docker compose up # init sidecar: migrate + load_library_types +# 1. docker compose up # init sidecar: migrate + collectstatic + load_library_types # 2. browse to /admin/ → llm_manager → configure system embedding model # 3. docker compose exec app python manage.py setup_neo4j_indexes # @@ -63,31 +71,22 @@ x-logging: &default-logging services: - # ── Static-file seeder: copies /app/staticfiles into the shared volume on - # every `up`. Runs once and exits. Without this, the named volume is only - # seeded the first time it's empty, so static updates between deploys - # would not propagate to nginx. 
- static-init: - image: git.helu.ca/r/mnemosyne:latest - command: ["sh", "-c", "cp -a /app/staticfiles/. /shared-static/"] - user: "0:0" - volumes: - - mnemosyne-static:/shared-static - restart: "no" - logging: *default-logging - - # ── Init sidecar: one-shot Postgres migrate + library-type seed. Runs on + # ── Init sidecar: one-shot Postgres migrate + collectstatic + library-type seed. Runs on # every `up` and exits. Long-running services below depend on # `service_completed_successfully`, so a failure here (unreachable DB, - # broken migration) blocks `app`/`mcp`/`worker` from starting. Both + # broken migration) blocks `app`/`mcp`/`worker` from starting. All # commands are idempotent. # + # collectstatic copies static files baked into the image (/app/staticfiles) + # into STATIC_ROOT (/mnt/static) so nginx can serve them. --clear removes + # stale files from the previous deploy on each run. + # # Neo4j vector-index setup is NOT run here — see the header comment for # the operator bootstrap flow. Only library_type seeding touches Neo4j # from this sidecar, and it does not depend on any embedding model. # - # This sidecar only needs Postgres, Neo4j, and logging env — no S3, no - # Celery, no LLM encryption key. Keep it that way. + # This sidecar only needs Postgres, Neo4j, static files, and logging env — + # no S3, no Celery, no LLM encryption key. Keep it that way. 
init: image: git.helu.ca/r/mnemosyne:latest pull_policy: always @@ -107,15 +106,16 @@ services: - DB_PORT=${DB_PORT} # Neo4j (load_library_types writes Library defaults into the graph) - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL} - # Logging (MNEMOSYNE_COMPONENT is injected by settings.py into every - # log line as a static JSON field; Alloy on puck reads the compose - # service name directly off the Docker label and uses that as the - # Loki `component` label, but we still set it here so operators - # tail-ing ``docker logs`` see the same attribution) + # Static files (collectstatic destination) + - STATIC_ROOT=/mnt/static + - USE_LOCAL_STORAGE=True + # Logging - MNEMOSYNE_COMPONENT=init - LOGGING_LEVEL=${LOGGING_LEVEL} - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} restart: "no" + volumes: + - static:/mnt/static logging: *default-logging @@ -136,6 +136,8 @@ services: - CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS} - TIME_ZONE=${TIME_ZONE} - LANGUAGE_CODE=${LANGUAGE_CODE} + - STATIC_ROOT=/mnt/static + - MEDIA_ROOT=/mnt/media # Postgres (Django ORM) - APP_DB_NAME=${APP_DB_NAME} - APP_DB_USER=${APP_DB_USER} @@ -191,12 +193,11 @@ services: restart: unless-stopped logging: *default-logging depends_on: - static-init: - condition: service_completed_successfully init: condition: service_completed_successfully volumes: - - mnemosyne-media:/app/media + - static:/mnt/static + - media:/mnt/media healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/ready/"] interval: 30s @@ -230,6 +231,8 @@ services: - ALLOWED_HOSTS=${ALLOWED_HOSTS} - TIME_ZONE=${TIME_ZONE} - LANGUAGE_CODE=${LANGUAGE_CODE} + - STATIC_ROOT=/mnt/static + - MEDIA_ROOT=/mnt/media # Postgres (McpToken lookup lives in Django ORM) - APP_DB_NAME=${APP_DB_NAME} - APP_DB_USER=${APP_DB_USER} @@ -270,7 +273,7 @@ services: init: condition: service_completed_successfully volumes: - - mnemosyne-media:/app/media + - media:/mnt/media healthcheck: test: ["CMD", "curl", "-f", 
"http://localhost:8001/mcp/health"] interval: 30s @@ -296,6 +299,8 @@ services: - DEBUG=${DEBUG} - TIME_ZONE=${TIME_ZONE} - LANGUAGE_CODE=${LANGUAGE_CODE} + - STATIC_ROOT=/mnt/static + - MEDIA_ROOT=/mnt/media # Postgres - APP_DB_NAME=${APP_DB_NAME} - APP_DB_USER=${APP_DB_USER} @@ -347,7 +352,7 @@ services: app: condition: service_healthy volumes: - - mnemosyne-media:/app/media + - media:/mnt/media healthcheck: test: ["CMD", "celery", "-A", "mnemosyne", "inspect", "ping", "-d", "celery@$$HOSTNAME"] interval: 60s @@ -371,8 +376,8 @@ services: - "23181:80" volumes: - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro - - mnemosyne-static:/var/www/static:ro - - mnemosyne-media:/var/www/media:ro + - static:/var/www/static:ro + - media:/var/www/media:ro healthcheck: test: ["CMD", "curl", "-f", "http://localhost/live/"] interval: 30s @@ -380,11 +385,20 @@ services: retries: 3 volumes: - # Static files baked into the image at /app/staticfiles. The static-init - # service seeds this volume on every `up`, so nginx always serves the - # current image's static bundle. - mnemosyne-static: - # Local FileSystemStorage fallback. Production uses USE_LOCAL_STORAGE=False - # so this is mostly empty — kept for parity with dev and for any path - # that writes to MEDIA_ROOT directly. - mnemosyne-media: + # Static files written by collectstatic (run by the init sidecar on every + # `up`), bind-mounted so nginx can serve them. `device` is the HOST path + # from .env, not the container's /mnt/static; Ansible creates it before `up`. + static: + driver: local + driver_opts: + type: none + device: ${STATIC_ROOT} + o: bind + # Media files. Production uses USE_LOCAL_STORAGE=False so this is mostly + # empty — kept for any path that writes to MEDIA_ROOT directly. 
+ media: + driver: local + driver_opts: + type: none + device: ${MEDIA_ROOT} + o: bind diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index e3ea590..bdbd589 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -63,6 +63,11 @@ case "$1" in # or library_type defaults need to change. A non-zero exit here blocks # `app`, `mcp`, and `worker` from starting. # + # collectstatic copies the static files baked into the image at build + # time (/app/staticfiles) into STATIC_ROOT (/mnt/static), which nginx + # serves directly. --clear removes any stale files from the previous + # deploy before copying, so deleted assets don't linger. + # # Neo4j vector-index creation is *deliberately not* bundled here. That # command (``setup_neo4j_indexes``) requires a system embedding model # with a configured ``vector_dimensions`` value, and that model is @@ -71,7 +76,7 @@ case "$1" in # whole stack on it would make the admin unreachable — a chicken-and- # egg. Operator bootstrap flow: # - # 1. docker compose up # init sidecar: migrate + load_library_types + # 1. docker compose up # init sidecar: migrate + collectstatic + load_library_types # 2. browse to admin, configure system embedding model # 3. docker compose exec app python manage.py setup_neo4j_indexes # @@ -80,6 +85,7 @@ case "$1" in # missing so this is visible, not silent. set -e python manage.py migrate --noinput + python manage.py collectstatic --noinput --clear python manage.py load_library_types ;;