feat(deploy): production docker compose stack + Gitea CI image build

Adds a complete deployment surface for production:

  Dockerfile               multi-stage 3.12-slim build, collectstatic
                           baked into the image, runs as non-root mnemosyne
                           uid/gid 1000.
  docker/entrypoint.sh     dispatches `web | mcp | worker | beat | migrate
                           | setup | shell` from a single image, so every
                           service in compose runs the same artifact.
  docker-compose.yaml      five services: static-init (one-shot copies
                           statics into the shared volume on every up),
                           web (gunicorn), mcp (uvicorn), worker (celery),
                           nginx. External services (Postgres, Neo4j,
                           RabbitMQ, S3, Memcached, embedder, reranker)
                           reached over the 10.10.0.0/24 internal network
                           and configured via mnemosyne/.env.
  nginx/mnemosyne.conf     reverse proxy: /library/* and /admin/* → web,
                           /mcp/* → mcp, /static/* → volume, /metrics
                           internal-network-only (127/8 + RFC1918), /healthz
                           proxies to /mcp/health for liveness probes.
  .gitea/workflows/        CVE scan + image build, image pushed to
                           git.helu.ca/r/mnemosyne. Trivy scans pyproject
                           extras (dev/test/lint/docs) and the built image.
  pyproject.toml           adds [test], [lint], [docs] extras so the CI
                           pip-compile step has something to resolve.

README documents the bring-up flow (`docker compose run --rm web migrate`,
then `setup`, then `up -d`), day-to-day commands, and the env-var values
that need adjusting for production (DEBUG=False, KVDB_LOCATION pointing
at the external memcached, AWS keys filled in, etc.).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-29 12:05:23 -04:00
parent 1cd556c3f6
commit 236d9e2e74
7 changed files with 547 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
# CI pipeline: Trivy scans of the source tree, then an image build/push
# followed by Trivy scans of the pushed image.
name: CVE Scan & Docker Build

# Runs for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Workflow-wide environment, visible to every step of every job.
env:
  # Registry host and repository path that together form the image name.
  REGISTRY: git.helu.ca
  IMAGE_NAME: ${{ gitea.repository }}
  # Severity filter passed to the Trivy vulnerability/misconfig scans.
  TRIVY_SEVERITY: MEDIUM,HIGH,CRITICAL
  # Kept as quoted strings so they reach Trivy as text, not YAML booleans.
  TRIVY_NO_PROGRESS: "true"
  TRIVY_DISABLE_VEX_NOTICE: "true"
jobs:
  # Report-only scan of the checked-out tree. Every Trivy call below uses
  # `--exit-code 0`, so findings are printed but never fail the job; the job
  # only fails on tooling errors (Trivy install, dependency resolution).
  security-scan:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # NOTE(review): this installs whatever release install.sh resolves at
      # run time; consider pinning a release so scan results are reproducible.
      - name: Install Trivy
        run: |
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin
          trivy --version

      # Pins the full dependency closure (base + all four extras) into
      # requirements.txt so Trivy has concrete versions to match against.
      - name: Resolve full dependency set (incl. dev/test/lint/docs extras)
        run: |
          python3 -m venv /tmp/scanenv
          /tmp/scanenv/bin/pip install --quiet pip-tools
          /tmp/scanenv/bin/pip-compile pyproject.toml \
            --extra dev --extra test --extra lint --extra docs \
            -o requirements.txt --no-header --quiet --allow-unsafe
          echo "Resolved $(grep -cv '^\s*\(#\|$\)' requirements.txt) pinned packages."

      - name: Scan Python dependencies for CVEs
        run: |
          trivy fs \
            --scanners vuln \
            --severity "${TRIVY_SEVERITY}" \
            --format table \
            --exit-code 0 \
            requirements.txt

      - name: Scan repository for secrets
        run: |
          trivy fs \
            --scanners secret \
            --format table \
            --exit-code 0 \
            .

  # Builds the image, pushes it, then scans the pushed tag (report-only).
  build-and-push:
    runs-on: ubuntu-latest
    needs: security-scan
    # `needs` only orders the jobs; always() lets the build run even when the
    # scan job fails.
    if: always()
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ gitea.actor }}
          password: ${{ secrets.PACKAGE_TOKEN }}

      # Tag set: branch name, PR ref, bare commit SHA, and `latest` on main.
      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=sha,prefix=
            type=raw,value=latest,enable=${{ gitea.ref == 'refs/heads/main' }}

      # NOTE(review): `push: true` is unconditional, so pull_request runs also
      # publish an image — confirm that is intended.
      # NOTE(review): `type=gha` caching relies on the runner exposing a
      # GitHub-compatible cache service — confirm the Gitea runner provides one.
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Install Trivy
        run: |
          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin
          trivy --version

      # The tag list is handed over through the environment instead of being
      # pasted into the script text, so its contents are never parsed as shell.
      - name: Scan built Docker image (OS + Python + system libs)
        env:
          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGE_TAG=$(printf '%s\n' "${IMAGE_TAGS}" | head -n1)
          echo "🔍 Scanning image: ${IMAGE_TAG}"
          trivy image \
            --scanners vuln \
            --severity "${TRIVY_SEVERITY}" \
            --format table \
            --pkg-types os,library \
            --exit-code 0 \
            "${IMAGE_TAG}"

      - name: Scan built Docker image for misconfigurations
        continue-on-error: true
        env:
          IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
        run: |
          IMAGE_TAG=$(printf '%s\n' "${IMAGE_TAGS}" | head -n1)
          trivy image \
            --scanners misconfig \
            --severity "${TRIVY_SEVERITY}" \
            --format table \
            --exit-code 0 \
            "${IMAGE_TAG}"

93
Dockerfile Normal file
View File

@@ -0,0 +1,93 @@
# =============================================================================
# Mnemosyne — production image
# =============================================================================
# Multi-stage:
# builder installs Python deps and runs `collectstatic` once.
# runtime copies only the artifacts the running process needs.
#
# The same image runs three different processes (Django web, MCP server,
# Celery worker) — the compose file picks the command per service.
# =============================================================================
# ── Stage 1: builder ────────────────────────────────────────────────────────
# Installs the project with pip and runs `collectstatic`; the runtime stage
# copies site-packages, /usr/local/bin and /build/mnemosyne out of this stage.
FROM python:3.12-slim AS builder

# Compiler toolchain and headers for building psycopg, PyMuPDF, Pillow,
# cryptography, etc.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libffi-dev \
        libjpeg-dev \
        libpq-dev \
        libssl-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /build

# The source tree is copied before `pip install .`, so any change under
# mnemosyne/ invalidates the dependency-install layer as well.
# NOTE(review): compose reads its env file from mnemosyne/.env; if that file
# exists in the build context it is copied here and on into the runtime
# image — confirm a .dockerignore excludes it.
COPY pyproject.toml README.md ./
COPY mnemosyne/ ./mnemosyne/
RUN pip install --upgrade pip \
    && pip install .

# Non-secret settings needed for settings.py to import during collectstatic.
# They live only in this builder stage; the runtime stage declares its own ENV.
ENV DJANGO_SETTINGS_MODULE=mnemosyne.settings \
    DEBUG=False \
    USE_LOCAL_STORAGE=True \
    APP_DB_NAME=collectstatic \
    APP_DB_USER=collectstatic

WORKDIR /build/mnemosyne

# Bake static files into the image. The two secret-looking stubs are set
# inline on the command (not via ENV/ARG) so they exist only for this one
# process and static analysis tools (Trivy) don't flag them as baked-in
# secrets.
RUN SECRET_KEY=collectstatic-stub \
    APP_DB_PASSWORD=collectstatic-stub \
    python manage.py collectstatic --noinput --clear
# ── Stage 2: runtime ────────────────────────────────────────────────────────
# Slim image holding only shared libraries, the installed Python packages and
# the application tree (including collected statics) from the builder.
FROM python:3.12-slim AS runtime

# Runtime libs for psycopg + PyMuPDF + Pillow + cryptography.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libjpeg62-turbo \
        libpq5 \
        libssl3 \
        zlib1g \
    && rm -rf /var/lib/apt/lists/*

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DJANGO_SETTINGS_MODULE=mnemosyne.settings \
    PATH=/usr/local/bin:$PATH

# Non-root user for everything that runs in this image (uid:gid 1000:1000).
# Created before the application COPY so the copy can assign ownership
# directly instead of a later `chown -R` rewriting every file into a second
# layer.
RUN groupadd --gid 1000 mnemosyne \
    && useradd --uid 1000 --gid mnemosyne --home /app --no-create-home --shell /sbin/nologin mnemosyne

# Installed packages and console scripts (gunicorn, uvicorn, celery, …) from
# the builder.
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Application code + collected statics, owned by the runtime user.
WORKDIR /app
COPY --from=builder --chown=mnemosyne:mnemosyne /build/mnemosyne /app
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh

# /app itself was created by WORKDIR as root, and media/logs may not exist in
# the source tree, so these three directories still need explicit ownership.
RUN chmod +x /usr/local/bin/entrypoint.sh \
    && mkdir -p /app/media /app/logs \
    && chown mnemosyne:mnemosyne /app /app/media /app/logs

USER mnemosyne

# The compose file overrides CMD per service. Default = Django web.
EXPOSE 8000 22091
ENTRYPOINT ["entrypoint.sh"]
CMD ["web"]

View File

@@ -153,6 +153,66 @@ These endpoints are used by the Daedalus FastAPI backend (HTTP Basic auth). All
See [docs/mnemosyne_integration.md](docs/mnemosyne_integration.md) for the full Daedalus contract.
## Production Deployment
Production runs as four long-running containers. Three of them (`web`, `mcp`, `worker`) share a single image, built and pushed by [`.gitea/workflows/cve-scan-docker-build.yml`](.gitea/workflows/cve-scan-docker-build.yml) on every push to `main`; `nginx` uses the stock `nginx:alpine` image:
| Service | Role | Port |
|---------|------|------|
| `web` | Django REST API + admin (gunicorn) | internal :8000 |
| `mcp` | FastMCP server (uvicorn) | internal :22091 |
| `worker` | Celery worker — embedding/ingest/batch | — |
| `nginx` | Reverse proxy + static files | host :23090 |
Plus a one-shot `static-init` service that copies `/app/staticfiles` (baked into the image at build time via `collectstatic`) into the shared volume nginx reads from. It runs to completion on every `up`, so static-file changes propagate on each deploy without manual intervention.
External services (NOT spun up by compose): Postgres on Portia, Neo4j on Ariel, RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached, embedder + reranker. All reached over the internal 10.10.0.0/24 network. URLs and credentials live in `mnemosyne/.env`.
### First-time bring-up
```bash
# Pull the image (or build locally with `docker compose build`)
docker compose pull
# DB migrations (one-shot)
docker compose run --rm web migrate
# Neo4j indexes + library_type defaults (one-shot)
docker compose run --rm web setup
# Bring the stack up
docker compose up -d
```
### Day-to-day
```bash
docker compose ps # service status + health
docker compose logs -f web # tail web logs
docker compose logs -f worker # tail Celery worker logs
docker compose restart mcp # restart just the MCP server
# After a new image is published:
docker compose pull && docker compose up -d
```
### Things to verify in `mnemosyne/.env` before bringing up
The development `.env` has a few values that need adjusting for production:
- `DEBUG=False`
- `USE_LOCAL_STORAGE=False` (already set; just confirm)
- `KVDB_LOCATION=<external-memcached-host>:11211` — inside a container `127.0.0.1` refers to the container itself, not the memcached host
- `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` filled in
- `DAEDALUS_S3_*` filled in for cross-bucket reads from the Daedalus bucket
- `ALLOWED_HOSTS` includes the public hostname HAProxy routes to (e.g. `mnemosyne.ouranos.helu.ca`)
- `LLM_API_SECRETS_ENCRYPTION_KEY` set to a real Fernet key
### Health probes
- `GET http://nginx-host:23090/healthz` → proxies to `/mcp/health`, returns `{"status":"ok"}` when the MCP server is up
- `GET http://nginx-host:23090/metrics` → Prometheus scrape endpoint, internal-network-only
## Architecture Note: Retrieval, Not Synthesis
Mnemosyne is a **retrieval engine**, not a RAG pipeline. It stores, embeds, and ranks — it does not synthesize answers.

111
docker-compose.yaml Normal file
View File

@@ -0,0 +1,111 @@
# =============================================================================
# Mnemosyne — production deployment
# =============================================================================
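# Four long-running services (web, mcp and worker share one image; nginx uses
# nginx:alpine), plus the one-shot static-init seeder defined below: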
# web — Django REST API + admin (gunicorn, port 8000)
# mcp — FastMCP server (uvicorn, port 22091)
# worker — Celery worker (embedding/ingest/batch queues)
# nginx — reverse proxy, public port 23090
#
# External services (NOT spun up here): Postgres on Portia, Neo4j on Ariel,
# RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached on its own host, embedder
# and reranker on Nyx, smtp4dev on Oberon. All reached over the internal
# 10.10.0.0/24 network.
#
# Run (on first bring-up, run the migrate and setup one-shots before `up -d`):
# docker compose up -d
# docker compose run --rm web migrate # one-shot DB migrate
# docker compose run --rm web setup # Neo4j indexes + library types
# =============================================================================
services:
# ── Static-file seeder ─────────────────────────────────────────────────────
  # One-shot job: copies /app/staticfiles from the image into the shared
  # volume on every `up`, then exits. A named volume is only seeded from the
  # image while it is empty, so without this copy static updates between
  # deploys would not reach nginx.
  static-init:
    image: git.helu.ca/r/mnemosyne:latest
    # "sh" is not one of the entrypoint's named modes, so the entrypoint
    # exec's this command line as-is.
    command:
      - "sh"
      - "-c"
      - "cp -a /app/staticfiles/. /shared-static/"
    # Runs as root so the copy can write into the volume.
    user: "0:0"
    restart: "no"
    volumes:
      - mnemosyne-static:/shared-static
# ── Web app: Django REST API + admin ───────────────────────────────────────
  # Starts only after static-init has completed successfully. Port 8000 is
  # exposed to other services, not published on the host.
  web:
    image: git.helu.ca/r/mnemosyne:latest
    command:
      - "web"
    env_file: mnemosyne/.env
    restart: unless-stopped
    depends_on:
      static-init:
        condition: service_completed_successfully
    expose:
      - "8000"
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # Fetches the admin login page; urlopen raises on an HTTP error status,
      # which makes the probe exit non-zero.
      # NOTE(review): the request carries Host "localhost:8000" — confirm
      # ALLOWED_HOSTS in mnemosyne/.env admits it, otherwise the probe fails.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/admin/login/').read()"
      start_period: 30s
      interval: 30s
      timeout: 5s
      retries: 3
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
  # Same image as web, started in `mcp` mode. Port 22091 is exposed to other
  # services, not published on the host.
  mcp:
    image: git.helu.ca/r/mnemosyne:latest
    command:
      - "mcp"
    env_file: mnemosyne/.env
    restart: unless-stopped
    expose:
      - "22091"
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # Same endpoint nginx proxies /healthz to.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:22091/mcp/health').read()"
      start_period: 30s
      interval: 30s
      timeout: 5s
      retries: 3
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
  # Same image as web, started in `worker` mode.
  worker:
    image: git.helu.ca/r/mnemosyne:latest
    command: ["worker"]
    env_file: mnemosyne/.env
    restart: unless-stopped
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
      # CMD-SHELL, not CMD: the exec form runs without a shell, so $HOSTNAME
      # would be passed to celery literally and the ping could never match
      # this worker's node name. `$$` is Compose's escape for a literal `$`,
      # leaving the expansion to the shell inside the container.
      test: ["CMD-SHELL", "celery -A mnemosyne inspect ping -d celery@$$HOSTNAME"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 60s
# ── nginx: reverse proxy, public port 23090 ────────────────────────────────
  # Publishes host port 23090 → container port 80. Serves /static/ from the
  # shared volume (read-only) and proxies to web and mcp.
  nginx:
    image: nginx:alpine
    restart: unless-stopped
    depends_on:
      - web
      - mcp
    ports:
      - "23090:80"
    volumes:
      - ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
      - mnemosyne-static:/var/www/static:ro
    healthcheck:
      # 127.0.0.1 rather than localhost: the mounted config only declares
      # `listen 80` (IPv4), and `localhost` can resolve to ::1 first inside
      # the Alpine image, which would make the probe fail with "connection
      # refused". /healthz is proxied to the MCP health endpoint, so this
      # check also reflects mcp availability.
      test: ["CMD", "wget", "-qO-", "http://127.0.0.1/healthz"]
      interval: 30s
      timeout: 5s
      retries: 3
volumes:
  # Copy of /app/staticfiles from the image. The static-init service writes
  # it on every `up`; nginx mounts it read-only at /var/www/static.
  mnemosyne-static: {}
  # Local FileSystemStorage fallback. Production uses USE_LOCAL_STORAGE=False
  # so this is mostly empty — kept for parity with dev and for any path
  # that writes to MEDIA_ROOT directly.
  mnemosyne-media: {}

66
docker/entrypoint.sh Normal file
View File

@@ -0,0 +1,66 @@
#!/bin/sh
# Mnemosyne container entrypoint.
#
# One image serves every role. The first argument (the compose `command`, or
# the image's default CMD) selects what to run:
#   web      gunicorn serving mnemosyne.wsgi:application on 0.0.0.0:8000
#   mcp      uvicorn serving mnemosyne.asgi:app on 0.0.0.0:22091
#   worker   Celery worker
#   beat     Celery beat scheduler
#   migrate  one-shot `manage.py migrate --noinput`
#   setup    one-shot Neo4j index creation + library_type seeding
#   shell    Django management shell
# Any other argument list is exec'd verbatim.
set -e

# With no arguments the final `exec "$@"` would be a no-op and the container
# would exit 0 without having run anything; fail loudly instead.
if [ "$#" -eq 0 ]; then
    echo "usage: entrypoint.sh {web|mcp|worker|beat|migrate|setup|shell|<command> [args...]}" >&2
    exit 64
fi

case "$1" in
    web)
        # Django REST API + admin (gunicorn → wsgi). Logs go to stdout/stderr.
        exec gunicorn \
            --bind 0.0.0.0:8000 \
            --workers "${GUNICORN_WORKERS:-3}" \
            --access-logfile - \
            --error-logfile - \
            mnemosyne.wsgi:application
        ;;
    mcp)
        # FastMCP over Streamable HTTP at /mcp/, mounted by mnemosyne.asgi.
        exec uvicorn \
            --host 0.0.0.0 \
            --port 22091 \
            --workers "${UVICORN_WORKERS:-1}" \
            mnemosyne.asgi:app
        ;;
    worker)
        # Single Celery worker process for all queues. In production you may
        # want to split these onto separate worker services for queue-level
        # isolation; one process is fine to start.
        # NOTE(review): the default list is celery,embedding,batch — there is
        # no `ingest` queue in it. Confirm ingest tasks are routed to one of
        # these queues, or set CELERY_QUEUES accordingly.
        exec celery -A mnemosyne worker \
            --loglevel="${CELERY_LOG_LEVEL:-info}" \
            --queues="${CELERY_QUEUES:-celery,embedding,batch}" \
            --concurrency="${CELERY_CONCURRENCY:-2}"
        ;;
    beat)
        # Celery scheduled tasks (only needed if/when periodic jobs are wired).
        exec celery -A mnemosyne beat \
            --loglevel="${CELERY_LOG_LEVEL:-info}"
        ;;
    migrate)
        # One-shot DB migration runner — invoke before bringing services up
        # for the first time or after a deploy.
        exec python manage.py migrate --noinput
        ;;
    setup)
        # One-shot init — Neo4j indexes + library_type seed data. Two
        # commands, so no exec; `set -e` aborts if the first one fails.
        python manage.py setup_neo4j_indexes
        python manage.py load_library_types
        ;;
    shell)
        # Drop into the management shell for ad-hoc work.
        exec python manage.py shell
        ;;
    *)
        # Fall through: run whatever was passed (e.g. `python manage.py <cmd>`).
        exec "$@"
        ;;
esac

86
nginx/mnemosyne.conf Normal file
View File

@@ -0,0 +1,86 @@
# Mnemosyne nginx — single virtual host that fronts the Django web app
# and the FastMCP server. HAProxy on Titania terminates TLS and routes by
# hostname; this nginx is plain HTTP on the internal network.
# Named upstream groups so the proxy_pass targets below read as roles rather
# than host:port pairs. `web` and `mcp` are the compose service names.
# NOTE(review): nginx documents that max_fails/fail_timeout are ignored when a
# group contains a single server — confirm these parameters are wanted.
upstream mnemosyne_web {
    server web:8000 max_fails=3 fail_timeout=30s;
}

upstream mnemosyne_mcp {
    server mcp:22091 max_fails=3 fail_timeout=30s;
}
# Single plain-HTTP virtual host: routes /library/ and /admin/ to the Django
# web upstream, /mcp/ to the MCP upstream, and serves /static/ from disk.
server {
    listen 80 default_server;
    server_name _;

    # Reasonable limits — file uploads to the ingest endpoint can be big,
    # but the bulk path is S3-direct from Daedalus. 64 MB covers admin
    # uploads and direct REST POST /library/api/items/upload.
    client_max_body_size 64m;
    client_body_timeout 120s;

    # Mnemosyne's REST API — Django REST Framework views + admin.
    # Under /library/api/* per mnemosyne/urls.py and /admin/* per Django.
    # NOTE(review): X-Forwarded-Proto is $scheme, which is always "http" on
    # this listener — confirm how the upstream TLS terminator's scheme is
    # meant to reach Django.
    location /library/ {
        proxy_pass http://mnemosyne_web;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_read_timeout 300s;
    }

    location /admin/ {
        proxy_pass http://mnemosyne_web;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_read_timeout 300s;
    }

    # FastMCP Streamable HTTP at /mcp/ and SSE at /mcp/sse/.
    # Long-running streams need disabled buffering and a generous timeout.
    location /mcp/ {
        proxy_pass http://mnemosyne_mcp;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Connection "";
        proxy_buffering off;
        proxy_cache off;
        proxy_read_timeout 600s;
    }

    # Static files baked into the image at /app/staticfiles and copied into
    # the named volume mounted here at /var/www/static.
    location /static/ {
        alias /var/www/static/;
        access_log off;
        expires 30d;
    }

    # Prometheus scrape endpoint — internal networks only.
    # Allows: localhost + RFC1918 private ranges (10/8, 172.16/12, 192.168/16).
    # NOTE(review): the allow list matches the directly connected peer. If
    # requests arrive via a proxy that itself sits in a private range they
    # all pass — confirm /metrics is not routed through the TLS terminator.
    location /metrics {
        allow 127.0.0.0/8;
        allow 10.0.0.0/8;
        allow 172.16.0.0/12;
        allow 192.168.0.0/16;
        deny all;

        proxy_pass http://mnemosyne_web;
        proxy_http_version 1.1;
        # Without an explicit Host nginx sends the upstream group name
        # ("mnemosyne_web"); Django's host validation rejects names that
        # contain an underscore. Forward the client's Host like the
        # /library/ and /admin/ locations do (it must be in ALLOWED_HOSTS).
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        access_log off;
    }

    # Liveness probe — exact-match location that proxies through to the MCP
    # health endpoint, so it reports MCP availability rather than nginx's own.
    location = /healthz {
        proxy_pass http://mnemosyne_mcp/mcp/health;
        access_log off;
    }
}

View File

@@ -40,6 +40,17 @@ dev = [
"django-debug-toolbar>=4.0,<5.0",
"docker>=7.0,<8.0",
]
# Test runner and its Django plugin.
test = [
    "pytest>=8.0,<9.0",
    "pytest-django>=4.8,<5.0",
]
# Linter.
lint = [
    "ruff>=0.6,<1.0",
]
# Documentation site generator and theme.
docs = [
    "mkdocs>=1.6,<2.0",
    "mkdocs-material>=9.5,<10.0",
]
[build-system]
requires = ["setuptools>=68.0"]