From cbe792193835015dd661d99445e21bf759d0b6bc Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Mon, 4 May 2026 09:23:36 -0400 Subject: [PATCH] fix(deploy): use /ready/ healthcheck and /srv/mnemosyne path - Change app healthcheck from /live/ to /ready/ to verify full readiness including dependencies (DB, Neo4j, S3) - Increase healthcheck timeout from 5s to 10s to accommodate dependency checks - Add S3 bucket connectivity check to readiness probe - Update deployment documentation to use /srv/mnemosyne instead of /opt/mnemosyne as the compose project directory --- docker-compose.yaml | 4 ++-- docs/deploy.md | 22 +++++++++++----------- mnemosyne/mnemosyne/views.py | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 5c97c41..66eabef 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -106,9 +106,9 @@ services: volumes: - mnemosyne-media:/app/media healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/live/"] + test: ["CMD", "curl", "-f", "http://localhost:8000/ready/"] interval: 30s - timeout: 5s + timeout: 10s retries: 3 start_period: 30s diff --git a/docs/deploy.md b/docs/deploy.md index 9fcfcd6..d41d832 100644 --- a/docs/deploy.md +++ b/docs/deploy.md @@ -13,7 +13,7 @@ one-time steps, and verification checks. | Item | Value | |------|-------| | Deploy target | `puck.incus` (Incus container, 10.10.0.0/24) | -| Compose project directory | `/opt/mnemosyne` | +| Compose project directory | `/srv/mnemosyne` | | Image registry | `git.helu.ca/r/mnemosyne:latest` | | Public host port | **23181** (nginx → HAProxy on Titania → `https://mnemosyne.ouranos.helu.ca`) | | Internal app port | `app:8000` (Django/gunicorn) | @@ -49,7 +49,7 @@ credentials; it does not provision these hosts. ### 3.1 Directory & file layout ``` -/opt/mnemosyne/ +/srv/mnemosyne/ ├── docker-compose.yaml ← copied from repo (or symlinked via git pull) ├── nginx/ │ └── mnemosyne.conf ← copied from repo nginx/mnemosyne.conf @@ -57,7 +57,7 @@ credentials; it does not provision these hosts. ``` The role should: -1. Create `/opt/mnemosyne/` and `nginx/` (owner: `root`, mode `0750`). +1. Create `/srv/mnemosyne/` and `nginx/` (owner: `root`, mode `0750`). 2. Render `.env` from the vault-sourced Jinja2 template (mode `0600`, owner `root`). 3. Copy (or `git pull`) `docker-compose.yaml` and `nginx/mnemosyne.conf` from the repo. @@ -66,12 +66,12 @@ The role should: ```yaml - name: Pull latest image community.docker.docker_compose_v2: - project_src: /opt/mnemosyne + project_src: /srv/mnemosyne pull: always - name: Bring stack up community.docker.docker_compose_v2: - project_src: /opt/mnemosyne + project_src: /srv/mnemosyne state: present ``` @@ -85,16 +85,16 @@ an explicit `when: mnemosyne_first_deploy` flag. ```bash # Apply Django ORM migrations (PostgreSQL schema) -docker compose -f /opt/mnemosyne/docker-compose.yaml \ +docker compose -f /srv/mnemosyne/docker-compose.yaml \ run --rm app migrate # Create Neo4j vector + full-text indexes and load library-type defaults -docker compose -f /opt/mnemosyne/docker-compose.yaml \ +docker compose -f /srv/mnemosyne/docker-compose.yaml \ run --rm app setup # Create the daedalus-service user (HTTP Basic auth for ingest API) # Pass --password from vault; idempotent if user already exists. -docker compose -f /opt/mnemosyne/docker-compose.yaml \ +docker compose -f /srv/mnemosyne/docker-compose.yaml \ run --rm app \ python manage.py ensure_service_user \ --username daedalus-service \ @@ -103,7 +103,7 @@ docker compose -f /opt/mnemosyne/docker-compose.yaml \ # Seed the MCP signing key (for Phase 2 per-turn JWT auth) # --retire-other deactivates any previously-active key. # Print the secret_hex and store in vault as vault_mnemosyne_signing_secret. -docker compose -f /opt/mnemosyne/docker-compose.yaml \ +docker compose -f /srv/mnemosyne/docker-compose.yaml \ run --rm app \ python manage.py seed_signing_key --kid daedalus-1 --retire-other ``` @@ -261,7 +261,7 @@ relevant service `environment:` blocks. The per-service scoping is defined in After `docker compose up -d`, wait for all services to report healthy: ```bash -docker compose -f /opt/mnemosyne/docker-compose.yaml ps +docker compose -f /srv/mnemosyne/docker-compose.yaml ps ``` Expected: `app`, `mcp`, `worker`, `web` all `healthy`; `static-init` `exited (0)`. @@ -315,7 +315,7 @@ curl -H "Authorization: Bearer " \ A standard upgrade (new image pushed to `git.helu.ca/r/mnemosyne:latest`): ```bash -cd /opt/mnemosyne +cd /srv/mnemosyne docker compose pull docker compose up -d # static-init re-seeds; running containers replaced docker compose run --rm app migrate # no-op if no new migrations diff --git a/mnemosyne/mnemosyne/views.py b/mnemosyne/mnemosyne/views.py index 2769346..97b6e3d 100644 --- a/mnemosyne/mnemosyne/views.py +++ b/mnemosyne/mnemosyne/views.py @@ -52,6 +52,30 @@ def live(request): return JsonResponse({"status": "ok"}) +def _check_s3() -> str | None: + """Return an error string if the Mnemosyne S3 bucket is unreachable, else None.""" + import boto3 + import botocore.exceptions + from django.conf import settings + + try: + client = boto3.client( + "s3", + endpoint_url=settings.AWS_S3_ENDPOINT_URL or None, + aws_access_key_id=settings.AWS_ACCESS_KEY_ID or None, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY or None, + region_name=settings.AWS_S3_REGION_NAME or None, + use_ssl=getattr(settings, "AWS_S3_USE_SSL", True), + verify=getattr(settings, "AWS_S3_VERIFY", True), + ) + client.head_bucket(Bucket=settings.AWS_STORAGE_BUCKET_NAME) + return None + except botocore.exceptions.ClientError as e: + return f"HTTP {e.response['Error']['Code']}" + except Exception as e: + return str(e) + + def ready(request): errors = {} try: @@ -62,6 +86,9 @@ def ready(request): cache.get("__readiness_probe__") except Exception as e: errors["cache"] = str(e) + s3_error = _check_s3() + if s3_error: + errors["s3"] = s3_error if errors: return JsonResponse({"status": "error", "errors": errors}, status=503) return JsonResponse({"status": "ok"})