Compare commits
86 Commits
2df22941d2
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2af72d6e82 | |||
| 70b1fc510b | |||
| 46ca2a934d | |||
| dd06f923cd | |||
| 539d9b6c34 | |||
| 142e9675b5 | |||
| a90c6e7479 | |||
| 4dde063299 | |||
| ec4f12d601 | |||
| 75013ebfc3 | |||
| bc80d90b38 | |||
| 7d95133c74 | |||
| 93639188d3 | |||
| 735eb9de1a | |||
| 5bf9fa89cf | |||
| 8b2dcf01c1 | |||
| f8a2cf0c3d | |||
| 50dffe688b | |||
| 409da7d109 | |||
| 7296b8c42f | |||
| 55551fe9af | |||
| e1545139ab | |||
| 9f6176c478 | |||
| f88ec30110 | |||
| 4fb3676204 | |||
| 2a45cb2622 | |||
| 9629ca595d | |||
| a3d017a70d | |||
| ba3ab3d855 | |||
| ef733cb7bf | |||
| 88afd5d307 | |||
| e5682c2573 | |||
| 0a318c7620 | |||
| 3764ae9919 | |||
| e5e58e5fc5 | |||
| 673b7bcffc | |||
| d8b07975dd | |||
| ed4d0db930 | |||
| 955761b748 | |||
| 4f77ed39b9 | |||
| d57294db67 | |||
| 551c641e90 | |||
| 8ddbcf4612 | |||
| 38274825d9 | |||
| afcbee8819 | |||
| 19e2aee91c | |||
| bbd65b1300 | |||
| 6a4fecf488 | |||
| 16fb7ff4dc | |||
| e9f6eeb1a3 | |||
| 55523adbf7 | |||
| a945b382e6 | |||
| 9ceb01f829 | |||
| 642268cec1 | |||
| d11ee72527 | |||
| 3c7f85cba0 | |||
| 027de096bc | |||
| 4cf022e615 | |||
| e0fa825189 | |||
| 15d70c2cf9 | |||
| 8b2e2068e0 | |||
| f8536b5474 | |||
| 8d650c0570 | |||
| 56e977ffb5 | |||
| 37bb38ee43 | |||
| cbe7921938 | |||
| de0d7a4317 | |||
| e34b7f46a5 | |||
| df2e495660 | |||
| c9328c58fc | |||
| 003f958f7b | |||
| d84f0e548b | |||
| 72bd4b381d | |||
| 7185d326eb | |||
| a2c885cf34 | |||
| e5618973fc | |||
| 236d9e2e74 | |||
| 1cd556c3f6 | |||
| e2a6d45b77 | |||
| 97a14fb03a | |||
| 2a8a3d75b4 | |||
| 5527cf6bdb | |||
| f2af28d96d | |||
| c485a8560c | |||
| 33658fbc8d | |||
| 81426327bf |
160
.env.example
Normal file
160
.env.example
Normal file
@@ -0,0 +1,160 @@
|
||||
# =============================================================================
|
||||
# Mnemosyne — docker compose interpolation template
|
||||
# =============================================================================
|
||||
# This file is consumed by `docker compose` as the source for `${VAR}`
|
||||
# interpolations in docker-compose.yaml. In production it is generated from
|
||||
# a Jinja2 template by an Ansible role, with secrets pulled from the Ansible
|
||||
# vault — do not commit a populated copy.
|
||||
#
|
||||
# Copy to `.env` (at the repo root, NOT inside `mnemosyne/`) and fill in the
|
||||
# blanks before running `docker compose up -d`. The in-tree `mnemosyne/.env`
|
||||
# file (used by bare-Python development on caliban) is a separate concern
|
||||
# and is NOT read by the compose stack.
|
||||
#
|
||||
# Every variable below is referenced by at least one service in
|
||||
# docker-compose.yaml. Per-service scoping (which container sees which var)
|
||||
# is defined by the `environment:` blocks in that file; this template just
|
||||
# provides the values.
|
||||
# =============================================================================
|
||||
|
||||
# --- Django core ------------------------------------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
SECRET_KEY=change-me-to-a-real-secret-key
|
||||
DEBUG=False
|
||||
TIME_ZONE=UTC
|
||||
LANGUAGE_CODE=en-us
|
||||
|
||||
# --- HTTP surface -----------------------------------------------------------
|
||||
# Consumed by: app, mcp (ALLOWED_HOSTS: app + mcp; CSRF_TRUSTED_ORIGINS: app only)
|
||||
# Include every hostname HAProxy routes to this stack, plus localhost for the
|
||||
# inter-container health probes.
|
||||
ALLOWED_HOSTS=localhost,127.0.0.1,mnemosyne.ouranos.helu.ca
|
||||
CSRF_TRUSTED_ORIGINS=https://mnemosyne.ouranos.helu.ca
|
||||
|
||||
# --- PostgreSQL (Portia) ----------------------------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
APP_DB_NAME=mnemosyne
|
||||
APP_DB_USER=mnemosyne
|
||||
APP_DB_PASSWORD=change-me
|
||||
DB_HOST=portia.incus
|
||||
DB_PORT=5432
|
||||
|
||||
# --- Neo4j (Umbriel — dedicated Mnemosyne instance) -------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
# Umbriel MUST be dedicated to Mnemosyne; do not share with Spelunker or any
|
||||
# other graph workload. See README.md for the full rationale.
|
||||
NEOMODEL_NEO4J_BOLT_URL=bolt://neo4j:change-me@umbriel.incus:7687
|
||||
|
||||
# --- Memcached --------------------------------------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
# Must resolve from inside containers — 127.0.0.1 will NOT work.
|
||||
KVDB_LOCATION=oberon.incus:11211
|
||||
KVDB_PREFIX=mnemosyne
|
||||
|
||||
# --- S3 / MinIO (Nyx) — Mnemosyne's own bucket ------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
# Mnemosyne writes chunk text and item files here. Set USE_LOCAL_STORAGE=False
|
||||
# in production so the S3Boto3Storage backend is used instead of the local
|
||||
# FileSystemStorage fallback.
|
||||
USE_LOCAL_STORAGE=False
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_STORAGE_BUCKET_NAME=mnemosyne-content
|
||||
AWS_S3_ENDPOINT_URL=https://nyx.helu.ca:8555
|
||||
AWS_S3_USE_SSL=True
|
||||
AWS_S3_VERIFY=False
|
||||
AWS_S3_REGION_NAME=us-east-1
|
||||
|
||||
# --- Daedalus S3 (cross-bucket reads for ingest) ----------------------------
|
||||
# Consumed by: worker only
|
||||
# Mnemosyne's ingest Celery task reads files from Daedalus's bucket and
|
||||
# copies them into AWS_STORAGE_BUCKET_NAME for processing. These creds
|
||||
# should be scoped read-only to the Daedalus bucket in your secret manager.
|
||||
DAEDALUS_S3_ENDPOINT_URL=https://nyx.helu.ca:8555
|
||||
DAEDALUS_S3_ACCESS_KEY_ID=
|
||||
DAEDALUS_S3_SECRET_ACCESS_KEY=
|
||||
DAEDALUS_S3_BUCKET_NAME=daedalus
|
||||
DAEDALUS_S3_REGION_NAME=us-east-1
|
||||
DAEDALUS_S3_USE_SSL=True
|
||||
DAEDALUS_S3_VERIFY=True
|
||||
|
||||
# --- Spelunker S3 (cross-bucket reads for ingest, source="spelunker") ---
|
||||
# Consumed by: worker only
|
||||
# Spelunker scrapes web/git documents into its own bucket and posts ingest
|
||||
# requests with source="spelunker". These creds should be scoped read-only
|
||||
# to the Spelunker bucket in your secret manager.
|
||||
SPELUNKER_S3_ENDPOINT_URL=https://nyx.helu.ca:8555
|
||||
SPELUNKER_S3_ACCESS_KEY_ID=
|
||||
SPELUNKER_S3_SECRET_ACCESS_KEY=
|
||||
SPELUNKER_S3_BUCKET_NAME=spelunker
|
||||
SPELUNKER_S3_REGION_NAME=us-east-1
|
||||
SPELUNKER_S3_USE_SSL=True
|
||||
SPELUNKER_S3_VERIFY=True
|
||||
|
||||
# --- Celery / RabbitMQ (Oberon) ---------------------------------------------
|
||||
# Consumed by: app (producer), worker (consumer). NOT mcp.
|
||||
# Remember to percent-encode any password characters that have meaning in a
|
||||
# URL (`@ : / # % + ? & =` and space). Kombu's AMQP URL parser is strict —
|
||||
# an unencoded password is the most common cause of PLAIN 403 failures when
|
||||
# the bare-Python client happens to connect fine.
|
||||
CELERY_BROKER_URL=amqp://mnemosyne:change-me@oberon.incus:5672/mnemosyne
|
||||
CELERY_RESULT_BACKEND=rpc://
|
||||
CELERY_TASK_ALWAYS_EAGER=False
|
||||
|
||||
# --- Worker tuning ---------------------------------------------------------
|
||||
# Consumed by: worker only (read by entrypoint.sh → `celery -A mnemosyne worker`)
|
||||
# Override per host if you want to dedicate a worker to a single queue.
|
||||
CELERY_QUEUES=celery,embedding,batch
|
||||
CELERY_CONCURRENCY=2
|
||||
|
||||
# --- SSO / Casdoor ----------------------------------------------------------
|
||||
# Consumed by: app only
|
||||
# Set CASDOOR_ENABLED=True in production to activate OIDC login. All other
|
||||
# CASDOOR_* vars are required when enabled. CASDOOR_SSL_VERIFY should always
|
||||
# be true in production; set to false only in sandboxes with self-signed certs.
|
||||
CASDOOR_ENABLED=False
|
||||
CASDOOR_ORIGIN=https://casdoor.example.com
|
||||
CASDOOR_ORIGIN_FRONTEND=https://casdoor.example.com
|
||||
CASDOOR_CLIENT_ID=
|
||||
CASDOOR_CLIENT_SECRET=
|
||||
CASDOOR_ORG_NAME=
|
||||
CASDOOR_SSL_VERIFY=true
|
||||
ALLOW_LOCAL_LOGIN=False
|
||||
|
||||
# --- MCP server -------------------------------------------------------------
|
||||
# Consumed by: mcp only
|
||||
MCP_REQUIRE_AUTH=True
|
||||
|
||||
# --- LLM API encryption -----------------------------------------------------
|
||||
# Consumed by: app (admin pages), worker (ingest vision pass). NOT mcp.
|
||||
# Generate once per deployment, store in the vault, never rotate without
|
||||
# re-encrypting every stored provider key first.
|
||||
# python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
LLM_API_SECRETS_ENCRYPTION_KEY=
|
||||
|
||||
# --- Email (smtp4dev on Oberon) --------------------------------------------
|
||||
# Consumed by: app only
|
||||
EMAIL_HOST=oberon.incus
|
||||
EMAIL_PORT=22025
|
||||
EMAIL_USE_TLS=False
|
||||
|
||||
# --- Embedding pipeline -----------------------------------------------------
|
||||
# Consumed by: worker only
|
||||
EMBEDDING_BATCH_SIZE=8
|
||||
EMBEDDING_TIMEOUT=120
|
||||
|
||||
# --- Search & re-ranker -----------------------------------------------------
|
||||
# Consumed by: app, mcp. Not worker (workers never serve queries).
|
||||
SEARCH_VECTOR_TOP_K=50
|
||||
SEARCH_FULLTEXT_TOP_K=30
|
||||
SEARCH_GRAPH_MAX_DEPTH=2
|
||||
SEARCH_RRF_K=60
|
||||
SEARCH_DEFAULT_LIMIT=20
|
||||
RERANKER_MAX_CANDIDATES=32
|
||||
RERANKER_TIMEOUT=30
|
||||
|
||||
# --- Logging ----------------------------------------------------------------
|
||||
# Consumed by: app, mcp, worker (each picks the levels it cares about)
|
||||
LOGGING_LEVEL=INFO
|
||||
CELERY_LOGGING_LEVEL=INFO
|
||||
DJANGO_LOGGING_LEVEL=WARNING
|
||||
118
.gitea/workflows/cve-scan-docker-build.yml
Normal file
118
.gitea/workflows/cve-scan-docker-build.yml
Normal file
@@ -0,0 +1,118 @@
|
||||
name: CVE Scan & Docker Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
env:
|
||||
REGISTRY: git.helu.ca
|
||||
IMAGE_NAME: ${{ gitea.repository }}
|
||||
TRIVY_SEVERITY: MEDIUM,HIGH,CRITICAL
|
||||
TRIVY_NO_PROGRESS: "true"
|
||||
TRIVY_DISABLE_VEX_NOTICE: "true"
|
||||
|
||||
jobs:
|
||||
security-scan:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Trivy
|
||||
run: |
|
||||
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin
|
||||
trivy --version
|
||||
|
||||
- name: Resolve full dependency set (incl. dev/test/lint/docs extras)
|
||||
run: |
|
||||
python3 -m venv /tmp/scanenv
|
||||
/tmp/scanenv/bin/pip install --quiet pip-tools
|
||||
/tmp/scanenv/bin/pip-compile pyproject.toml \
|
||||
--extra dev --extra test --extra lint --extra docs \
|
||||
-o requirements.txt --no-header --quiet --allow-unsafe
|
||||
echo "Resolved $(grep -cv '^\s*\(#\|$\)' requirements.txt) pinned packages."
|
||||
|
||||
- name: Scan Python dependencies for CVEs
|
||||
run: |
|
||||
trivy fs \
|
||||
--scanners vuln \
|
||||
--severity ${TRIVY_SEVERITY} \
|
||||
--format table \
|
||||
--exit-code 0 \
|
||||
requirements.txt
|
||||
|
||||
- name: Scan repository for secrets
|
||||
run: |
|
||||
trivy fs \
|
||||
--scanners secret \
|
||||
--format table \
|
||||
--exit-code 0 \
|
||||
.
|
||||
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
needs: security-scan
|
||||
if: always()
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Gitea Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ gitea.actor }}
|
||||
password: ${{ secrets.PACKAGE_TOKEN }}
|
||||
|
||||
- name: Extract metadata for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=ref,event=pr
|
||||
type=sha,prefix=
|
||||
type=raw,value=latest,enable=${{ gitea.ref == 'refs/heads/main' }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Install Trivy
|
||||
run: |
|
||||
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin
|
||||
trivy --version
|
||||
|
||||
- name: Scan built Docker image (OS + Python + system libs)
|
||||
run: |
|
||||
IMAGE_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n1)
|
||||
echo "🔍 Scanning image: ${IMAGE_TAG}"
|
||||
trivy image \
|
||||
--scanners vuln \
|
||||
--severity ${TRIVY_SEVERITY} \
|
||||
--format table \
|
||||
--pkg-types os,library \
|
||||
--exit-code 0 \
|
||||
"${IMAGE_TAG}"
|
||||
|
||||
- name: Scan built Docker image for misconfigurations
|
||||
continue-on-error: true
|
||||
run: |
|
||||
IMAGE_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n1)
|
||||
trivy image \
|
||||
--scanners misconfig \
|
||||
--severity ${TRIVY_SEVERITY} \
|
||||
--format table \
|
||||
--exit-code 0 \
|
||||
"${IMAGE_TAG}"
|
||||
102
.gitea/workflows/docs.yml
Normal file
102
.gitea/workflows/docs.yml
Normal file
@@ -0,0 +1,102 @@
|
||||
name: Build & Deploy Docs
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'mnemosyne/**'
|
||||
- 'docs/**'
|
||||
- 'pyproject.toml'
|
||||
- '.gitea/workflows/docs.yml'
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install package + docs deps
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install -e ".[docs]"
|
||||
|
||||
- name: Read version from pyproject.toml
|
||||
id: version
|
||||
run: |
|
||||
VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ─── Failure-debug trio (REQUIRED) ─────────────────────────────────
|
||||
- name: Build HTML
|
||||
id: build_html
|
||||
run: |
|
||||
cd docs
|
||||
./regenerate_docs.sh
|
||||
continue-on-error: true
|
||||
|
||||
- name: Print Sphinx error log on failure
|
||||
if: steps.build_html.outcome == 'failure'
|
||||
run: |
|
||||
echo "=== Sphinx error log ==="
|
||||
cat /tmp/sphinx-err-*.log 2>/dev/null || echo "(no sphinx error log found)"
|
||||
|
||||
- name: Fail if build failed
|
||||
if: steps.build_html.outcome == 'failure'
|
||||
run: exit 1
|
||||
# ───────────────────────────────────────────────────────────────────
|
||||
|
||||
- name: Install rsync + openssh
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends rsync openssh-client
|
||||
|
||||
- name: Configure SSH
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
printf '%s\n' "${{ secrets.CLIO_DOCS_KEY }}" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan -p ${{ vars.CLIO_PORT }} ${{ vars.CLIO_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Test SSH connectivity
|
||||
run: |
|
||||
ssh -o BatchMode=yes -o ConnectTimeout=10 \
|
||||
-p ${{ vars.CLIO_PORT }} -i ~/.ssh/id_ed25519 \
|
||||
git@${{ vars.CLIO_HOST }} "id && echo 'SSH OK'"
|
||||
|
||||
- name: Rsync to versioned path
|
||||
run: |
|
||||
rsync -av --delete \
|
||||
-e "ssh -p ${{ vars.CLIO_PORT }} -i ~/.ssh/id_ed25519" \
|
||||
docs/_build/html/ \
|
||||
git@${{ vars.CLIO_HOST }}:/var/www/docs/mnemosyne/${{ steps.version.outputs.version }}/
|
||||
|
||||
- name: Rsync to latest
|
||||
run: |
|
||||
rsync -av --delete \
|
||||
-e "ssh -p ${{ vars.CLIO_PORT }} -i ~/.ssh/id_ed25519" \
|
||||
docs/_build/html/ \
|
||||
git@${{ vars.CLIO_HOST }}:/var/www/docs/mnemosyne/latest/
|
||||
|
||||
- name: Regenerate versions index
|
||||
run: |
|
||||
ssh -p ${{ vars.CLIO_PORT }} -i ~/.ssh/id_ed25519 git@${{ vars.CLIO_HOST }} \
|
||||
'python3 - <<PY
|
||||
import pathlib
|
||||
root = pathlib.Path("/var/www/docs/mnemosyne")
|
||||
versions = sorted(
|
||||
(p.name for p in root.iterdir() if p.is_dir()),
|
||||
reverse=True,
|
||||
)
|
||||
html = ["<!DOCTYPE html><html><head><title>Mnemosyne Docs</title></head><body>",
|
||||
"<h1>Mnemosyne Documentation</h1><ul>"]
|
||||
for v in versions:
|
||||
html.append(f"<li><a href=\"{v}/\">{v}</a></li>")
|
||||
html.append("</ul></body></html>")
|
||||
(root / "index.html").write_text("\n".join(html))
|
||||
PY'
|
||||
188
CLAUDE.md
Normal file
188
CLAUDE.md
Normal file
@@ -0,0 +1,188 @@
|
||||
## 🐾 Red Panda Approval™
|
||||
|
||||
The standard every change is judged against. Don't satisfy a checklist —
|
||||
satisfy the red pandas. Ask of each change: *does this earn approval?*
|
||||
|
||||
1. **Fresh Migration Test** — migrations apply cleanly from an empty database.
|
||||
2. **Elegant Simplicity** — no unnecessary complexity; the obvious solution, done well.
|
||||
3. **Observable & Debuggable** — proper logging; failures say what broke and why.
|
||||
4. **Consistent Patterns** — follows Django conventions and the patterns already in this repo.
|
||||
5. **Actually Works** — passes all checks *and* serves a real user need.
|
||||
|
||||
Criteria 1 and 5 are **externally verifiable** — migrations apply or they
|
||||
don't; checks pass or they don't. Verify them, don't assert them. Criteria
|
||||
2–4 are judgement calls: when in doubt, match what the repo already does
|
||||
rather than grading your own elegance.
|
||||
|
||||
> If a paw print isn't leading the response, the rest of this file probably
|
||||
> isn't being honoured either. Lead with one. 🐾
|
||||
|
||||
---
|
||||
|
||||
## Conventions (always-on)
|
||||
|
||||
These are the rubric made concrete for the common case — writing models,
|
||||
views, forms, templates, and queries.
|
||||
|
||||
### Models
|
||||
- Names: singular PascalCase (`User`, `BlogPost`, `OrderItem`).
|
||||
- Every model defines `__str__` and `get_absolute_url`.
|
||||
- Every model has `created_at = DateTimeField(auto_now_add=True)` and
|
||||
`updated_at = DateTimeField(auto_now=True)`.
|
||||
- `TextChoices` for status fields.
|
||||
- `related_name` on every `ForeignKey`; plural snake_case with correct
|
||||
English pluralisation.
|
||||
- Public-facing models: consider `UUIDField` primary key and
|
||||
`is_active` for soft deletes.
|
||||
|
||||
### Field naming
|
||||
- Foreign keys: singular, no `_id` suffix (`author`, `category`, `parent`).
|
||||
- Booleans: prefixed (`is_active`, `has_permission`, `can_edit`).
|
||||
- Dates: suffixed (`created_at`, `updated_at`, `published_on`).
|
||||
- No abbreviations (`description`, not `desc`).
|
||||
|
||||
### Views
|
||||
- **Function-based views exclusively.** Explicit logic over implicit
|
||||
inheritance. Extract shared logic into utility functions.
|
||||
- Business logic lives in service functions, not views and not `save()`.
|
||||
|
||||
### Forms
|
||||
- `ModelForm` with an explicit `fields` list — never `__all__`, never `exclude`.
|
||||
- Validate at the boundary; never trust client-side validation alone.
|
||||
|
||||
### Queries
|
||||
- `select_related()` for FKs; `prefetch_related()` for reverse and M2M.
|
||||
- No queries inside loops (N+1). No `.all()` when you need a subset.
|
||||
- `.only()` / `.defer()` for large models. Comment non-obvious querysets.
|
||||
|
||||
### URLs & identifiers
|
||||
- Public URLs use 12-char short UUIDs via `shortuuid`. Never expose
|
||||
sequential IDs (enumeration risk). Internal refs may use PKs.
|
||||
- Resource-based, namespaced URL names per app, trailing slashes, flat
|
||||
structure preferred.
|
||||
|
||||
### Docstrings
|
||||
- **Google style.** Document public classes, functions, methods, modules.
|
||||
- Imperative one-line summary. `Args:`/`Returns:`/`Raises:` only when the
|
||||
signature doesn't already convey it. Don't restate type hints in prose.
|
||||
- Skip obvious one-liners and standard Django overrides.
|
||||
|
||||
### Code organisation
|
||||
- PEP 8 import ordering (stdlib, third-party, local). Type hints on params.
|
||||
- CSS and JS in external files only — no inline styles, `<style>`,
|
||||
inline handlers, or `<script>` blocks.
|
||||
- File length: split by domain concept past ~500 lines; hard ceiling 1000.
|
||||
|
||||
### Testing
|
||||
- Django `TestCase` (not pytest). Separate files per module:
|
||||
`test_models.py`, `test_views.py`, `test_forms.py`.
|
||||
|
||||
An app isn't done until it's reachable
|
||||
django-admin startapp builds an island. A complete-from-its-own-boundary
|
||||
app — models, views, urls, templates, tests all present and passing — is
still *unfinished* if nothing in the running site links to it.
|
||||
|
||||
# Add to always-on Django CLAUDE.md — Conventions section
|
||||
|
||||
Insert this block under "Conventions (always-on)", as its own subsection.
|
||||
It is the universal Django definition-of-done. It fires for *every* app,
|
||||
not just registered tools.
|
||||
|
||||
### An app isn't done until it's reachable
|
||||
|
||||
`django-admin startapp` builds an **island**. A complete-from-its-own-boundary
|
||||
app — models, views, urls, templates, tests all present and passing — is
|
||||
still *unfinished* if nothing in the running site links to it. "It works in
|
||||
isolation" is not done; **"a user can reach it from the running site" is done.**
|
||||
|
||||
Before reporting a new app complete, wire it into the site:
|
||||
|
||||
1. **`INSTALLED_APPS`** — add the app's config.
|
||||
2. **Root URLconf** — `include()` the app's `urls.py` in `config/urls.py`.
|
||||
An app whose URLconf isn't included has unreachable views, full stop.
|
||||
3. **Navigation / discovery** — register the app so it surfaces wherever
|
||||
this project expects apps to appear. This project uses an **app
|
||||
registry** (see Project Setup): the app registers itself in its own
|
||||
`AppConfig.ready()` (in `apps.py`) and the navigation template tag picks it up. Do **not**
|
||||
hand-edit nav templates or central list views — they read from the
|
||||
registry.
|
||||
4. **Verify reachability** — confirm the app's main page actually loads
|
||||
from the running site (not just that its tests pass). Per Red Panda
|
||||
criterion 5, this is externally verifiable: load the page, don't assert
|
||||
it works.
|
||||
Why this rule exists: an LLM reasons locally and closes the visible task at
|
||||
the app's own boundary. The wiring that makes an app reachable lives in
|
||||
*other* files (`config/urls.py`, `INSTALLED_APPS`, the registry) with no
|
||||
signal inside the new app pointing to them. Without this rule, the
|
||||
near-certain result is a fully-built, completely inaccessible app. The
|
||||
registry exists precisely so that "surface it" happens *inside* the app's
|
||||
own boundary (a `register()` call in `ready()`) — collapsing the wiring
|
||||
into the one place local reasoning will actually look.
|
||||
|
||||
> The same principle generalises beyond Django: a new route that isn't
|
||||
> mounted, a CLI subcommand not added to the dispatcher, a handler not
|
||||
> registered — all the same failure. Done means *connected*, not *written*.
|
||||
|
||||
---
|
||||
|
||||
## Always-on anti-patterns
|
||||
|
||||
The cross-cutting tripwires worth carrying everywhere. File-specific
|
||||
landmines (nginx, compose, broker) are in path-scoped rules.
|
||||
|
||||
- **Models:** no `.get()` without handling `DoesNotExist`; no `null=True`
|
||||
on `CharField`/`TextField` (use `blank=True, default=""`); always specify
|
||||
`on_delete`; don't override `save()` for business logic; no
|
||||
`Meta.ordering` on large tables.
|
||||
- **Security:** secrets via env vars, never in `settings.py`; never commit
|
||||
`.env`; never `DEBUG=True` in production; never `mark_safe()` on
|
||||
user-supplied content; never disable CSRF.
|
||||
- **Templates:** `{% url %}` not `{{ variable }}` for URLs; no logic in
|
||||
templates; `{% csrf_token %}` in every form.
|
||||
- **Imports/style:** no `import *`; no mutable default args; no bare
|
||||
`except:`; don't silence linter warnings without a documented reason.
|
||||
|
||||
---
|
||||
|
||||
## Environment
|
||||
|
||||
- Virtual environment: `~/env/PROJECT/bin/activate` (replace PROJECT).
|
||||
- `pyproject.toml` for config — no `setup.py`, no `requirements.txt`.
|
||||
- Dependencies floor-pinned with ceiling (`Django>=5.2,<6.0`). Exact `==`
|
||||
pins only in application lock files, never in reusable packages.
|
||||
- Dev DB: SQLite. Production DB: PostgreSQL.
|
||||
|
||||
---
|
||||
|
||||
## Path-scoped rules to create (`.claude/rules/`)
|
||||
|
||||
These hold the landmines extracted from the standards doc. Each loads only
|
||||
when its `paths` match, keeping this file lean. Frontmatter shown.
|
||||
|
||||
- **`nginx.md`** — `paths: ["nginx/**", "**/*.conf"]` — reverse-proxy
|
||||
reference config: Docker DNS resolver + variable `proxy_pass`,
|
||||
`$proxy_x_forwarded_proto` map, access-log filtering, RFC1918 allowlists
|
||||
(all four ranges), `always` security headers.
|
||||
- **`docker-compose.md`** — `paths: ["docker-compose*.y*ml", ".env*"]` —
|
||||
per-service `environment:` scoping (no shared `env_file:`), `${VAR}`
|
||||
interpolation, `.env.example` annotation convention, the `repr()` parse
|
||||
diagnostic.
|
||||
- **`celery-tasks.md`** — `paths: ["**/tasks.py"]` — idempotency, retry
|
||||
logic, pass IDs not instances, synchronous-by-default, broker URL
|
||||
percent-encoding, progress pattern `{app}:task:{task_id}:progress`.
|
||||
- **`migrations.md`** — `paths: ["**/migrations/**"]` — never edit deployed
|
||||
migrations; `RunPython` needs a reverse; no non-nullable field without a
|
||||
default; meaningful `--name`; test forward and backward.
|
||||
- **`memcached.md`** — `paths: ["**/settings.py", ".env*"]` — bind
|
||||
`0.0.0.0` not localhost; container can't reach `127.0.0.1`; LAN hostname
|
||||
in `KVDB_LOCATION`; key pattern `{app}:{model}:{identifier}:{field}`.
|
||||
- **`frontend.md`** — `paths: ["**/templates/**", "**/static/**"]` — DaisyUI+
|
||||
Tailwind for new projects / Bootstrap 5 for existing; extend
|
||||
`themis/base.html`; no inline styles or scripts.
|
||||
|
||||
## Reference docs (consult on demand, don't inline)
|
||||
|
||||
- `docs/` gotcha writeups: broker-URL/Kombu parsing, env-file parsing
|
||||
differences, nginx IP-caching. State the rule in the rule file; link the
|
||||
*why* here.
|
||||
- Preferred-packages list and per-app architecture: keep in `docs/`, not in
|
||||
this always-on file.
|
||||
94
Dockerfile
Normal file
94
Dockerfile
Normal file
@@ -0,0 +1,94 @@
|
||||
# =============================================================================
|
||||
# Mnemosyne — production image
|
||||
# =============================================================================
|
||||
# Multi-stage:
|
||||
# builder installs Python deps and runs `collectstatic` once.
|
||||
# runtime copies only the artifacts the running process needs.
|
||||
#
|
||||
# The same image runs three different processes (Django web, MCP server,
|
||||
# Celery worker) — the compose file picks the command per service.
|
||||
# =============================================================================
|
||||
|
||||
# ── Stage 1: builder ────────────────────────────────────────────────────────
|
||||
FROM python:3.12-slim AS builder
|
||||
|
||||
# Build deps for psycopg, PyMuPDF, Pillow, cryptography, etc.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
libpq-dev \
|
||||
libffi-dev \
|
||||
libssl-dev \
|
||||
libjpeg-dev \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
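# Copy package metadata and source, then install. NOTE: `pip install .` runs
# after the source COPY, so this layer is rebuilt whenever application code
# changes (dependencies are not cached separately from the source tree).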
|
||||
COPY pyproject.toml README.md ./
|
||||
COPY mnemosyne/ ./mnemosyne/
|
||||
|
||||
RUN pip install --upgrade pip \
|
||||
&& pip install .
|
||||
|
||||
# Bake static files into the image. The env vars below are build-time-only
|
||||
# stubs needed for settings.py to import without real infrastructure — they
|
||||
# never reach the runtime image because this is the builder stage.
|
||||
# The secret-like stubs (SECRET_KEY, APP_DB_PASSWORD) are inlined into the RUN
# command (rather than ENV/ARG) so static analysis
|
||||
# tools (Trivy) don't flag them as baked-in secrets.
|
||||
ENV DJANGO_SETTINGS_MODULE=mnemosyne.settings \
|
||||
DEBUG=False \
|
||||
USE_LOCAL_STORAGE=True \
|
||||
APP_DB_NAME=collectstatic \
|
||||
APP_DB_USER=collectstatic
|
||||
|
||||
WORKDIR /build/mnemosyne
|
||||
RUN SECRET_KEY=collectstatic-stub \
|
||||
APP_DB_PASSWORD=collectstatic-stub \
|
||||
python manage.py collectstatic --noinput --clear
|
||||
|
||||
# ── Stage 2: runtime ────────────────────────────────────────────────────────
|
||||
FROM python:3.12-slim AS runtime
|
||||
|
||||
# Runtime libs for psycopg + PyMuPDF + Pillow + cryptography.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libpq5 \
|
||||
libjpeg62-turbo \
|
||||
zlib1g \
|
||||
libssl3 \
|
||||
ca-certificates \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
DJANGO_SETTINGS_MODULE=mnemosyne.settings \
|
||||
PATH=/usr/local/bin:$PATH
|
||||
|
||||
# Copy installed packages from the builder.
|
||||
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
|
||||
# Application code + collected statics.
|
||||
WORKDIR /app
|
||||
COPY --from=builder /build/mnemosyne /app
|
||||
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
|
||||
COPY docker/gunicorn.conf.py /app/docker/gunicorn.conf.py
|
||||
RUN chmod +x /usr/local/bin/entrypoint.sh
|
||||
|
||||
# Non-root user for everything that runs in this image. uid:gid 1000:1000
|
||||
# matches the convention for a single-application container.
|
||||
RUN groupadd --gid 1000 mnemosyne \
|
||||
&& useradd --uid 1000 --gid mnemosyne --home /app --no-create-home --shell /sbin/nologin mnemosyne \
|
||||
&& mkdir -p /app/media /app/logs /mnt/static /mnt/media \
|
||||
&& chown -R mnemosyne:mnemosyne /app /mnt/static /mnt/media
|
||||
USER mnemosyne
|
||||
|
||||
# The compose file overrides this per service. Default = Django web.
|
||||
ENTRYPOINT ["entrypoint.sh"]
|
||||
CMD ["web"]
|
||||
245
README.md
245
README.md
@@ -37,11 +37,14 @@ This **content-type awareness** flows through every layer: chunking strategy, em
|
||||
| Library | Example Content | Multimodal? | Graph Relationships |
|
||||
|---------|----------------|-------------|-------------------|
|
||||
| **Fiction** | Novels, short stories | Cover art | Author → Book → Character → Theme |
|
||||
| **Nonfiction** | History, biography, science writing | Photos, charts | Author → Work → Topic → Person/Place |
|
||||
| **Technical** | Textbooks, manuals, docs | Diagrams, screenshots | Product → Manual → Section → Procedure |
|
||||
| **Music** | Lyrics, liner notes | Album artwork | Artist → Album → Track → Genre |
|
||||
| **Film** | Scripts, synopses | Stills, posters | Director → Film → Scene → Actor |
|
||||
| **Art** | Descriptions, catalogs | The artwork itself | Artist → Piece → Style → Movement |
|
||||
| **Journals** | Personal entries | Photos | Date → Entry → Topic → Person/Place |
|
||||
| **Journal** | Personal entries, plans, observations | Photos | Date → Entry → Topic → Person/Place |
|
||||
| **Business** | Proposals, marketing, strategy | Logos, charts | Client → Engagement → Deliverable |
|
||||
| **Finance** | Statements, tax, market commentary | Charts, statement scans | Account → Instrument → Period |
|
||||
|
||||
## Search Pipeline
|
||||
|
||||
@@ -55,32 +58,246 @@ Query → Vector Search (Neo4j) + Graph Traversal (Cypher) + Full-Text Search
|
||||
|
||||
Mnemosyne's RAG pipeline architecture is inspired by [Spelunker](https://git.helu.ca/r/spelunker), an enterprise RFP response platform. The proven patterns — hybrid search, two-stage RAG (responder + reviewer), citation-based retrieval, and async document processing — are carried forward and enhanced with multimodal capabilities and knowledge graph relationships.
|
||||
|
||||
## Running Celery Workers
|
||||
## Running Mnemosyne
|
||||
|
||||
Mnemosyne uses Celery with RabbitMQ for async document embedding. From the `mnemosyne/` directory:
|
||||
Mnemosyne runs as three cooperating processes: the Django web app (REST API + admin), the MCP server (LLM-facing tools), and one or more Celery workers (async embedding + ingest). All three read configuration from `mnemosyne/.env` (copy from `mnemosyne/.env.example` and fill in secrets).
|
||||
|
||||
Hosts in the Ouranos lab:
|
||||
- **Postgres** — `portia.incus:5432` (Django ORM: users, IngestJob)
|
||||
- **Neo4j** — `umbriel.incus:7687` (Bolt; dedicated instance — see note below — knowledge graph + vectors; HTTP Browser on `umbriel.incus:25555`)
|
||||
- **RabbitMQ** — `oberon.incus:5672` (Celery broker)
|
||||
- **MinIO** — `nyx.helu.ca:8555` (S3-compatible; `mnemosyne-content` and `daedalus` buckets)
|
||||
- **Memcached** — `127.0.0.1:11211` (task progress)
|
||||
|
||||
> **Neo4j must be dedicated to Mnemosyne.** Don't share the instance with Spelunker or any other graph workload. Mnemosyne owns the `Library`, `Collection`, `Item`, `Chunk`, and `Concept` labels and runs its own indexes (`chunk_embedding_index`, full-text indexes per library_type) and schema migrations (`setup_neo4j_indexes`, `load_library_types`). The Phase-1 workspace-delete path runs label-scoped `DETACH DELETE` over those labels, and a workspace_id-scoped subgraph is the unit of isolation — both assume single-tenancy. A shared instance risks (1) label/property collisions corrupting the other tenant's graph, (2) vector-index memory contention degrading search latency for both apps, (3) management commands mutating schema another tenant depends on, and (4) backup/restore that can't be reasoned about per-app. Neo4j Community Edition is sufficient — the multi-database feature is Enterprise-only, so isolation has to come from running a separate server process. Run a dedicated instance per environment (one for staging, one for production); point each via `NEOMODEL_NEO4J_BOLT_URL` in that environment's `mnemosyne/.env`.
|
||||
|
||||
### One-time setup
|
||||
|
||||
```bash
|
||||
# Development — single worker, all queues
|
||||
cd mnemosyne/
|
||||
python manage.py migrate # Apply Django ORM migrations
|
||||
python manage.py load_library_types # Load LIBRARY_TYPE_DEFAULTS into Neo4j
|
||||
# --- seed the system embedding model in /admin/llm_manager/llmmodel/ here ---
|
||||
python manage.py setup_neo4j_indexes # Create Neo4j vector + full-text indexes
|
||||
```
|
||||
|
||||
> **Seed the embedding model before running `setup_neo4j_indexes`.** Vector
|
||||
> index dimensions are read from the row in ``llm_manager_llmmodel`` that
|
||||
> has ``is_system_embedding_model=True`` and a non-null ``vector_dimensions``.
|
||||
> There is deliberately no hardcoded fallback: an index built at the wrong
|
||||
> dimension silently breaks every search. The command will exit non-zero
|
||||
> with a clear error if no such row exists, which is why the
|
||||
> ``docker compose`` ``init`` sidecar does **not** run
|
||||
> ``setup_neo4j_indexes`` — the stack brings up `migrate` +
|
||||
> `load_library_types` only, you land in `/admin/` to configure the system
|
||||
> embedding model, and then you run
|
||||
> ``docker compose exec app python manage.py setup_neo4j_indexes`` manually
|
||||
> once. Until that last step runs, vector search returns empty results and
|
||||
> `library/apps.py` logs a readiness warning. See
|
||||
> [Docker bootstrap order](#docker-bootstrap-order) below for the full flow.
|
||||
|
||||
### Start the web app
|
||||
|
||||
The Django REST API serves `/library/api/*` (libraries, collections, items, search, workspaces, ingest) and Django admin. Use Gunicorn in production; `runserver` for dev.
|
||||
|
||||
```bash
|
||||
cd mnemosyne/
|
||||
|
||||
# Development
|
||||
python manage.py runserver 0.0.0.0:8000
|
||||
|
||||
# Production
|
||||
gunicorn --bind 0.0.0.0:8000 --workers 3 mnemosyne.wsgi:application
|
||||
```
|
||||
|
||||
### Start the MCP server
|
||||
|
||||
The MCP server exposes the LLM-facing tools (`search`, `get_chunk`, `list_libraries`, `list_collections`, `list_items`, `get_health`) over Streamable HTTP at `/mcp` and SSE at `/mcp/sse`. Run as a separate Uvicorn process, on its own port, so it can be reverse-proxied or scaled independently of the Django app.
|
||||
|
||||
```bash
|
||||
cd mnemosyne/
|
||||
|
||||
# Single command: ASGI server hosting the FastMCP app
|
||||
uvicorn mnemosyne.asgi:app --host 0.0.0.0 --port 22091 --workers 1
|
||||
```
|
||||
|
||||
The `mcp_server/asgi.py` mounts FastMCP at `/mcp` (Streamable HTTP) and `/mcp/sse` (SSE), with a `/mcp/health` JSON probe for HAProxy/Pallas.
|
||||
|
||||
### Start a Celery worker
|
||||
|
||||
In development, run a single worker that handles all queues. In production, run one focused worker per queue — most importantly the `embedding` queue, which Daedalus depends on because its ingest task is dispatched there.
|
||||
|
||||
```bash
|
||||
cd mnemosyne/
|
||||
|
||||
# Development — one worker, all queues
|
||||
celery -A mnemosyne worker -l info -Q celery,embedding,batch
|
||||
|
||||
# Or skip workers entirely with eager mode (.env):
|
||||
CELERY_TASK_ALWAYS_EAGER=True
|
||||
# Production — embedding queue (handles Daedalus ingest + embed_item)
|
||||
celery -A mnemosyne worker -l info -Q embedding -c 1 -n embedding@%h
|
||||
|
||||
# Production — batch queue (collection/library bulk operations)
|
||||
celery -A mnemosyne worker -l info -Q batch -c 2 -n batch@%h
|
||||
|
||||
# Production — default queue (LLM validation, misc)
|
||||
celery -A mnemosyne worker -l info -Q celery -c 2 -n default@%h
|
||||
```
|
||||
|
||||
**Production — separate workers:**
|
||||
```bash
|
||||
celery -A mnemosyne worker -l info -Q embedding -c 1 -n embedding@%h # GPU-bound embedding
|
||||
celery -A mnemosyne worker -l info -Q batch -c 2 -n batch@%h # Batch orchestration
|
||||
celery -A mnemosyne worker -l info -Q celery -c 2 -n default@%h # LLM API validation
|
||||
```
|
||||
Daedalus's `POST /library/api/ingest/` dispatches `library.tasks.ingest_from_daedalus` to the **embedding** queue. If you only run one worker, make sure it consumes `embedding` or that task will sit in the broker.
|
||||
|
||||
**Scheduler & Monitoring:**
|
||||
To bypass workers in dev/test, set `CELERY_TASK_ALWAYS_EAGER=True` in `.env`.
|
||||
|
||||
**Scheduler & monitoring (optional):**
|
||||
```bash
|
||||
celery -A mnemosyne beat -l info # Periodic task scheduler
|
||||
celery -A mnemosyne flower --port=5555 # Web monitoring UI
|
||||
```
|
||||
|
||||
See [Phase 2: Celery Workers & Scheduler](docs/PHASE_2_EMBEDDING_PIPELINE.md#celery-workers--scheduler) for full details on queues, reliability settings, and task progress tracking.
|
||||
See [Phase 2: Celery Workers & Scheduler](docs/PHASE_2_EMBEDDING_PIPELINE.md#celery-workers--scheduler) for queue tuning, reliability settings, and task progress tracking.
|
||||
|
||||
### Daedalus integration endpoints
|
||||
|
||||
These endpoints are used by the Daedalus FastAPI backend (HTTP Basic auth). All under `/library/api/`:
|
||||
|
||||
| Method | Route | Purpose |
|
||||
|--------|-------|---------|
|
||||
| POST | `/workspaces/` | Create a workspace (idempotent on `workspace_id`); body: `{workspace_id, name, library_type, description?}` |
|
||||
| GET | `/workspaces/{workspace_id}/` | Workspace status (item/chunk counts) |
|
||||
| DELETE | `/workspaces/{workspace_id}/` | Delete workspace + reachable content; preserves shared concepts |
|
||||
| POST | `/ingest/` | Queue a file for ingestion + embedding |
|
||||
| GET | `/jobs/{job_id}/` | Poll ingest job status |
|
||||
| POST | `/jobs/{job_id}/retry/` | Re-dispatch a failed job |
|
||||
| GET | `/jobs/?status=&library_uid=` | List recent jobs |
|
||||
|
||||
See [docs/mnemosyne_integration.md](docs/mnemosyne_integration.md) for the full Daedalus contract.
|
||||
|
||||
## Production Deployment
|
||||
|
||||
Production runs as four containers from a single image (built and pushed by [`.gitea/workflows/cve-scan-docker-build.yml`](.gitea/workflows/cve-scan-docker-build.yml) on every push to `main`):
|
||||
|
||||
| Service | Role | Port |
|
||||
|---------|------|------|
|
||||
| `app` | Django REST API + admin (gunicorn) | internal :8000 |
|
||||
| `mcp` | FastMCP server (uvicorn) | internal :22091 |
|
||||
| `worker` | Celery worker — embedding/ingest/batch | — |
|
||||
| `web` | Reverse proxy + static files (nginx) | host :23090 |
|
||||
|
||||
Plus a one-shot `init` sidecar that runs `migrate`, `collectstatic`, and `load_library_types` and then exits. Its `collectstatic` step copies `/app/staticfiles` (baked into the image at build time) into the shared volume nginx reads from. It runs to completion on every `up`, so static-file changes propagate on each deploy without manual intervention.
|
||||
|
||||
External services (NOT spun up by compose): Postgres on Portia, Neo4j on Umbriel (dedicated Mnemosyne instance), RabbitMQ on Oberon, S3/MinIO on Nyx, Memcached, embedder + reranker. All reached over the internal 10.10.0.0/24 network.
|
||||
|
||||
### Environment scoping
|
||||
|
||||
Each compose service declares *only* the environment variables it actually needs — there is no shared `env_file:`. The rationale:
|
||||
|
||||
- The MCP server (the most exposed surface, because it talks to outside LLMs) should never see the Celery broker URL or the LLM API encryption key. It only needs Postgres, Neo4j, Memcached, S3, and the MCP-specific auth toggle.
|
||||
- The Celery worker has no business knowing `ALLOWED_HOSTS`, `CSRF_TRUSTED_ORIGINS`, `MCP_REQUIRE_AUTH`, or the email backend — it doesn't serve HTTP.
|
||||
- The Django app doesn't need the Daedalus S3 credentials — only the ingest Celery task reads that bucket.
|
||||
- When a shared secret (like the broker password) is mis-configured, the blast radius is limited to the services that actually need that secret, so you can still observe the rest of the stack while debugging.
|
||||
|
||||
Values are interpolated from a `.env` file at the **repo root** (not `mnemosyne/.env`, which is the dev config for bare-Python runs). Copy `.env.example` to `.env` and fill in the blanks, or — in production — have your Ansible role render `.env` from a Jinja2 template with secrets from the vault.
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
$EDITOR .env # fill in SECRET_KEY, DB/RabbitMQ/S3 creds, LLM_API_SECRETS_ENCRYPTION_KEY
|
||||
```
|
||||
|
||||
The per-service surface is defined by the `environment:` blocks in `docker-compose.yaml`; `.env.example` documents every variable with which service(s) consume it.
|
||||
|
||||
> **Broker URL gotcha.** If the RabbitMQ password contains any of `@ : / # % + ? & =` or a space, it must be percent-encoded in `CELERY_BROKER_URL`. Kombu's URL parser is strict, and this is the most common cause of a `PLAIN 403 ACCESS_REFUSED` at worker startup when the same credentials work fine under bare-Python `celery` invocations (because you were probably passing them as kwargs, not a URL).
|
||||
|
||||
### Docker bootstrap order
|
||||
|
||||
Three steps — the first and third are one-liners, the middle step is a
|
||||
manual sit-down in `/admin/` to configure the system embedding model.
|
||||
`setup_neo4j_indexes` is **not** run automatically: it reads vector
|
||||
dimensions from that admin row and hard-fails if the row is missing, so
|
||||
bundling it into the `init` sidecar would make `app` unreachable on
|
||||
first boot. Running it manually after admin configuration is the
|
||||
chicken-and-egg escape.
|
||||
|
||||
```bash
|
||||
# 1. Generate the root .env from the template (or let Ansible do it),
|
||||
# pull the image, and bring the stack up. The `init` sidecar runs
|
||||
# `migrate` + `load_library_types` and exits; `app`, `mcp`, and
|
||||
# `worker` come up healthy.
|
||||
cp .env.example .env && $EDITOR .env
|
||||
docker compose pull
|
||||
docker compose up -d
|
||||
|
||||
# 2. Browse to /admin/llm_manager/llmapi/ and add the embedding provider
|
||||
# (e.g. Pan Synesis, with the right base URL and API key). Then
|
||||
# /admin/llm_manager/llmmodel/ and add one row for the embedding model:
|
||||
# - api = the api you just created
|
||||
# - name = the provider's model name
|
||||
# - vector_dimensions = whatever your embedding provider returns
|
||||
# - is_system_embedding_model = True
|
||||
# Save, then come back to the shell.
|
||||
|
||||
# 3. Create Neo4j vector + full-text indexes at the right dimensions.
|
||||
# Idempotent — re-run after an embedding-model swap with `--drop` to
|
||||
# rebuild, which requires re-embedding all content.
|
||||
docker compose exec app python manage.py setup_neo4j_indexes
|
||||
```
|
||||
|
||||
Until step 3 runs, vector search returns empty results and
|
||||
`library/apps.py` logs a readiness warning each time the app boots. This
|
||||
is deliberate: an index built at the wrong dimension silently breaks
|
||||
every search, so loud failure beats quiet misconfiguration.
|
||||
|
||||
### Day-to-day
|
||||
|
||||
```bash
|
||||
docker compose ps # service status + health
|
||||
docker compose logs -f app # tail Django app logs
|
||||
docker compose logs -f web # tail nginx logs
|
||||
docker compose logs -f worker # tail Celery worker logs
|
||||
docker compose restart mcp # restart just the MCP server
|
||||
|
||||
# After a new image is published:
|
||||
docker compose pull && docker compose up -d
|
||||
```
|
||||
|
||||
### Things to verify in `.env` before bringing up
|
||||
|
||||
The root `.env` (the one compose interpolates from — not `mnemosyne/.env`) needs the following set for a working production deploy:
|
||||
|
||||
- `DEBUG=False`
|
||||
- `USE_LOCAL_STORAGE=False`
|
||||
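- `KVDB_LOCATION=<external-memcached-host>:11211` — inside a container `127.0.0.1` is the container's own loopback, not the Docker host, so a Memcached listening on the host is unreachable at that address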
|
||||
- `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` filled in (Mnemosyne's own MinIO bucket)
|
||||
- `DAEDALUS_S3_ACCESS_KEY_ID` / `DAEDALUS_S3_SECRET_ACCESS_KEY` filled in for cross-bucket ingest reads
|
||||
- `CELERY_BROKER_URL` with the RabbitMQ password **percent-encoded** if it contains URL-special characters
|
||||
- `ALLOWED_HOSTS` includes the public hostname HAProxy routes to (e.g. `mnemosyne.ouranos.helu.ca`)
|
||||
- `CSRF_TRUSTED_ORIGINS` includes `https://<same-hostname>`
|
||||
- `LLM_API_SECRETS_ENCRYPTION_KEY` set to a real Fernet key (generated once per environment)
|
||||
|
||||
### Verifying the environment reached a container
|
||||
|
||||
If a service misbehaves on startup — typically the worker with an `AccessRefused` from RabbitMQ, or the app with a DB auth error — the fastest diagnostic is to print what Django actually parsed, since that removes every layer of env-file / interpolation / URL-encoding ambiguity:
|
||||
|
||||
```bash
|
||||
# What broker URL did the worker actually receive?
|
||||
docker compose run --rm --no-deps worker \
|
||||
python -c "from django.conf import settings; print(repr(settings.CELERY_BROKER_URL))"
|
||||
|
||||
# What DB host/user?
|
||||
docker compose run --rm --no-deps app \
|
||||
python -c "from django.conf import settings; print(settings.DATABASES['default'])"
|
||||
```
|
||||
|
||||
The `repr(...)` form surfaces CRLF, trailing whitespace, stray quotes, or characters that should have been percent-encoded.
|
||||
|
||||
### Health probes
|
||||
|
||||
| Endpoint | Probes | Auth |
|
||||
|----------|--------|------|
|
||||
| `GET /live/` | Django process alive (always 200 if gunicorn is up) | None |
|
||||
| `GET /ready/` | PostgreSQL + Memcached reachable (503 if either is down) | None |
|
||||
| `GET /healthz` | MCP server `/mcp/health` — used as the HAProxy `health_path` | None |
|
||||
| `GET /metrics` | Prometheus scrape | Internal networks only |
|
||||
|
||||
> **Trailing slashes matter.** Always use `/live/` and `/ready/` (with the trailing slash). The un-slashed forms (`/live`, `/ready`) trigger Django's `APPEND_SLASH` 301 redirect — health check clients that don't follow redirects will report a failure even when the service is healthy.
|
||||
|
||||
## Architecture Note: Retrieval, Not Synthesis
|
||||
|
||||
|
||||
431
docker-compose.yaml
Normal file
431
docker-compose.yaml
Normal file
@@ -0,0 +1,431 @@
|
||||
# =============================================================================
|
||||
# Mnemosyne — production deployment
|
||||
# =============================================================================
|
||||
# Five services:
|
||||
# init — one-shot sidecar: migrate + collectstatic + load_library_types
|
||||
# app — Django REST API + admin (gunicorn, port 8000)
|
||||
# mcp — FastMCP server (uvicorn, port 8001)
|
||||
# worker — Celery worker (embedding/ingest/batch queues)
|
||||
# web — reverse proxy, public port 23081 (nginx)
|
||||
#
|
||||
# External services (NOT spun up here): Postgres on Despina, Neo4j on Naiad,
|
||||
# RabbitMQ on Thalassa, S3/MinIO on Perseus, Memcached on host. All reached
|
||||
# over the internal network.
|
||||
#
|
||||
# Environment scoping
|
||||
# -------------------
|
||||
# Every service lists ONLY the environment variables it actually needs, with
|
||||
# values interpolated from the shell (the .env at the project root is
|
||||
# generated by Ansible from a j2 template + vault secrets). No `env_file:`
|
||||
# sharing — a compromised MCP container should not see the Celery broker
|
||||
# creds or the LLM API encryption key, and the Celery worker has no business
|
||||
# knowing `ALLOWED_HOSTS`. If you add a new Django setting, decide which
|
||||
# services need it and add it only to those `environment:` blocks.
|
||||
#
|
||||
# Static files
|
||||
# ------------
|
||||
# collectstatic is run by the `init` sidecar on every `up`. Static files are
|
||||
# baked into the image at build time (/app/staticfiles by collectstatic in
|
||||
# the Dockerfile builder stage), then copied to STATIC_ROOT (/mnt/static) by
|
||||
# the init sidecar. nginx serves them directly from that bind-mounted path.
|
||||
# --clear removes stale files from the previous deploy on each run.
|
||||
#
|
||||
# Run:
|
||||
# docker compose up -d
|
||||
#
|
||||
# The `init` sidecar runs migrate + collectstatic + load_library_types on
|
||||
# every `up`. Long-running services wait for it via
|
||||
# `depends_on: init: service_completed_successfully` — so a failure there
|
||||
# (unreachable DB, broken migration) blocks the stack.
|
||||
#
|
||||
# Neo4j vector-index creation is deliberately NOT bundled into `init`.
|
||||
# `setup_neo4j_indexes` requires a system embedding model configured in
|
||||
# the admin, which only exists after first boot — an operator has to land
|
||||
# in /admin/, pick an embedding API + model, and set its vector_dimensions
|
||||
# value. Bootstrap order is therefore:
|
||||
#
|
||||
# 1. docker compose up # init sidecar: migrate + collectstatic + load_library_types
|
||||
# 2. browse to /admin/ → llm_manager → configure system embedding model
|
||||
# 3. docker compose exec app python manage.py setup_neo4j_indexes
|
||||
#
|
||||
# Until step 3, vector search returns empty results. library/apps.py logs
|
||||
# a readiness warning when indexes are missing, so this is visible.
|
||||
# The standalone `migrate` / `setup` entrypoint commands remain available
|
||||
# for ad-hoc ops work (`setup` runs setup_neo4j_indexes + load_library_types
|
||||
# and is the typical re-run target after embedding-model changes).
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Shared logging config — JSON to stdout, picked up by Alloy via the Docker
|
||||
# socket on the host and shipped to Loki. Pinning json-file (Docker's default)
|
||||
# so Alloy's discovery.docker + loki.source.docker on puck sees a consistent
|
||||
# driver across every service, and bounding log retention per container so a
|
||||
# misbehaving service can't fill the disk between Alloy tails.
|
||||
# -----------------------------------------------------------------------------
|
||||
x-logging: &default-logging
|
||||
driver: json-file
|
||||
options:
|
||||
tag: "{{.Name}}"
|
||||
max-size: "10m"
|
||||
max-file: "5"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Shared build config — build the Mnemosyne image locally from ./Dockerfile
|
||||
# instead of pulling from git.helu.ca. All four Mnemosyne services
|
||||
# (init/app/mcp/worker) share `image: mnemosyne:local`, so Compose builds
|
||||
# once and reuses the resulting image across them.
|
||||
# -----------------------------------------------------------------------------
|
||||
x-mnemosyne-build: &mnemosyne-build
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
|
||||
|
||||
services:
|
||||
# ── Init sidecar: one-shot Postgres migrate + collectstatic + library-type seed. Runs on
|
||||
# every `up` and exits. Long-running services below depend on
|
||||
# `service_completed_successfully`, so a failure here (unreachable DB,
|
||||
# broken migration) blocks `app`/`mcp`/`worker` from starting. All
|
||||
# commands are idempotent.
|
||||
#
|
||||
# collectstatic copies static files baked into the image (/app/staticfiles)
|
||||
# into STATIC_ROOT (/mnt/static) so nginx can serve them. --clear removes
|
||||
# stale files from the previous deploy on each run.
|
||||
#
|
||||
# Neo4j vector-index setup is NOT run here — see the header comment for
|
||||
# the operator bootstrap flow. Only library_type seeding touches Neo4j
|
||||
# from this sidecar, and it does not depend on any embedding model.
|
||||
#
|
||||
# This sidecar only needs Postgres, Neo4j, static files, and logging env —
|
||||
# no S3, no Celery, no LLM encryption key. Keep it that way.
|
||||
init:
|
||||
image: mnemosyne:local
|
||||
build: *mnemosyne-build
|
||||
command: ["init"]
|
||||
environment:
|
||||
# Django core (settings import)
|
||||
- DJANGO_SETTINGS_MODULE=mnemosyne.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- TIME_ZONE=${TIME_ZONE}
|
||||
- LANGUAGE_CODE=${LANGUAGE_CODE}
|
||||
# Postgres (migrate)
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# Neo4j (load_library_types writes Library defaults into the graph)
|
||||
- NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
|
||||
# Static files (collectstatic destination)
|
||||
- STATIC_ROOT=/mnt/static
|
||||
- USE_LOCAL_STORAGE=True
|
||||
# Logging
|
||||
- MNEMOSYNE_COMPONENT=init
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||
restart: "no"
|
||||
volumes:
|
||||
- static:/mnt/static
|
||||
logging: *default-logging
|
||||
|
||||
|
||||
# ── App: Django REST API + admin ──────────────────────────────────────────
|
||||
# Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
|
||||
# Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
|
||||
# the producer, the worker is the consumer).
|
||||
app:
|
||||
image: mnemosyne:local
|
||||
build: *mnemosyne-build
|
||||
command: ["web"]
|
||||
environment:
|
||||
# Django core
|
||||
- DJANGO_SETTINGS_MODULE=mnemosyne.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
|
||||
- CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS}
|
||||
- TIME_ZONE=${TIME_ZONE}
|
||||
- LANGUAGE_CODE=${LANGUAGE_CODE}
|
||||
- STATIC_ROOT=/mnt/static
|
||||
- MEDIA_ROOT=/mnt/media
|
||||
# Postgres (Django ORM)
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# Neo4j (knowledge graph + vectors)
|
||||
- NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
|
||||
# Memcached (readiness probe, theme/notification cache)
|
||||
- KVDB_LOCATION=${KVDB_LOCATION}
|
||||
- KVDB_PREFIX=${KVDB_PREFIX}
|
||||
# S3 (Django storage backend — chunk text, item files)
|
||||
- USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
|
||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
- AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
|
||||
- AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
|
||||
- AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
|
||||
- AWS_S3_VERIFY=${AWS_S3_VERIFY}
|
||||
- AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
|
||||
# Celery (Django enqueues tasks; does NOT consume)
|
||||
- CELERY_BROKER_URL=${CELERY_BROKER_URL}
|
||||
- CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
|
||||
- CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
|
||||
# LLM API secrets (admin + DRF pages decrypt stored provider API keys)
|
||||
- LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
|
||||
# Email
|
||||
- EMAIL_HOST=${EMAIL_HOST}
|
||||
- EMAIL_PORT=${EMAIL_PORT}
|
||||
- EMAIL_USE_TLS=${EMAIL_USE_TLS}
|
||||
# Search & re-ranker (serves /library/api/search)
|
||||
- SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
|
||||
- SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
|
||||
- SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
|
||||
- SEARCH_RRF_K=${SEARCH_RRF_K}
|
||||
- SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
|
||||
- RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
|
||||
- RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
|
||||
# SSO / Casdoor (app only — only this service renders the login page
|
||||
# and initiates the OIDC flow; worker and mcp never touch OAuth)
|
||||
- CASDOOR_ENABLED=${CASDOOR_ENABLED}
|
||||
- CASDOOR_ORIGIN=${CASDOOR_ORIGIN}
|
||||
- CASDOOR_ORIGIN_FRONTEND=${CASDOOR_ORIGIN_FRONTEND}
|
||||
- CASDOOR_CLIENT_ID=${CASDOOR_CLIENT_ID}
|
||||
- CASDOOR_CLIENT_SECRET=${CASDOOR_CLIENT_SECRET}
|
||||
- CASDOOR_ORG_NAME=${CASDOOR_ORG_NAME}
|
||||
- CASDOOR_SSL_VERIFY=${CASDOOR_SSL_VERIFY}
|
||||
- ALLOW_LOCAL_LOGIN=${ALLOW_LOCAL_LOGIN}
|
||||
# Logging
|
||||
- MNEMOSYNE_COMPONENT=app
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||
restart: unless-stopped
|
||||
logging: *default-logging
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- static:/mnt/static
|
||||
- media:/mnt/media
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/ready/"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
|
||||
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
|
||||
# Read-only LLM-facing surface. Intentionally excluded:
|
||||
# CELERY_BROKER_URL — MCP must not enqueue tasks
|
||||
# LLM_API_SECRETS_ENCRYPTION_KEY — MCP must not decrypt stored provider keys
|
||||
# DAEDALUS_S3_* — MCP does not ingest
|
||||
# CSRF_TRUSTED_ORIGINS — MCP does not accept browser forms
|
||||
# EMAIL_* — MCP does not send mail
|
||||
# EMBEDDING_* (batch/timeout) — MCP does not embed
|
||||
# S3 vars ARE passed so STORAGES initialises identically to the app container
|
||||
# (simpler to reason about than having mcp use FileSystemStorage while the
|
||||
# rest of the stack uses S3). MCP is read-only at the application layer so
|
||||
# the S3 key here only matters if someone exploits a write path in the
|
||||
# future — keep the credential scoped to read-only in your secret manager.
|
||||
mcp:
|
||||
image: mnemosyne:local
|
||||
build: *mnemosyne-build
|
||||
command: ["mcp"]
|
||||
environment:
|
||||
# Django core (ASGI still imports settings)
|
||||
- DJANGO_SETTINGS_MODULE=mnemosyne.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
|
||||
- TIME_ZONE=${TIME_ZONE}
|
||||
- LANGUAGE_CODE=${LANGUAGE_CODE}
|
||||
- STATIC_ROOT=/mnt/static
|
||||
- MEDIA_ROOT=/mnt/media
|
||||
# Postgres (McpToken lookup lives in Django ORM)
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# Neo4j (search + get_chunk)
|
||||
- NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
|
||||
# Memcached
|
||||
- KVDB_LOCATION=${KVDB_LOCATION}
|
||||
- KVDB_PREFIX=${KVDB_PREFIX}
|
||||
# S3 (same block as app — STORAGES must initialise identically)
|
||||
- USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
|
||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
- AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
|
||||
- AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
|
||||
- AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
|
||||
- AWS_S3_VERIFY=${AWS_S3_VERIFY}
|
||||
- AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
|
||||
# MCP-specific
|
||||
- MCP_REQUIRE_AUTH=${MCP_REQUIRE_AUTH}
|
||||
# Search & re-ranker (the `search` MCP tool uses these)
|
||||
- SEARCH_VECTOR_TOP_K=${SEARCH_VECTOR_TOP_K}
|
||||
- SEARCH_FULLTEXT_TOP_K=${SEARCH_FULLTEXT_TOP_K}
|
||||
- SEARCH_GRAPH_MAX_DEPTH=${SEARCH_GRAPH_MAX_DEPTH}
|
||||
- SEARCH_RRF_K=${SEARCH_RRF_K}
|
||||
- SEARCH_DEFAULT_LIMIT=${SEARCH_DEFAULT_LIMIT}
|
||||
- RERANKER_MAX_CANDIDATES=${RERANKER_MAX_CANDIDATES}
|
||||
- RERANKER_TIMEOUT=${RERANKER_TIMEOUT}
|
||||
# Logging
|
||||
- MNEMOSYNE_COMPONENT=mcp
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||
restart: unless-stopped
|
||||
logging: *default-logging
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- media:/mnt/media
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8001/mcp/health"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
|
||||
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
|
||||
# Consumer side of the queue. Needs the full S3 block (reads Daedalus's
|
||||
# bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
|
||||
# vision models via stored provider keys), and both broker URL + result
|
||||
# backend. Does NOT need HTTP-layer settings (ALLOWED_HOSTS, CSRF, MCP auth)
|
||||
# or search tuning (the worker never serves queries).
|
||||
worker:
|
||||
image: mnemosyne:local
|
||||
build: *mnemosyne-build
|
||||
command: ["worker"]
|
||||
environment:
|
||||
# Django core (Celery imports settings)
|
||||
- DJANGO_SETTINGS_MODULE=mnemosyne.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- TIME_ZONE=${TIME_ZONE}
|
||||
- LANGUAGE_CODE=${LANGUAGE_CODE}
|
||||
- STATIC_ROOT=/mnt/static
|
||||
- MEDIA_ROOT=/mnt/media
|
||||
# Postgres
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# Neo4j (graph writes during embed/ingest)
|
||||
- NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
|
||||
# Memcached (task progress cache)
|
||||
- KVDB_LOCATION=${KVDB_LOCATION}
|
||||
- KVDB_PREFIX=${KVDB_PREFIX}
|
||||
# S3 — Mnemosyne's own bucket (chunk text writes, item file storage)
|
||||
- USE_LOCAL_STORAGE=${USE_LOCAL_STORAGE}
|
||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
- AWS_STORAGE_BUCKET_NAME=${AWS_STORAGE_BUCKET_NAME}
|
||||
- AWS_S3_ENDPOINT_URL=${AWS_S3_ENDPOINT_URL}
|
||||
- AWS_S3_USE_SSL=${AWS_S3_USE_SSL}
|
||||
- AWS_S3_VERIFY=${AWS_S3_VERIFY}
|
||||
- AWS_S3_REGION_NAME=${AWS_S3_REGION_NAME}
|
||||
# Daedalus S3 — cross-bucket reads for ingest (worker-only)
|
||||
- DAEDALUS_S3_ENDPOINT_URL=${DAEDALUS_S3_ENDPOINT_URL}
|
||||
- DAEDALUS_S3_ACCESS_KEY_ID=${DAEDALUS_S3_ACCESS_KEY_ID}
|
||||
- DAEDALUS_S3_SECRET_ACCESS_KEY=${DAEDALUS_S3_SECRET_ACCESS_KEY}
|
||||
- DAEDALUS_S3_BUCKET_NAME=${DAEDALUS_S3_BUCKET_NAME}
|
||||
- DAEDALUS_S3_REGION_NAME=${DAEDALUS_S3_REGION_NAME}
|
||||
- DAEDALUS_S3_USE_SSL=${DAEDALUS_S3_USE_SSL}
|
||||
- DAEDALUS_S3_VERIFY=${DAEDALUS_S3_VERIFY}
|
||||
- SPELUNKER_S3_ENDPOINT_URL=${SPELUNKER_S3_ENDPOINT_URL}
|
||||
- SPELUNKER_S3_ACCESS_KEY_ID=${SPELUNKER_S3_ACCESS_KEY_ID}
|
||||
- SPELUNKER_S3_SECRET_ACCESS_KEY=${SPELUNKER_S3_SECRET_ACCESS_KEY}
|
||||
- SPELUNKER_S3_BUCKET_NAME=${SPELUNKER_S3_BUCKET_NAME}
|
||||
- SPELUNKER_S3_REGION_NAME=${SPELUNKER_S3_REGION_NAME}
|
||||
- SPELUNKER_S3_USE_SSL=${SPELUNKER_S3_USE_SSL}
|
||||
- SPELUNKER_S3_VERIFY=${SPELUNKER_S3_VERIFY}
|
||||
# Celery / RabbitMQ
|
||||
- CELERY_BROKER_URL=${CELERY_BROKER_URL}
|
||||
- CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND}
|
||||
- CELERY_TASK_ALWAYS_EAGER=${CELERY_TASK_ALWAYS_EAGER}
|
||||
# Worker tuning (entrypoint.sh reads these)
|
||||
- CELERY_QUEUES=${CELERY_QUEUES}
|
||||
- CELERY_CONCURRENCY=${CELERY_CONCURRENCY}
|
||||
- CELERY_LOG_LEVEL=${CELERY_LOGGING_LEVEL}
|
||||
# LLM API secrets (ingest vision pass decrypts stored provider keys)
|
||||
- LLM_API_SECRETS_ENCRYPTION_KEY=${LLM_API_SECRETS_ENCRYPTION_KEY}
|
||||
# Embedding pipeline
|
||||
- EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE}
|
||||
- EMBEDDING_TIMEOUT=${EMBEDDING_TIMEOUT}
|
||||
# Logging
|
||||
- MNEMOSYNE_COMPONENT=worker
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- CELERY_LOGGING_LEVEL=${CELERY_LOGGING_LEVEL}
|
||||
restart: unless-stopped
|
||||
logging: *default-logging
|
||||
depends_on:
|
||||
app:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- media:/mnt/media
|
||||
healthcheck:
  # CMD-SHELL (not exec-form CMD) so the container's shell expands $HOSTNAME.
  # `$$` only stops compose from interpolating the variable on the host; with
  # exec-form CMD no shell runs, celery receives the literal destination
  # "celery@$HOSTNAME", no node answers, and the check can never pass.
  test: ["CMD-SHELL", "celery -A mnemosyne inspect ping -d celery@$$HOSTNAME"]
  interval: 60s
  timeout: 10s
  retries: 3
  start_period: 60s
|
||||
|
||||
# ── Web: nginx reverse proxy, public port 23081 ────────────────────────────
|
||||
# No Django env — nginx only knows how to route. Public listener is
|
||||
# templated into the conf file by Ansible if the port ever needs to change.
|
||||
web:
|
||||
image: nginx:alpine
|
||||
restart: unless-stopped
|
||||
logging: *default-logging
|
||||
depends_on:
|
||||
app:
|
||||
condition: service_healthy
|
||||
mcp:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "23081:80"
|
||||
volumes:
|
||||
- ./nginx/mnemosyne.conf:/etc/nginx/conf.d/default.conf:ro
|
||||
- static:/var/www/static:ro
|
||||
- media:/var/www/media:ro
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost/live/"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
# ── Web metrics: nginx-prometheus-exporter ─────────────────────────────────
|
||||
# Scrapes the `web` container's stub_status endpoint and re-exposes it in
|
||||
# Prometheus format on 9113. Prospero (Sao) scrapes this; see
|
||||
# virgo/ansible/pplg/prometheus.yml.j2 → job_name: 'mnemosyne'.
|
||||
# The Django /metrics endpoint (django-prometheus + custom pipeline metrics
|
||||
# in mcp_server/metrics.py and library/metrics.py) is reached separately
|
||||
# via nginx at /metrics — no sidecar needed for that.
|
||||
web-metrics:
|
||||
image: nginx/nginx-prometheus-exporter:latest
|
||||
command:
|
||||
- --nginx.scrape-uri
|
||||
- http://web:80/nginx_status
|
||||
depends_on:
|
||||
web:
|
||||
condition: service_started
|
||||
ports:
|
||||
- "23191:9113"
|
||||
restart: unless-stopped
|
||||
logging: *default-logging
|
||||
|
||||
volumes:
|
||||
# Static files written by collectstatic (run by the init sidecar on every
|
||||
# `up`). Docker-managed volume — no host path needed; storage is minimal
|
||||
# and auto-regenerated on every `up`.
|
||||
static:
|
||||
# Media files. Production uses USE_LOCAL_STORAGE=False (S3) so this volume
|
||||
# is effectively unused — kept so the mount points in services don't break.
|
||||
media:
|
||||
102
docker/entrypoint.sh
Normal file
102
docker/entrypoint.sh
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/bin/sh
# Mnemosyne container entrypoint.
#
# The same image runs every Mnemosyne process — the compose service (or
# `docker compose run`) selects the mode by passing it as CMD:
#
#   web | mcp | worker | beat   long-running servers (exec'd, replacing this shell)
#   migrate | setup | init      one-shot maintenance commands
#   shell                       Django management shell
#   <anything else>             executed verbatim

set -e

# With no arguments the catch-all arm would run `exec "$@"` with nothing to
# exec, which is a silent no-op that exits 0. A service whose command was
# accidentally dropped would then look like it completed successfully, so
# fail loudly instead.
if [ "$#" -eq 0 ]; then
    echo "usage: $0 {web|mcp|worker|beat|migrate|setup|init|shell|<command> [args...]}" >&2
    exit 64
fi

case "$1" in
    web)
        # Django REST API + admin (gunicorn → wsgi).
        exec gunicorn \
            --config /app/docker/gunicorn.conf.py \
            --bind 0.0.0.0:8000 \
            --workers "${GUNICORN_WORKERS:-3}" \
            --access-logfile - \
            --error-logfile - \
            mnemosyne.wsgi:application
        ;;

    mcp)
        # FastMCP over Streamable HTTP at /mcp/, mounted by mnemosyne.asgi.
        exec uvicorn \
            --host 0.0.0.0 \
            --port 8001 \
            --workers "${UVICORN_WORKERS:-1}" \
            mnemosyne.asgi:app
        ;;

    worker)
        # Celery worker covering embedding + ingest + batch + default queues.
        # In production you may want to split these onto separate worker
        # services for queue-level isolation; one process is fine to start.
        #
        # NOTE(review): the comment above mentions an ingest queue, but the
        # default queue list below does not name one — confirm whether ingest
        # tasks are routed to one of the listed queues or whether a queue is
        # missing from the default.
        exec celery -A mnemosyne worker \
            --loglevel="${CELERY_LOG_LEVEL:-info}" \
            --queues="${CELERY_QUEUES:-celery,embedding,batch}" \
            --concurrency="${CELERY_CONCURRENCY:-2}"
        ;;

    beat)
        # Celery scheduled tasks (only needed if/when periodic jobs are wired).
        exec celery -A mnemosyne beat \
            --loglevel="${CELERY_LOG_LEVEL:-info}"
        ;;

    migrate)
        # One-shot DB migration runner — invoke before bringing services up
        # for the first time or after a deploy.
        exec python manage.py migrate --noinput
        ;;

    setup)
        # One-shot init — Neo4j indexes + library_type seed data. Run this
        # manually after the system embedding model has been configured in the
        # admin (setup_neo4j_indexes reads vector dimensions from that row).
        # Not exec'd: two commands run in sequence, and the top-level `set -e`
        # aborts on the first failure.
        python manage.py setup_neo4j_indexes
        python manage.py load_library_types
        ;;

    init)
        # Bundled one-shot init run by the `init` sidecar on every
        # `docker compose up`. Idempotent: re-runs are no-ops unless migrations
        # or library_type defaults need to change. A non-zero exit here blocks
        # `app`, `mcp`, and `worker` from starting.
        #
        # collectstatic copies the static files baked into the image at build
        # time (/app/staticfiles) into STATIC_ROOT (/mnt/static), which nginx
        # serves directly. --clear removes any stale files from the previous
        # deploy before copying, so deleted assets don't linger.
        #
        # Neo4j vector-index creation is *deliberately not* bundled here. That
        # command (``setup_neo4j_indexes``) requires a system embedding model
        # with a configured ``vector_dimensions`` value, and that model is
        # data an operator configures through the Django admin after first
        # boot. On a fresh stack there is no such row yet, so blocking the
        # whole stack on it would make the admin unreachable — a chicken-and-
        # egg. Operator bootstrap flow:
        #
        #   1. docker compose up        # init sidecar: migrate + collectstatic + load_library_types
        #   2. browse to admin, configure system embedding model
        #   3. docker compose exec app python manage.py setup_neo4j_indexes
        #
        # Until step 3 runs, vector search will return empty results — the
        # readiness check in library/apps.py logs a warning when indexes are
        # missing so this is visible, not silent.
        #
        # The top-level `set -e` is still in effect, so any failing step
        # aborts the sequence with a non-zero exit.
        python manage.py migrate --noinput
        python manage.py collectstatic --noinput --clear
        python manage.py load_library_types
        ;;

    shell)
        # Drop into the management shell for ad-hoc work.
        exec python manage.py shell
        ;;

    *)
        # Fall through: run whatever was passed (e.g. `manage.py <cmd>`).
        exec "$@"
        ;;
esac
|
||||
52
docker/gunicorn.conf.py
Normal file
52
docker/gunicorn.conf.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
# Paths hit by liveness/readiness/metrics probes. Anchored at the start of the
# path; allows an optional trailing slash and an optional query string.
_PROBE_PATH = re.compile(
    r"^(?:/live|/ready|/metrics|/healthz|/health[^ ]*|/ping)/?(?:\?|$)"
)


class _ProbePathFilter(logging.Filter):
    """Drop access-log records for health/metrics probe requests.

    Gunicorn logs each request with its access-log "atoms" mapping as the
    record args. The ``U`` atom (URL path) is preferred; when it is missing
    or empty the path is recovered from the ``r`` atom (the request line).
    """

    @staticmethod
    def _path_from_request_line(request_line: object) -> str:
        """Return the request target from a ``"METHOD target PROTOCOL"`` line.

        The probe pattern is anchored on a leading ``/``, so matching it
        against the raw request line (which starts with the HTTP method) can
        never succeed. A value with no whitespace is returned unchanged, and
        any non-string (e.g. ``None``) becomes ``""`` so the regex search
        cannot raise ``TypeError``.
        """
        if not isinstance(request_line, str):
            return ""
        parts = request_line.split(None, 2)
        return parts[1] if len(parts) >= 2 else request_line

    def filter(self, record: logging.LogRecord) -> bool:
        """Return ``False`` (suppress) for probe requests, ``True`` otherwise."""
        atoms = getattr(record, "args", None)
        if isinstance(atoms, dict):
            path = atoms.get("U") or self._path_from_request_line(atoms.get("r"))
        else:
            # Not an atoms mapping: fall back to the rendered message, which
            # only matches when the message itself begins with a probe path.
            path = record.getMessage()
        return not _PROBE_PATH.search(path)


# Single shared instance: Logger.addFilter ignores a filter object that is
# already attached, so reusing it keeps repeated hook calls idempotent.
_filter = _ProbePathFilter()
|
||||
|
||||
|
||||
def on_starting(server):
    """Gunicorn ``on_starting`` hook: silence probe requests in the access log.

    Attaches the shared probe-path filter to the ``gunicorn.access`` logger.
    ``server`` is supplied by gunicorn and is not used.
    """
    access_logger = logging.getLogger("gunicorn.access")
    access_logger.addFilter(_filter)
|
||||
|
||||
|
||||
def post_worker_init(worker):
    """Gunicorn ``post_worker_init`` hook: re-attach the filter, run the probe.

    Adds the probe-path filter to the ``gunicorn.access`` logger again in this
    process, then runs the ``library.apps`` startup probe unless
    ``_should_skip_probe()`` says otherwise. A crashing probe is logged as a
    warning (with traceback) and never propagates out of the hook.
    ``worker`` is supplied by gunicorn and is not used.
    """
    logging.getLogger("gunicorn.access").addFilter(_filter)

    # Imported here rather than at module top — presumably so the Django app
    # registry is loaded first; TODO confirm.
    from library.apps import _run_startup_probe, _should_skip_probe

    if _should_skip_probe():
        return

    try:
        _run_startup_probe()
    except Exception as err:
        probe_logger = logging.getLogger("library.apps")
        probe_logger.warning("Startup probe crashed: %s", err, exc_info=True)
|
||||
|
||||
|
||||
def worker_exit(server, worker):
    """Gunicorn ``worker_exit`` hook: close this worker's neomodel connection.

    Neomodel lazily creates a ``neo4j.Driver`` on the first ``cypher_query``
    and keeps it for the life of the process. Newer neo4j drivers warn (and
    will eventually fail to clean up) when a driver is destroyed without an
    explicit close, so it is closed here to let each worker shut down cleanly.

    Best-effort: any failure, including neomodel not being importable, is
    logged as a warning and swallowed so worker shutdown is never blocked.
    ``server`` and ``worker`` are supplied by gunicorn and are not used.
    """
    try:
        from neomodel import db

        db.close_connection()
    except Exception as err:
        neomodel_logger = logging.getLogger("neomodel")
        neomodel_logger.warning(
            "Failed to close neomodel driver on worker exit: %s", err
        )
|
||||
795
docs/DAEDALUS_PALLAS_INTEGRATION_v1.md
Normal file
795
docs/DAEDALUS_PALLAS_INTEGRATION_v1.md
Normal file
@@ -0,0 +1,795 @@
|
||||
# Daedalus ↔ Pallas ↔ Mnemosyne Integration — v1
|
||||
|
||||
**Status:** Draft / approved design
|
||||
**Authoritative home:** `mnemosyne/docs/DAEDALUS_PALLAS_INTEGRATION_v1.md`
|
||||
**Versioning:** subsequent major revisions ship as `..._v2.md`, `..._v3.md`
|
||||
alongside this file rather than overwriting it. Cross-service docs
|
||||
(Daedalus, Pallas) link here rather than duplicating.
|
||||
|
||||
---
|
||||
|
||||
## 1. Summary
|
||||
|
||||
This document describes the end-state authentication / authorization
|
||||
model connecting three services:
|
||||
|
||||
* **Mnemosyne** — knowledge platform. Owns Libraries, users, and the
|
||||
MCP surface third-party clients query.
|
||||
* **Daedalus** — workspace + file-lifecycle UI. Single-user per
|
||||
instance. Registers Pallas instances, syncs file content to
|
||||
Mnemosyne, drives chat.
|
||||
* **Pallas** — FastAgent-backed MCP host that exposes agent teams
|
||||
(Kottos, Mentor, Iolaus, Daedalus-chat, …) as HTTP MCP servers.
|
||||
|
||||
The model replaces the per-turn JWT *forwarding* scheme with a unified
|
||||
**bearer → resolved library set** abstraction. Every authenticated
|
||||
Mnemosyne request resolves to a single ordered `resolved_libraries`
|
||||
list of Library UIDs the caller may read; the principal type (opaque
|
||||
`MCPToken`, Daedalus per-turn JWT, team JWT) only determines how that
|
||||
list is derived. `Library.workspace_id` is a Daedalus content-routing
|
||||
attribute used by the ingest and workspace-lifecycle APIs; it is **not**
|
||||
consulted by the auth layer.
|
||||
|
||||
It also records the UX shift in Daedalus: **workspaces attach Teams
|
||||
(Pallas instances), not individual agents**; the agent picker in chat
|
||||
is filtered by the workspace's attached teams.
|
||||
|
||||
---
|
||||
|
||||
## 2. Motivation
|
||||
|
||||
The previous design forwarded a Daedalus-minted per-turn JWT through
|
||||
Pallas to Mnemosyne via a custom `_DynamicBearerAuth`, a `ContextVar`,
|
||||
a YAML scanner (`_refresh_forward_servers`), a header-mutation
|
||||
monkey-patch, and three trace wrappers in
|
||||
`pallas/pallas/_fastagent_patch.py`. When something broke on this
|
||||
path, tracebacks landed nowhere visible because fast-agent's
|
||||
`MCPAggregator._execute_on_server.try_execute` swallowed exceptions
|
||||
(`except Exception as e: logger.error(…str(e)…); return error_factory(...)`),
|
||||
and we were monkey-patching under it.
|
||||
|
||||
The failure mode (agent teams like Harper going into infinite
|
||||
token-burning loops when Mnemosyne was unhappy) combined with the
|
||||
diagnostic opacity made this architecture unsustainable. Per-turn
|
||||
forwarding was also the wrong shape for non-interactive agent teams
|
||||
that have no user session per call.
|
||||
|
||||
This design eliminates forwarding. Each Pallas deployment carries a
|
||||
static, long-lived team JWT in its own `fastagent.secrets.yaml`. No
|
||||
custom transport code in Pallas. Authorization happens server-side in
|
||||
Mnemosyne against live DB rows.
|
||||
|
||||
---
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Services and responsibilities
|
||||
|
||||
| Service | Role in auth model |
|
||||
|---|---|
|
||||
| **Mnemosyne** | Owns Libraries, Library memberships, MCPTokens, Teams, TeamWorkspaceAssignments, signing keys. Validates bearers. Resolves every authenticated request to a Library set. |
|
||||
| **Daedalus** | Control plane. Registers Pallas instances as Teams in Mnemosyne. Manages workspace ↔ team attachments. Stores team JWTs for copying into Pallas deployment configs. |
|
||||
| **Pallas** | Stateless MCP host. Holds a static team JWT in `fastagent.secrets.yaml`. No custom auth-forwarding code. |
|
||||
|
||||
### 3.2 Three credential types
|
||||
|
||||
Every Mnemosyne MCP call presents a Bearer token that falls into one
|
||||
of three categories:
|
||||
|
||||
| # | Credential | `iss` | Issuer | Lifetime | Library scope source |
|
||||
|---|---|---|---|---|---|
|
||||
| 1 | **Opaque `MCPToken`** | n/a | Mnemosyne admin | Until revoked | `MCPToken.allowed_libraries` (JSON list of Library UIDs, set at mint) |
|
||||
| 2 | **Per-turn JWT** | `daedalus` | Daedalus | Minutes | `libs` claim (baked in at mint) |
|
||||
| 3 | **Team JWT** | `mnemosyne` | Mnemosyne | 10 years | Live DB lookup via `TeamWorkspaceAssignment.workspace_id → Library.workspace_id` |
|
||||
|
||||
Category 2 is used only by Daedalus chat. Once Daedalus-chat itself
|
||||
registers as a Pallas Team in Phase 4, category 2 retires entirely
|
||||
and the design collapses to two credential types.
|
||||
|
||||
### 3.3 Resolved-library abstraction
|
||||
|
||||
Mnemosyne's auth middleware populates a single
|
||||
`resolved_libraries: list[str]` per request. Downstream code (search,
|
||||
get_chunk, list_libraries, list_collections, list_items, …) only
|
||||
reads that list; it does not care where it came from.
|
||||
|
||||
```
|
||||
Bearer → classify → dispatch
|
||||
├─ Opaque MCPToken → token.allowed_libraries (JSON list of UIDs)
|
||||
├─ per-turn JWT → claims["libs"]
|
||||
└─ team JWT (typ=team) → live DB join:
|
||||
TeamWorkspaceAssignment.workspace_id
|
||||
→ Library.workspace_id → Library.uid
|
||||
↓
|
||||
resolved_libraries: list[str]
|
||||
↓
|
||||
downstream tools
|
||||
```
|
||||
|
||||
Fail-closed: if the resolution produces an empty list, the request
|
||||
sees no Libraries. There is no "empty means everything" fallback.
|
||||
|
||||
#### 3.3.1 Retirement of the old three-branch scope clause
|
||||
|
||||
The pre-phase-2 search pipeline ran every Cypher query against a
|
||||
`_WORKSPACE_SCOPE_CLAUSE` with three branches keyed on whether
|
||||
`workspace_id` and/or `allowed_libraries` were set. Phase 2 removes
|
||||
that clause entirely. Every authorization check collapses to:
|
||||
|
||||
```cypher
|
||||
WHERE lib.uid IN $resolved_libraries
|
||||
```
|
||||
|
||||
`Library.workspace_id` stays on the node as a Daedalus content-routing
|
||||
attribute (used by the ingest API to find-or-create the per-workspace
|
||||
Library, and by the workspace-lifecycle API to cascade-delete that
|
||||
Library's contents). It is **not** an authorization axis and is not
|
||||
consulted anywhere in the auth middleware, the MCP tool surface, or
|
||||
the search service.
|
||||
|
||||
Admin-UI-initiated searches (Django staff logged into the Mnemosyne
|
||||
admin / search page) materialize `resolved_libraries` explicitly as
|
||||
"every Library UID the database contains" — the same mechanism used
|
||||
today as a workaround, now the only code path.
|
||||
|
||||
---
|
||||
|
||||
## 4. Data model
|
||||
|
||||
### 4.1 Mnemosyne additions
|
||||
|
||||
#### `LibraryMembership` (new)
|
||||
```python
|
||||
class LibraryMembership(models.Model):
|
||||
user = FK(User, related_name="library_memberships")
|
||||
library_uid = CharField(max_length=64, db_index=True) # neo4j Library.uid
|
||||
role = CharField(choices=[("owner","owner"),
|
||||
("manager","manager"),
|
||||
("reader","reader")])
|
||||
created_at = DateTimeField(auto_now_add=True)
|
||||
class Meta:
|
||||
unique_together = ("user", "library_uid", "role")
|
||||
```
|
||||
- **owner** — full control: rename, delete, reassign ownership,
|
||||
grant/revoke any role.
|
||||
- **manager** — can grant `reader` and scope Library into MCPTokens
|
||||
they own, but cannot delete the library or remove other owners.
|
||||
- **reader** — can read via their own MCPTokens; cannot grant to other
|
||||
users.
|
||||
|
||||
A user can scope a Library into `MCPToken.allowed_libraries` iff they
|
||||
have `owner` or `manager` role on it.
|
||||
|
||||
#### `MCPToken.allowed_libraries` (new field on existing model)
|
||||
```python
|
||||
# JSON list of Library.uid strings. A real M2M isn't possible because
|
||||
# Library lives in Neo4j (neomodel StructuredNode), not Django's ORM.
|
||||
# The admin/dashboard form materializes the picker by querying
|
||||
# Library.nodes and filtering to libraries where the token's user has
|
||||
# an ``owner`` or ``manager`` LibraryMembership.
|
||||
allowed_libraries = models.JSONField(default=list, blank=True)
|
||||
```
|
||||
Fail-closed: empty → token grants access to zero libraries.
|
||||
Admin form filters the picker by the current user's owned/managed
|
||||
memberships.
|
||||
|
||||
#### `Team` (new)
|
||||
```python
|
||||
class Team(models.Model):
|
||||
id = UUIDField(primary_key=True, editable=False)
|
||||
# = Daedalus PallasInstance.id. Stays stable across redeploy /
|
||||
# rehost of a given Pallas instance.
|
||||
name = CharField(max_length=200) # display; mirrored from Daedalus
|
||||
active = BooleanField(default=True)
|
||||
active_jti = UUIDField(null=True) # current valid jti
|
||||
created_at = DateTimeField(auto_now_add=True)
|
||||
updated_at = DateTimeField(auto_now=True)
|
||||
```
|
||||
|
||||
#### `TeamWorkspaceAssignment` (new)
|
||||
```python
|
||||
class TeamWorkspaceAssignment(models.Model):
|
||||
team = FK(Team, related_name="workspace_assignments",
|
||||
on_delete=CASCADE)
|
||||
workspace_id = CharField(max_length=64) # matches Library.workspace_id
|
||||
created_at = DateTimeField(auto_now_add=True)
|
||||
class Meta:
|
||||
unique_together = ("team", "workspace_id")
|
||||
```
|
||||
|
||||
No library-level assignment for teams. Teams gain access to *all*
|
||||
libraries of their assigned workspaces. If finer control is ever
|
||||
needed later, it layers on without disturbing this model.
|
||||
|
||||
#### `MCPSigningKey` (existing, unchanged)
|
||||
Re-used to sign team JWTs. The same signing key can back both
|
||||
per-turn tokens (pre-retirement) and team tokens (long-lived).
|
||||
|
||||
### 4.2 Daedalus additions
|
||||
|
||||
#### `PallasInstance.team_jwt_encrypted` (new column, text, Fernet)
|
||||
Stores the team JWT received from Mnemosyne at registration time.
|
||||
Fernet-encrypted at rest using the same pattern as
|
||||
`daedalus/llm_manager/encryption.py`. Displayed plaintext exactly once
|
||||
in the admin detail page immediately after provisioning, so the
|
||||
operator can copy it into `fastagent.secrets.yaml` on the Pallas
|
||||
deployment.
|
||||
|
||||
#### `PallasInstance.pallas_team_mnemosyne_status` (new column)
|
||||
`NULL | "pending" | "provisioned" | "failed"`. Drives the reconciler;
|
||||
analogous to the existing `WorkspaceFile.mnemosyne_status`.
|
||||
|
||||
#### `workspace_pallas_assignments` (new M2M table)
|
||||
```sql
|
||||
CREATE TABLE workspace_pallas_assignments (
|
||||
workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
|
||||
pallas_instance_id TEXT NOT NULL REFERENCES pallas_instances(id) ON DELETE CASCADE,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
PRIMARY KEY (workspace_id, pallas_instance_id)
|
||||
);
|
||||
```
|
||||
|
||||
Starts empty on workspace create. Operator explicitly attaches Pallas
|
||||
instances (Teams) before any Mnemosyne-backed agent — including
|
||||
Daedalus chat — can search that workspace.
|
||||
|
||||
### 4.3 Nothing changes in Pallas
|
||||
After cleanup, `pallas/pallas/_fastagent_patch.py` either becomes an
|
||||
empty placeholder or is removed entirely. `pallas/__init__.py` no
|
||||
longer invokes `install()`. Pallas deployments configure stock
|
||||
fast-agent with a static `Authorization: Bearer <team-jwt>`.
|
||||
|
||||
---
|
||||
|
||||
## 5. JWT claim shapes
|
||||
|
||||
### 5.1 Per-turn JWT (category 2 — legacy, retires in Phase 4)
|
||||
```json
|
||||
{
|
||||
"iss": "daedalus",
|
||||
"aud": "mnemosyne", // optional, not enforced
|
||||
"sub": "daedalus-chat",
|
||||
"iat": 1715000000,
|
||||
"exp": 1715000600, // ≤ 10 minutes
|
||||
"jti": "uuid4",
|
||||
"ws": "ws_abc", // Daedalus workspace id
|
||||
"libs": ["lib_xxx", "lib_yyy"] // user-managed libraries
|
||||
}
|
||||
```
|
||||
Kept unchanged during Phase 2–3 so Daedalus chat continues to work
|
||||
while we ship the team infrastructure.
|
||||
|
||||
### 5.2 Team JWT (category 3 — new)
|
||||
```json
|
||||
{
|
||||
"iss": "mnemosyne",
|
||||
"aud": "mnemosyne",
|
||||
"sub": "team:<pallas_instance_uuid>", // UUID; Daedalus id
|
||||
"typ": "team", // distinguishes from per-turn
|
||||
"iat": 1715000000,
|
||||
"exp": 2030360000, // +10 years
|
||||
"jti": "uuid4"
|
||||
}
|
||||
```
|
||||
No `ws`, no `libs`. Authorization is evaluated live against
|
||||
`TeamWorkspaceAssignment` rows on every request.
|
||||
|
||||
### 5.3 Validator changes in `mcp_server/auth.py`
|
||||
|
||||
```python
|
||||
# Accept both issuers; distinguish paths by typ.
|
||||
_JWT_ISS = {"daedalus", "mnemosyne"}
|
||||
|
||||
def resolve_mcp_jwt(token_string: str) -> dict:
|
||||
... # validate signature, iat/exp, required claims including sub
|
||||
|
||||
typ = claims.get("typ")
|
||||
if typ == "team":
|
||||
# No replay cache — team tokens are reused on every request.
|
||||
# Validate sub=="team:<uuid>" shape; stash the uuid on claims.
|
||||
pass
|
||||
else:
|
||||
if _remember_jti(jti, float(exp)):
|
||||
raise MCPAuthError("Token replay detected.")
|
||||
|
||||
return claims
|
||||
```
|
||||
|
||||
Middleware populates `STATE_KEY_RESOLVED_LIBRARIES` per request:
|
||||
|
||||
```python
|
||||
# Opaque MCPToken
|
||||
resolved_libraries = list(token.allowed_libraries or [])
|
||||
|
||||
# Per-turn JWT (legacy; retires phase 4)
|
||||
resolved_libraries = list(claims.get("libs") or [])
|
||||
|
||||
# Team JWT
|
||||
team = Team.objects.get(id=uuid_from_sub(claims["sub"]),
|
||||
active=True,
|
||||
active_jti=claims["jti"])
|
||||
resolved_libraries = _libraries_for_team(team) # see below
|
||||
```
|
||||
|
||||
`_libraries_for_team(team)` runs a single Cypher query against Neo4j:
|
||||
|
||||
```cypher
|
||||
MATCH (l:Library)
|
||||
WHERE l.workspace_id IN $workspace_ids
|
||||
RETURN l.uid
|
||||
```
|
||||
|
||||
where `$workspace_ids` is `list(team.workspace_assignments.values_list("workspace_id", flat=True))`.
|
||||
|
||||
---
|
||||
|
||||
## 6. Auth flow
|
||||
|
||||
### 6.1 Third-party MCP client with opaque `MCPToken`
|
||||
1. Client sends `Authorization: Bearer <plaintext>`.
|
||||
2. Middleware hashes → looks up `MCPToken` → validates active/expired.
|
||||
3. `resolved_libraries = list(token.allowed_libraries or [])` — the
|
||||
JSON list of Library UIDs the admin / dashboard granted at mint.
|
||||
4. Fails closed if empty.
|
||||
|
||||
### 6.2 Daedalus chat per-turn JWT (legacy, retires Phase 4)
|
||||
`iss=daedalus`, `typ` absent, `libs` carries the full library set
|
||||
Daedalus pre-computed for that turn (the workspace's auto-Library
|
||||
plus any user-managed extras), `ws` is present but no longer consulted
|
||||
server-side. Middleware assigns `resolved_libraries = claims["libs"]`.
|
||||
Mnemosyne validates the JWT against `MCPSigningKey` keyed by `kid`.
|
||||
|
||||
### 6.3 Agent team (Kottos / Mentor / Iolaus / post-migration Daedalus-chat)
|
||||
1. Pallas sends `Authorization: Bearer <team-jwt>` (static, read from
|
||||
`fastagent.secrets.yaml`).
|
||||
2. Middleware validates signature → detects `typ=team`.
|
||||
3. Reads `Team` row by UUID from `sub`. Verifies `active=True` and
|
||||
`jti == active_jti`. Rejects otherwise.
|
||||
4. Expands to `resolved_libraries` via
|
||||
`TeamWorkspaceAssignment` → `Library.workspace_id`.
|
||||
5. Fails closed if the team has no workspaces attached.
|
||||
|
||||
### 6.4 Failure modes
|
||||
|
||||
| Condition | Response |
|
||||
|---|---|
|
||||
| JWT signature invalid | `PermissionError("Invalid MCP token.")` |
|
||||
| `exp` past (+30s leeway) | `PermissionError("Token has expired.")` |
|
||||
| `iss` not in `_JWT_ISS` | `PermissionError("Invalid token issuer.")` |
|
||||
| `typ=team`, team not found | `PermissionError("Invalid MCP token.")` |
|
||||
| `typ=team`, team not active | `PermissionError("Token has been deactivated.")` |
|
||||
| `typ=team`, `jti` stale | `PermissionError("Invalid MCP token.")` |
|
||||
| Opaque token not found | `PermissionError("Invalid MCP token.")` |
|
||||
| Opaque token, inactive user | `PermissionError("User account is disabled.")` |
|
||||
| Resolved library set empty | Tool call proceeds but returns empty — this is *authorization*, not *authentication*, and the caller is legitimately scoped to nothing. |
|
||||
|
||||
---
|
||||
|
||||
## 7. REST API — Mnemosyne team lifecycle
|
||||
|
||||
All endpoints live under `/mcp_server/api/teams/` and are authenticated
|
||||
as the Mnemosyne user the team belongs to via a per-user DRF token
|
||||
(`Authorization: Token <key>`, surfaced on `/profile/settings/`). Each
|
||||
team has an `owner` FK; non-owners receive 404 (never 403) so a team's
|
||||
existence isn't disclosed across users. `/library/api/workspaces/` and
|
||||
`/library/api/ingest/` use the same per-user auth model.
|
||||
|
||||
### 7.1 `POST /mcp_server/api/teams/`
|
||||
Create a team.
|
||||
|
||||
**Request**
|
||||
```json
|
||||
{
|
||||
"id": "a3f1…", // UUID; mirrors Daedalus PallasInstance.id
|
||||
"name": "Kottos"
|
||||
}
|
||||
```
|
||||
|
||||
**Response 201**
|
||||
```json
|
||||
{
|
||||
"id": "a3f1…",
|
||||
"name": "Kottos",
|
||||
"jwt": "eyJhbGci…" // shown once; not recoverable later
|
||||
}
|
||||
```
|
||||
|
||||
On `id` collision: idempotent — returns existing team **without** the
|
||||
JWT. Caller must call `/rotate` to get a new one.
|
||||
|
||||
### 7.2 `DELETE /mcp_server/api/teams/{id}/`
|
||||
Soft-delete. Sets `active=False`. Old JWT invalid on next call.
|
||||
|
||||
### 7.3 `PUT /mcp_server/api/teams/{id}/workspaces/`
|
||||
Replace the team's workspace assignment set. Idempotent.
|
||||
|
||||
**Request**
|
||||
```json
|
||||
{ "workspace_ids": ["ws_abc", "ws_def"] }
|
||||
```
|
||||
|
||||
**Response 200**
|
||||
```json
|
||||
{ "workspace_ids": ["ws_abc", "ws_def"] }
|
||||
```
|
||||
|
||||
Non-existent workspaces silently accepted (they become active if/when
|
||||
a `Library` with that `workspace_id` is later created). Mirrors the
|
||||
Daedalus source of truth.
|
||||
|
||||
### 7.4 `POST /mcp_server/api/teams/{id}/rotate/`
|
||||
Generate a fresh `jti` and JWT, replace `active_jti`. Old JWT invalid
|
||||
immediately.
|
||||
|
||||
**Response 200**
|
||||
```json
|
||||
{ "jwt": "eyJhbGci…" }
|
||||
```
|
||||
|
||||
### 7.5 `GET /mcp_server/api/teams/{id}/`
|
||||
Read-only team detail (no JWT). Used by Daedalus reconciler to
|
||||
confirm state.
|
||||
|
||||
**Response 200**
|
||||
```json
|
||||
{
|
||||
"id": "a3f1…",
|
||||
"name": "Kottos",
|
||||
"active": true,
|
||||
"active_jti": "…", // for diagnostics, not a credential
|
||||
"workspace_ids": ["ws_abc"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Daedalus lifecycle hooks
|
||||
|
||||
Mirrors the pattern in `daedalus/backend/daedalus/mnemosyne/lifecycle.py`:
|
||||
every hook is best-effort, logs errors without blocking the local
|
||||
operation, and is retried by the reconciler.
|
||||
|
||||
### 8.1 `on_pallas_registered(instance)`
|
||||
1. `POST /mcp_server/api/teams/` with `id=instance.id`, `name=instance.name`.
|
||||
2. Encrypt JWT via Fernet; store on `instance.team_jwt_encrypted`;
|
||||
set `pallas_team_mnemosyne_status="provisioned"`.
|
||||
3. Log `pallas_team_provisioned`.
|
||||
|
||||
On failure: status `"failed"`, reconciler retries.
|
||||
|
||||
### 8.2 `on_pallas_deleted(instance_id)`
|
||||
`DELETE /mcp_server/api/teams/{id}/`. Row cascade locally; Mnemosyne
|
||||
soft-deletes. Best-effort.
|
||||
|
||||
### 8.3 `on_workspace_pallas_attached(workspace_id, instance_id)`
|
||||
1. Read all current `workspace_pallas_assignments` where
|
||||
`pallas_instance_id=instance_id`.
|
||||
2. `PUT /mcp_server/api/teams/{id}/workspaces/` with the resulting
|
||||
workspace_id list.
|
||||
|
||||
### 8.4 `on_workspace_pallas_detached(workspace_id, instance_id)`
|
||||
Symmetric to 8.3.
|
||||
|
||||
### 8.5 `on_workspace_deleted(workspace_id)`
|
||||
For every attached Pallas instance, recompute and push updated
|
||||
workspace list (so teams lose the deleted workspace).
|
||||
|
||||
### 8.6 Reconciler extension
|
||||
Extends
|
||||
`daedalus/backend/daedalus/mnemosyne/reconciler.py`:
|
||||
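- Re-runs `POST /teams/` for instances with status NULL or `"failed"`. If the team already exists in Mnemosyne, the response carries no JWT (§7.1), so the reconciler follows up with `POST /teams/{id}/rotate/` to obtain one.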
|
||||
- Re-syncs workspace assignments for all instances on every cycle
|
||||
(cheap idempotent PUT; guards against silent drift).
|
||||
|
||||
---
|
||||
|
||||
## 9. Operator workflows
|
||||
|
||||
### 9.1 Register a new Pallas deployment
|
||||
1. Operator adds entry in Daedalus admin: `POST /api/v1/pallas`
|
||||
with the Pallas registry URL. Daedalus fetches the registry,
|
||||
creates the `PallasInstance` row, then calls Mnemosyne
|
||||
`POST /mcp_server/api/teams/`. The JWT lands in
|
||||
`instance.team_jwt_encrypted`.
|
||||
2. Daedalus admin detail page surfaces the JWT plaintext **once**
|
||||
(decrypted client-side or via a one-shot "reveal" endpoint that
|
||||
logs the access). Operator copies it.
|
||||
3. On the Pallas deployment machine, operator pastes the JWT into
|
||||
`fastagent.secrets.yaml`:
|
||||
```yaml
|
||||
mcp:
|
||||
servers:
|
||||
mnemosyne:
|
||||
transport: http
|
||||
url: https://mnemosyne.example.helu.ca/mcp/
|
||||
headers:
|
||||
Authorization: Bearer eyJhbGci…
|
||||
```
|
||||
Operator removes any stale `forward_inbound_auth: true` from the
|
||||
corresponding entry in `fastagent.config.yaml`. Restart Pallas.
|
||||
|
||||
### 9.2 Attach a Pallas team to a workspace
|
||||
1. Daedalus workspace settings → "Attached Teams" → multi-select
|
||||
across registered Pallas instances → save.
|
||||
2. Daedalus fires `on_workspace_pallas_attached`; Mnemosyne's
|
||||
`TeamWorkspaceAssignment` updates.
|
||||
3. Agent picker in chat immediately shows agents from that team for
|
||||
this workspace.
|
||||
|
||||
### 9.3 Retire a Pallas deployment
|
||||
1. Daedalus admin → delete PallasInstance.
|
||||
2. Daedalus calls `DELETE /mcp_server/api/teams/{id}/`; row marked
|
||||
inactive in Mnemosyne. JWT rejected on next call.
|
||||
3. Operator shuts down the Pallas deployment.
|
||||
|
||||
### 9.4 Rotate a compromised team JWT
|
||||
1. Daedalus admin → "Rotate team JWT" action on the PallasInstance.
|
||||
2. Daedalus calls `POST /mcp_server/api/teams/{id}/rotate/`,
|
||||
re-encrypts and stores the new JWT.
|
||||
3. Operator copies the new JWT into the Pallas deployment's
|
||||
`fastagent.secrets.yaml`, restarts.
|
||||
|
||||
### 9.5 Provision existing Pallas instances (one-time migration)
|
||||
After Mnemosyne phase 2 deploys:
|
||||
```
|
||||
$ daedalus manage.py provision_teams
|
||||
```
|
||||
Walks all existing `PallasInstance` rows, calls
|
||||
`POST /mcp_server/api/teams/` for each, stores + prints JWTs in a
|
||||
table for the operator to distribute. Idempotent: rows already
|
||||
`provisioned` are skipped.
|
||||
|
||||
### 9.6 Issue an MCPToken for a third-party MCP client
|
||||
1. Mnemosyne admin → MCPTokens → add. Pick user. Library picker is
|
||||
filtered to libraries where that user has `owner` or `manager`
|
||||
membership.
|
||||
2. Submit. Plaintext shown once on the response page.
|
||||
3. Operator pastes the plaintext into the third-party client's
|
||||
config (Claude Desktop, Cline, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 10. UX changes in Daedalus
|
||||
|
||||
### 10.1 Workspace → attached Teams
|
||||
|
||||
Today: workspaces accumulate `AgentConnection` rows across every
|
||||
registered Pallas instance; the agent picker is long, sub-agents
|
||||
share names with parents (e.g. two distinct "Harper" agents), and it
|
||||
is unclear which workspace grants which agents Mnemosyne access.
|
||||
|
||||
New:
|
||||
- Workspace settings has a new section **Attached Teams**, a
|
||||
multi-select over registered `PallasInstance` rows. Starts empty on
|
||||
workspace create.
|
||||
- Attaching/detaching a team triggers the lifecycle hook that updates
|
||||
Mnemosyne's `TeamWorkspaceAssignment` for that team.
|
||||
|
||||
### 10.2 Agent picker in chat
|
||||
|
||||
New behavior:
|
||||
- Lists only agents belonging to teams attached to the current
|
||||
workspace.
|
||||
- Displays each agent as **`TeamName › AgentTitle`** so sub-agent
|
||||
name collisions resolve visually.
|
||||
- Empty state: "No teams attached to this workspace. Go to workspace
|
||||
settings → Attached Teams." (links inline).
|
||||
|
||||
### 10.3 Agent switching during chat
|
||||
|
||||
Unchanged mechanically: each chat turn may target a different
|
||||
`agent_server_id`. New constraint: the target agent must belong to a
|
||||
team currently attached to the workspace. The REST endpoint validates
|
||||
this and returns 403 otherwise. The frontend picker already enforces
|
||||
it as a UX affordance.
|
||||
|
||||
---
|
||||
|
||||
## 11. Migration
|
||||
|
||||
### 11.1 Current state (pre-migration)
|
||||
|
||||
* Mnemosyne is currently not in a released/working deployment; a
|
||||
fresh rollout is possible.
|
||||
* Daedalus has existing `PallasInstance` rows (registered via
|
||||
`POST /api/v1/pallas`) but none have a team JWT.
|
||||
* Kottos / Mentor / Iolaus each carry `forward_inbound_auth: true`
|
||||
in `fastagent.config.yaml` and currently rely on the Pallas
|
||||
forwarding patch to pass Daedalus's per-turn JWT to Mnemosyne.
|
||||
|
||||
### 11.2 Order of operations (must follow)
|
||||
|
||||
1. **Mnemosyne phase 2 deploys.** REST `/mcp_server/api/teams/` is
|
||||
live; old per-turn JWT path still works. No consumers yet.
|
||||
2. **Daedalus phase 4 deploys.** New columns + lifecycle hooks +
|
||||
`provision_teams` command. On upgrade, migration creates columns
|
||||
with default NULL status; existing PallasInstances remain
|
||||
functional on the legacy (non-Mnemosyne) path.
|
||||
3. **Operator runs `provision_teams`.** Every existing PallasInstance
|
||||
gets a team in Mnemosyne and a stored JWT.
|
||||
4. **Operator distributes JWTs** to each Pallas deployment
|
||||
(Kottos / Mentor / Iolaus / Daedalus-chat). Each deployment
|
||||
updates `fastagent.secrets.yaml`, removes
|
||||
`forward_inbound_auth: true`, restarts.
|
||||
5. **Pallas phase 3 cleanup deploys.** Forwarding infrastructure
|
||||
removed from Pallas codebase. Safe only after all deployments
|
||||
have switched to static JWTs.
|
||||
6. **Daedalus per-turn token path retires.** `mnemosyne/tokens.py`
|
||||
and its config (`MNEMOSYNE_SIGNING_SECRET`,
|
||||
`MNEMOSYNE_SIGNING_KID`, `MNEMOSYNE_TOKEN_TTL_SECONDS`) are
|
||||
removed after Daedalus chat's own team JWT is in place.
|
||||
|
||||
### 11.3 Rollback story
|
||||
|
||||
* **Mnemosyne phase 2**: safe to roll back — old per-turn JWT path
|
||||
untouched; new endpoints simply disappear.
|
||||
* **Daedalus phase 4**: safe to roll back until `provision_teams`
|
||||
has run. After that, the JWTs are already distributed; rolling
|
||||
back means the JWTs go unused but nothing breaks.
|
||||
* **Pallas phase 3**: *not* safe to roll back independently — if any
|
||||
deployment still has `forward_inbound_auth: true` pointing at code
|
||||
that no longer exists, that deployment fails to start. Sequence
|
||||
correctly.
|
||||
|
||||
---
|
||||
|
||||
## 12. Deprecated / removed
|
||||
|
||||
At end-of-migration (after Phase 6):
|
||||
|
||||
### Pallas (`pallas/pallas/_fastagent_patch.py`)
|
||||
- `_DynamicBearerAuth` (httpx Auth subclass)
|
||||
- `_CurrentBearer` ContextVar plumbing
|
||||
- `_refresh_forward_servers()` YAML scanner
|
||||
- `_prepare_headers_and_auth_with_forward` (the header-mutation monkey-patch)
|
||||
- `_send_request_with_trace`, `_session_call_tool_with_trace`,
|
||||
`_execute_on_server_with_trace` (diagnostic wrappers installed
|
||||
because the forwarding path was opaque)
|
||||
- `install()` function in `_fastagent_patch`; `pallas/__init__.py` no
|
||||
longer auto-installs
|
||||
|
||||
### Agent team configs (`kottos/`, `mentor/`, `iolaus/`)
|
||||
- `forward_inbound_auth: true` under any server stanza in
|
||||
`fastagent.config.yaml`
|
||||
|
||||
### Daedalus (`daedalus/backend/daedalus/`)
|
||||
- `mnemosyne/tokens.py` (per-turn JWT mint)
|
||||
- Config settings: `MNEMOSYNE_SIGNING_SECRET`, `MNEMOSYNE_SIGNING_KID`,
|
||||
`MNEMOSYNE_TOKEN_TTL_SECONDS`, `MNEMOSYNE_MCP_URL`
|
||||
|
||||
### Mnemosyne (`mnemosyne/mcp_server/`)
|
||||
- `_JWT_ISS` constant-string (replaced by set containing `daedalus`
|
||||
and `mnemosyne`; `daedalus` entry removed in a later version once
|
||||
per-turn path is deleted)
|
||||
- The `_JTI_CACHE` replay logic continues to exist for the per-turn
|
||||
path until that path retires; team JWTs bypass it entirely
|
||||
|
||||
---
|
||||
|
||||
## 13. Security
|
||||
|
||||
### 13.1 Token lifetimes
|
||||
* **Opaque MCPToken**: until revoked (admin). Rotation is manual.
|
||||
* **Per-turn JWT**: ≤ 10 minutes (existing `MNEMOSYNE_TOKEN_TTL_SECONDS`).
|
||||
  Retires at end-of-migration (after Phase 6; see §11.2 step 6).
|
||||
* **Team JWT**: 10 years. Rationale: operator cannot tolerate a
|
||||
silent expiry-induced outage in a year. Revocation is explicit via
|
||||
`Team.active`, `Team.active_jti`, or key rotation.
|
||||
|
||||
### 13.2 Revocation levers, in order of granularity
|
||||
1. `PUT /teams/{id}/workspaces/` with `[]` — team sees nothing, JWT
|
||||
still validates. Useful for pausing a deployment without
|
||||
redistributing tokens.
|
||||
2. `DELETE /teams/{id}/` — team marked inactive. All its tokens
|
||||
rejected. Restoring requires re-POST (new id) or admin DB edit.
|
||||
3. `POST /teams/{id}/rotate/` — `active_jti` changes; the token that
|
||||
leaked stops working; the new JWT must be distributed.
|
||||
4. `MCPSigningKey.retire()` — nuclear option. All JWTs signed with
|
||||
that kid stop validating. Re-key + re-issue every team token.
|
||||
|
||||
### 13.3 At-rest protection
|
||||
* `MCPToken.token_hash`: SHA-256 of plaintext; plaintext never stored.
|
||||
* `MCPSigningKey.secret_hex`: 256-bit hex secret stored in Mnemosyne
|
||||
DB only (not distributed).
|
||||
* `PallasInstance.team_jwt_encrypted`: Fernet-encrypted by Daedalus's
|
||||
`SECRET_KEY` (or `MNEMOSYNE_FERNET_KEY` if configured);
|
||||
ciphertext at rest.
|
||||
|
||||
### 13.4 Audit points
|
||||
* Every auth failure increments `mcp_auth_failures_total{reason=…}`.
|
||||
* Every team lifecycle action logs `pallas_team_{provisioned,
|
||||
deleted, rotated, workspaces_updated}` in Daedalus with full
|
||||
correlation IDs.
|
||||
* Every bearer resolution logs the principal type + resolved
|
||||
library count at DEBUG (INFO until shakedown stabilizes).
|
||||
|
||||
### 13.5 Isolation model
|
||||
|
||||
Separation of "work" vs "personal" agents composes from three
|
||||
independent mechanisms:
|
||||
|
||||
1. **Per-token scope (Mnemosyne)**: each MCPToken carries its own
|
||||
`allowed_libraries`. A personal token and a work token may
|
||||
belong to the same user yet see disjoint Library sets.
|
||||
2. **Per-workspace attachment (Daedalus)**: a Pallas instance only
|
||||
sees workspaces explicitly attached to it. Work Pallas and
|
||||
personal Pallas attach to disjoint workspaces.
|
||||
3. **Per-Daedalus-instance (deployment)**: the strongest isolation
|
||||
is two Daedalus deployments pointing at distinct Mnemosyne
|
||||
accounts. Nothing the operator does in deployment A reaches any
|
||||
data accessible to deployment B.
|
||||
|
||||
For typical operator isolation, (1) + (2) suffice. (3) is the
|
||||
escape hatch for hard compartmentalization.
|
||||
|
||||
---
|
||||
|
||||
## 14. Testing
|
||||
|
||||
### 14.1 Mnemosyne test surface
|
||||
* `resolve_mcp_jwt` accepts `iss in {daedalus, mnemosyne}`.
|
||||
* `typ=team` branch: bypasses replay cache; resolves team + active_jti.
|
||||
* `typ=team`: rejects if team missing, inactive, or `jti` stale.
|
||||
* `LibraryMembership`: owner can grant; reader cannot; grant form
|
||||
filters correctly.
|
||||
* `MCPToken.allowed_libraries` empty → resolved library set empty.
|
||||
* `TeamWorkspaceAssignment` PUT is idempotent and replaces, not
|
||||
unions.
|
||||
* `/mcp_server/api/teams/` endpoints: create, delete, rotate,
|
||||
workspaces PUT, all authenticated with a per-user DRF token and
|
||||
scoped to the team's `owner` (non-owner requests return 404).
|
||||
|
||||
### 14.2 Daedalus test surface
|
||||
* `on_pallas_registered` populates `team_jwt_encrypted` and transitions
|
||||
status to `provisioned`.
|
||||
* `on_workspace_pallas_attached` triggers the correct PUT payload.
|
||||
* Agent-picker endpoint filters `AgentConnection` by attached
|
||||
Pallas instances.
|
||||
* `provision_teams` is idempotent.
|
||||
|
||||
### 14.3 Integration
|
||||
* End-to-end: third-party MCP client with MCPToken → Mnemosyne
|
||||
search scoped to `allowed_libraries`.
|
||||
* End-to-end: Pallas agent with team JWT → Mnemosyne search scoped
|
||||
to team's attached workspaces.
|
||||
* End-to-end: workspace detached from team → agent no longer sees
|
||||
that workspace's libraries (on next request, not stale-cached).
|
||||
|
||||
---
|
||||
|
||||
## 15. Phased delivery
|
||||
|
||||
| # | Phase | Surface | Deployable independently? |
|
||||
|---|---|---|---|
|
||||
| 1 | Design doc | This file | Yes — this document |
|
||||
| 2 | Mnemosyne | `LibraryMembership`, `MCPToken.allowed_libraries`, `Team`, `TeamWorkspaceAssignment`, unified `auth.py` resolver, `/mcp_server/api/teams/` REST, admin UIs, backfill, tests | Yes — old per-turn JWT path untouched |
|
||||
| 3 | Pallas cleanup | Remove `_fastagent_patch.py` internals, docs | No — must wait until all deployments use static JWTs |
|
||||
| 4 | Daedalus integration | `workspace_pallas_assignments`, `team_jwt_encrypted`, `pallas_team_mnemosyne_status`, lifecycle hooks, reconciler, `provision_teams`, admin API, agent-picker filter, register chat as team | Yes — new columns nullable, legacy path still works |
|
||||
| 5 | Daedalus frontend | Workspace settings attached-teams picker, agent picker namespacing | Yes — backwards-compatible once phase 4 ships |
|
||||
| 6 | Agent team cutovers | Kottos / Mentor / Iolaus paste JWT, remove `forward_inbound_auth`, restart | Yes — one at a time |
|
||||
| 7 | Documentation | Mnemosyne README, Pallas README + `docs/auth.md`, Daedalus operator docs, cross-references to this file | Yes |
|
||||
|
||||
---
|
||||
|
||||
## 16. Open items (v1)
|
||||
|
||||
None — all decisions are closed in this revision. Future revisions
|
||||
that add scope (e.g., team-level library scoping finer than
|
||||
workspace granularity; OAuth 2.1 for external MCP clients;
|
||||
per-library audit logs) will ship as `_v2.md` alongside this file.
|
||||
|
||||
---
|
||||
|
||||
## 17. Cross-references
|
||||
|
||||
* Existing Mnemosyne per-turn JWT implementation:
|
||||
`mnemosyne/mnemosyne/mcp_server/auth.py`,
|
||||
`mnemosyne/mnemosyne/mcp_server/models.py` (`MCPToken`,
|
||||
`MCPSigningKey`).
|
||||
* Existing Daedalus Mnemosyne integration:
|
||||
`daedalus/backend/daedalus/mnemosyne/` (`client.py`,
|
||||
`tokens.py`, `lifecycle.py`, `reconciler.py`),
|
||||
`daedalus/backend/daedalus/api/v1/pallas.py`.
|
||||
* Existing Pallas auth-forwarding patch (to be removed):
|
||||
`pallas/pallas/_fastagent_patch.py`,
|
||||
`pallas/pallas/__init__.py`.
|
||||
658
docs/DAEDALUS_PALLAS_INTEGRATION_v2.md
Normal file
658
docs/DAEDALUS_PALLAS_INTEGRATION_v2.md
Normal file
@@ -0,0 +1,658 @@
|
||||
# Daedalus ↔ Pallas ↔ Mnemosyne Integration — v2
|
||||
|
||||
**Status:** Approved design — supersedes
|
||||
[`DAEDALUS_PALLAS_INTEGRATION_v1.md`](DAEDALUS_PALLAS_INTEGRATION_v1.md).
|
||||
**Authoritative home:** `mnemosyne/docs/DAEDALUS_PALLAS_INTEGRATION_v2.md`
|
||||
**Versioning:** subsequent major revisions ship as `..._v3.md` etc.
|
||||
alongside this file. Cross-service docs (Daedalus, Pallas) link here.
|
||||
|
||||
---
|
||||
|
||||
## 1. Summary
|
||||
|
||||
This document describes the end-state authentication / authorization
|
||||
model connecting three services:
|
||||
|
||||
* **Mnemosyne** — knowledge platform. Owns Libraries, users, and the
|
||||
MCP surface third-party clients query.
|
||||
* **Daedalus** — workspace + file-lifecycle UI. Registers Pallas
|
||||
instances, syncs file content to Mnemosyne, drives chat. Acts on
|
||||
behalf of one Mnemosyne user per Daedalus instance.
|
||||
* **Pallas** — FastAgent-backed MCP host that exposes agent teams
|
||||
(Kottos, Mentor, Iolaus, …) as HTTP MCP servers.
|
||||
|
||||
**What changed from v1:**
|
||||
|
||||
* **Single token model.** The two-token split in v1 (DRF `authtoken`
|
||||
for REST, `MCPToken` for `/mcp/`) is gone. One model —
|
||||
[`UserToken`](../mnemosyne/mcp_server/models.py) — authenticates both
|
||||
surfaces, managed from one UI at `/profile/tokens/`. The DRF
|
||||
`authtoken` app has been removed from `INSTALLED_APPS`.
|
||||
* **Per-user authorization on the REST surface.** The Daedalus-facing
|
||||
endpoints (`/library/api/*`, `/mcp_server/api/teams/*`) are no longer
|
||||
open to any authenticated account. Each `Team` has an `owner` FK and
|
||||
each workspace-scoped `Library` has an `owner_username` property; the
|
||||
endpoints scope by these and return 404 for non-owners. The
|
||||
`daedalus-service` shared account has been retired.
|
||||
* **Per-turn JWT path retired.** The legacy `iss=daedalus` JWT flow
|
||||
  (v1 §5.1, §6.2) is gone. Mnemosyne now validates only one JWT shape:
|
||||
  `typ=team`, `iss=mnemosyne`. The `_resolve_jwt_actor` service-user
|
||||
  fallback is gone, and the replay cache is now dead code (see §5.1).
|
||||
* **Authorization headers normalised to `Bearer`.** DRF
|
||||
`TokenAuthentication` (and its `Token` keyword) is replaced by
|
||||
[`UserTokenAuthentication`](../mnemosyne/mcp_server/drf_auth.py),
|
||||
which accepts `Authorization: Bearer <plaintext>`. Anonymous
|
||||
requests get **401 + `WWW-Authenticate: Bearer`** (RFC 7235).
|
||||
|
||||
Everything else in v1 — the resolved-library abstraction, team JWT
|
||||
shape, Pallas's static-bearer configuration, the workspace ↔ Team
|
||||
attachment model in Daedalus, agent picker UX, signing-key model — is
|
||||
unchanged.
|
||||
|
||||
---
|
||||
|
||||
## 2. Motivation
|
||||
|
||||
v1 closed the per-turn JWT forwarding hairball by introducing static
|
||||
team JWTs. v2 finishes the cleanup pass: it deletes the per-turn JWT
|
||||
path entirely (now that Daedalus has migrated off it), collapses the
|
||||
remaining two-token muddle into a single `UserToken` system, and tightens
|
||||
the REST surface so authentication-as-user is sufficient for access
|
||||
control without a shared service account.
|
||||
|
||||
---
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Services and responsibilities
|
||||
|
||||
| Service | Role in auth model |
|
||||
|---|---|
|
||||
| **Mnemosyne** | Owns Libraries, Library memberships, `UserToken`s, Teams, `TeamWorkspaceAssignment`s, signing keys. Validates bearers. Resolves every authenticated request to a Library set. |
|
||||
| **Daedalus** | Control plane. Registers Pallas instances as Teams in Mnemosyne. Manages workspace ↔ team attachments. Stores team JWTs for copying into Pallas deployment configs. Acts as a single Mnemosyne user via a `UserToken`. |
|
||||
| **Pallas** | Stateless MCP host. Holds a static team JWT in `fastagent.secrets.yaml`. No custom auth-forwarding code. |
|
||||
|
||||
### 3.2 Two credential types
|
||||
|
||||
Every authenticated request to Mnemosyne presents a Bearer token of
|
||||
exactly one of these shapes:
|
||||
|
||||
| # | Credential | `iss` | Issuer | Lifetime | Used on | Library scope source |
|
||||
|---|---|---|---|---|---|---|
|
||||
| 1 | **Opaque `UserToken`** | n/a | The Mnemosyne user, via `/profile/tokens/` | Until revoked / expiry | `/mcp/` and DRF REST | MCP: `allowed_libraries`. REST: ignored (owner-scoped). |
|
||||
| 2 | **Team JWT** | `mnemosyne` | Mnemosyne (`/mcp_server/api/teams/`) | 10 years | `/mcp/` only | Live DB lookup via `TeamWorkspaceAssignment → Library` |
|
||||
|
||||
The v1 per-turn JWT (category 2 in v1) has been retired and is no
|
||||
longer accepted by `resolve_mcp_jwt`.
|
||||
|
||||
### 3.3 Scope split by surface
|
||||
|
||||
A `UserToken` carries optional `allowed_libraries` / `allowed_tools`
|
||||
fields. These are honoured **only on the MCP surface** (`/mcp/`):
|
||||
|
||||
* **`/mcp/`** — `MCPAuthMiddleware` enforces `allowed_libraries`
|
||||
(fail-closed: empty list = zero libraries) and `allowed_tools` (empty
|
||||
list = any tool). This is the surface third-party clients (Claude
|
||||
Desktop, Cline) use.
|
||||
* **`/library/api/*`, `/mcp_server/api/teams/*`** — The DRF auth class
|
||||
resolves *who* is calling. Access is gated by `Team.owner`
|
||||
(mcp_server) and `Library.owner_username` (library workspaces). The
|
||||
  token's `allowed_*` fields are ignored. Daedalus tokens are therefore
|
||||
unrestricted; the user identity plus owner-scope is the access model.
|
||||
|
||||
The rationale: enforcing `allowed_libraries` on the REST endpoints
|
||||
would force Daedalus to mint an effectively-unrestricted token (since
|
||||
it manages the whole workspace lifecycle), which would defeat the
|
||||
field. Owner-scope already encodes the right access pattern there.
|
||||
|
||||
### 3.4 Resolved-library abstraction (MCP)
|
||||
|
||||
Mnemosyne's MCP auth middleware populates a single
|
||||
`resolved_libraries: list[str]` per request. Downstream code (search,
|
||||
get_chunk, …) only reads that list.
|
||||
|
||||
```
|
||||
Bearer → classify → dispatch
|
||||
├─ Opaque UserToken → token.allowed_libraries (JSON list of UIDs)
|
||||
└─ team JWT (typ=team) → live DB join:
|
||||
TeamWorkspaceAssignment.workspace_id
|
||||
→ Library.workspace_id → Library.uid
|
||||
↓
|
||||
resolved_libraries: list[str]
|
||||
↓
|
||||
downstream tools
|
||||
```
|
||||
|
||||
Fail-closed: empty resolution → no libraries visible.
|
||||
|
||||
---
|
||||
|
||||
## 4. Data model
|
||||
|
||||
### 4.1 Mnemosyne
|
||||
|
||||
#### `UserToken` (renamed from `MCPToken`)
|
||||
[`mnemosyne/mcp_server/models.py`](../mnemosyne/mcp_server/models.py).
|
||||
Per-user opaque bearer. Hashed at rest (SHA-256, 64-char hex).
|
||||
|
||||
```python
|
||||
class UserToken(models.Model):
|
||||
user = FK(User, related_name="api_tokens")
|
||||
token_hash = CharField(64, unique=True, db_index=True)
|
||||
name = CharField(100)
|
||||
is_active = BooleanField(default=True)
|
||||
expires_at = DateTimeField(null=True, blank=True)
|
||||
last_used_at = DateTimeField(null=True, blank=True)
|
||||
allowed_tools = JSONField(default=list, blank=True)
|
||||
allowed_libraries = JSONField(default=list, blank=True)
|
||||
created_at, updated_at = …
|
||||
```
|
||||
|
||||
* Plaintext shown once at mint via
|
||||
[`UserTokenManager.create_token`](../mnemosyne/mcp_server/models.py);
|
||||
never persisted.
|
||||
* Display masking via `get_masked_token()` returns `tok_…<hash[:8]>`.
|
||||
* `allowed_*` fields apply only on `/mcp/` — see §3.3.
|
||||
|
||||
#### `LibraryMembership`
|
||||
Unchanged from v1. Roles `owner` / `manager` / `reader` over Neo4j
|
||||
Libraries (joined by `uid` string since Library is a neomodel node).
|
||||
|
||||
#### `Team`
|
||||
v1 + new non-null `owner` FK:
|
||||
|
||||
```python
|
||||
class Team(models.Model):
|
||||
id = UUIDField(primary_key=True, editable=False)
|
||||
name = CharField(200)
|
||||
owner = FK(User, on_delete=PROTECT, related_name="teams")
|
||||
active = BooleanField(default=True)
|
||||
active_jti = UUIDField(null=True)
|
||||
created_at, updated_at = …
|
||||
```
|
||||
|
||||
`Team.owner` is set on creation in
|
||||
[`team_create`](../mnemosyne/mcp_server/api/teams.py) from
|
||||
`request.user`. All other team endpoints filter by `(pk, owner=request.user)`;
|
||||
non-owners receive 404, never 403, so a team's existence isn't
|
||||
disclosed across users.
|
||||
|
||||
Soft-delete via `Team.active = False` is unchanged.
|
||||
|
||||
#### `TeamWorkspaceAssignment`
|
||||
Unchanged from v1. Live-queried per request; `PUT /workspaces/`
|
||||
replaces the assignment set.
|
||||
|
||||
#### `MCPSigningKey`
|
||||
Unchanged. Signs team JWTs.
|
||||
|
||||
#### `Library.owner_username` (new neomodel property)
|
||||
[`mnemosyne/library/models.py`](../mnemosyne/library/models.py). For
|
||||
workspace-scoped libraries (i.e. those with `workspace_id` set), the
|
||||
Mnemosyne username of the creating user. Null for global libraries.
|
||||
Indexed.
|
||||
|
||||
```python
|
||||
owner_username = StringProperty(required=False, index=True)
|
||||
```
|
||||
|
||||
The workspace endpoints (`/library/api/workspaces/…`) set this on
|
||||
create and require `lib.owner_username == request.user.username` for
|
||||
all mutations and reads; non-owners get 404 on GET/PUT and 204 on
|
||||
DELETE (idempotent).
|
||||
|
||||
### 4.2 Daedalus (informational — managed in the Daedalus repo)
|
||||
|
||||
Unchanged from v1 except:
|
||||
|
||||
* `vault_mnemosyne_daedalus_service_password` is **gone**. Daedalus
|
||||
authenticates to Mnemosyne with a `UserToken` plaintext minted at
|
||||
`/profile/tokens/`, stored in whatever secret the operator wires
|
||||
(suggestion: `vault_mnemosyne_user_token`).
|
||||
* Daedalus's HTTP client sends `Authorization: Bearer <plaintext>` to
|
||||
every Mnemosyne endpoint (`/library/api/*`, `/mcp_server/api/teams/*`,
|
||||
`/mcp/`). The `Token <key>` keyword is no longer accepted anywhere.
|
||||
|
||||
### 4.3 Pallas
|
||||
Unchanged from v1. Static `Authorization: Bearer <team-jwt>` in
|
||||
`fastagent.secrets.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## 5. JWT claim shapes
|
||||
|
||||
Only one JWT shape remains — the team JWT from v1 §5.2:
|
||||
|
||||
```json
|
||||
{
|
||||
"iss": "mnemosyne",
|
||||
"aud": "mnemosyne",
|
||||
"sub": "team:<pallas_instance_uuid>",
|
||||
"typ": "team",
|
||||
"iat": 1715000000,
|
||||
"exp": 1976000000,
|
||||
"jti": "uuid4"
|
||||
}
|
||||
```
|
||||
|
||||
[`mnemosyne/mcp_server/teams.py:mint_team_jwt`](../mnemosyne/mcp_server/teams.py).
|
||||
|
||||
### 5.1 Validator changes vs v1
|
||||
|
||||
[`mnemosyne/mcp_server/auth.py`](../mnemosyne/mcp_server/auth.py):
|
||||
|
||||
* `resolve_mcp_jwt` no longer accepts `iss=daedalus`. The `_JTI_CACHE`
|
||||
replay cache still exists but is exercised by no live code path —
|
||||
scheduled for removal in a follow-up cleanup commit.
|
||||
* `_resolve_jwt_actor` resolves to `team.owner` (the Mnemosyne user
|
||||
that created the team) rather than a synthetic service user. Audit
|
||||
log / usage accounting now correctly attribute each turn to the
|
||||
acting user.
|
||||
|
||||
```python
|
||||
def _resolve_jwt_actor(claims: dict):
|
||||
if claims.get("typ") != "team":
|
||||
raise MCPAuthError("Per-turn JWTs are no longer accepted; mint a team JWT.")
|
||||
team = Team.objects.select_related("owner").get(pk=claims["team_id"])
|
||||
if not team.active:
|
||||
raise MCPAuthError("Team JWT references an inactive team.")
|
||||
if not team.owner.is_active:
|
||||
raise MCPAuthError("Team owner is disabled.")
|
||||
return team.owner
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Auth flow
|
||||
|
||||
### 6.1 Third-party MCP client with `UserToken`
|
||||
1. Client sends `Authorization: Bearer <plaintext>` to `/mcp/`.
|
||||
2. `MCPAuthMiddleware` hashes → looks up `UserToken` → validates
|
||||
active/expired/user-active.
|
||||
3. `resolved_libraries = list(token.allowed_libraries or [])`.
|
||||
4. Fails closed if empty.
|
||||
|
||||
### 6.2 Agent team (Kottos / Mentor / Iolaus / Daedalus-chat-team)
|
||||
1. Pallas sends `Authorization: Bearer <team-jwt>` to `/mcp/`.
|
||||
2. Middleware validates signature, `iss=mnemosyne`, `typ=team`.
|
||||
3. Loads `Team` by UUID from `sub`. Verifies `active=True` and
|
||||
`jti == active_jti`.
|
||||
4. Expands to `resolved_libraries` via `TeamWorkspaceAssignment` →
|
||||
`Library.workspace_id`.
|
||||
5. The acting user (for audit, usage accounting) is `team.owner`.
|
||||
|
||||
### 6.3 Daedalus REST control / ingest
|
||||
1. Daedalus sends `Authorization: Bearer <user-token-plaintext>` to
|
||||
`/library/api/*` or `/mcp_server/api/teams/*`.
|
||||
2. DRF `UserTokenAuthentication` (first in the auth stack) resolves
|
||||
the token to its user.
|
||||
3. Endpoint scopes by `Team.owner` (mcp_server) or
|
||||
`Library.owner_username` (library). Non-owner ⇒ 404.
|
||||
|
||||
### 6.4 Browser / web session
|
||||
SessionAuthentication runs second; cookie-authenticated users hit the
|
||||
DRF browsable API as themselves with no special handling.
|
||||
|
||||
### 6.5 Failure modes
|
||||
|
||||
| Condition | Response |
|
||||
|---|---|
|
||||
| No `Authorization` header | 401 + `WWW-Authenticate: Bearer` |
|
||||
| `Authorization: Token …` (legacy DRF keyword) | 401 (not consumed by any auth class) |
|
||||
| Invalid bearer plaintext | 401 + `WWW-Authenticate: Bearer` |
|
||||
| Inactive / expired token | 401 |
|
||||
| Disabled user | 401 |
|
||||
| JWT signature invalid | 401 + `WWW-Authenticate: Bearer` |
|
||||
| JWT `exp` past (+30s leeway) | 401 |
|
||||
| JWT `iss` not `mnemosyne` | 401 |
|
||||
| JWT `typ` not `team` (legacy per-turn) | 401 ("per-turn JWTs no longer accepted") |
|
||||
| Team inactive / unknown / `jti` stale | 401 |
|
||||
| Team endpoint, non-owner caller | 404 |
|
||||
| Workspace endpoint, non-owner caller (GET/PUT) | 404 |
|
||||
| Workspace endpoint, non-owner caller (DELETE) | 204 (idempotent) |
|
||||
|
||||
---
|
||||
|
||||
## 7. REST API — Mnemosyne team lifecycle
|
||||
|
||||
Endpoints under `/mcp_server/api/teams/` are authenticated as the
|
||||
Mnemosyne user the team belongs to via a per-user `UserToken`
|
||||
(`Authorization: Bearer <plaintext>`, minted at `/profile/tokens/`).
|
||||
Each team has an `owner` FK; non-owners receive 404 (never 403) so a
|
||||
team's existence isn't disclosed across users.
|
||||
|
||||
### 7.1 `POST /mcp_server/api/teams/`
|
||||
Create a team. `Team.owner` is set to `request.user`.
|
||||
|
||||
**Request**
|
||||
```json
|
||||
{ "id": "a3f1…", "name": "Kottos" }
|
||||
```
|
||||
|
||||
**Response 201** — fresh id
|
||||
```json
|
||||
{ "id": "a3f1…", "name": "Kottos", "jwt": "eyJhbGci…" }
|
||||
```
|
||||
|
||||
**Response 200** — same id, same owner (idempotent; no new JWT issued).
|
||||
**Response 409** — same id, different owner ("Team id is already in use.").
|
||||
|
||||
### 7.2 `DELETE /mcp_server/api/teams/{id}/`
|
||||
Soft-delete (`active=False`, clear `active_jti`). Old JWT invalid on
|
||||
next call. Non-owner ⇒ 404.
|
||||
|
||||
### 7.3 `PUT /mcp_server/api/teams/{id}/workspaces/`
|
||||
Replace the team's workspace assignment set. Idempotent.
|
||||
|
||||
```json
|
||||
{ "workspace_ids": ["ws_abc", "ws_def"] }
|
||||
```
|
||||
|
||||
### 7.4 `POST /mcp_server/api/teams/{id}/rotate/`
|
||||
Generate a fresh `jti` and JWT, replace `active_jti`. Old JWT invalid
|
||||
immediately.
|
||||
|
||||
**Upsert-on-missing.** If no `Team` exists for `id`, rotate creates one
|
||||
owned by the caller (with `name = str(id)`) and mints its first JWT —
|
||||
the operator clicks "Rotate JWT" in Daedalus settings and things just
|
||||
work even if Daedalus's `provision_teams` workflow never ran for this
|
||||
PallasInstance. The placeholder name can be edited via admin.
|
||||
|
||||
| Response | Condition |
|
||||
|---|---|
|
||||
| **200** + `jwt` | Same-owner id (rotates) or fresh id (upserts + mints) |
|
||||
| **409** | `id` exists under a different owner (`"Team id is already in use."`) |
|
||||
| **409** | Team is inactive (soft-deleted) — explicit recreate required |
|
||||
|
||||
The upsert path logs `team_rotate upserted_missing team_id=… owner=…`
|
||||
at INFO. Surfacing this in metrics is a useful drift signal: it means
|
||||
Daedalus and Mnemosyne have fallen out of sync on team provisioning.
|
||||
|
||||
### 7.5 `GET /mcp_server/api/teams/{id}/`
|
||||
Read-only detail (no JWT). Used by the Daedalus reconciler.
|
||||
|
||||
### 7.6 `/library/api/ingest/` and `/library/api/jobs/…`
|
||||
Same owner-scope model as the workspace endpoints: every ingest write,
|
||||
job read, retry, and list call is filtered against
|
||||
`Library.owner_username == request.user.username` (global libraries
|
||||
with null `owner_username` remain shared). Cross-user calls get 404
|
||||
with the same "not registered" wording as a genuinely missing
|
||||
workspace — existence is not disclosed across users. The list endpoint
|
||||
silently filters; a `library_uid` the caller has no access to returns
|
||||
an empty list rather than 404.
|
||||
|
||||
---
|
||||
|
||||
## 8. Daedalus lifecycle hooks
|
||||
|
||||
Unchanged from v1 §8 except the HTTP client now sends
|
||||
`Authorization: Bearer <UserToken-plaintext>` and Daedalus's config
|
||||
exposes one `UserToken` plaintext (one per Mnemosyne user the Daedalus
|
||||
instance acts on behalf of, in deployments that multiplex).
|
||||
|
||||
---
|
||||
|
||||
## 9. Operator workflows
|
||||
|
||||
### 9.1 Register a new Pallas deployment
|
||||
Unchanged from v1 §9.1.
|
||||
|
||||
### 9.2 Attach a Pallas team to a workspace
|
||||
Unchanged from v1 §9.2.
|
||||
|
||||
### 9.3 Retire a Pallas deployment
|
||||
Unchanged from v1 §9.3.
|
||||
|
||||
### 9.4 Rotate a compromised team JWT
|
||||
Unchanged from v1 §9.4.
|
||||
|
||||
### 9.5 Provision Mnemosyne integration on a fresh Daedalus instance
|
||||
Replaces v1 §9.5 (`provision_teams`) and the deleted
|
||||
`ensure_service_user` flow:
|
||||
|
||||
1. **Mint a `UserToken` for the Mnemosyne user** Daedalus will act as:
|
||||
`/profile/tokens/add/` (UI) or
|
||||
`python manage.py create_user_token --user <username> --name "Daedalus"`.
|
||||
Copy the plaintext (shown once).
|
||||
2. **Stage the plaintext in Daedalus's config** as the bearer for all
|
||||
Mnemosyne calls.
|
||||
3. **Run Daedalus's `provision_teams`** to materialize a `Team` row in
|
||||
Mnemosyne for every existing `PallasInstance`.
|
||||
4. **Distribute team JWTs** to each Pallas deployment as v1 §9.5
|
||||
describes.
|
||||
|
||||
### 9.6 Issue a `UserToken` for a third-party MCP client
|
||||
1. User logs in to Mnemosyne, navigates to `/profile/tokens/`, clicks
|
||||
"Generate API Token".
|
||||
2. (Optional) opens the "Restrictions (optional)" section to set
|
||||
`allowed_tools` / `allowed_libraries` — these apply only on
|
||||
`/mcp/`; for purely REST use they can stay empty.
|
||||
3. Plaintext is shown once on the response page.
|
||||
4. User pastes plaintext into the third-party client's config (Claude
|
||||
Desktop, Cline, etc.) with `Authorization: Bearer …`.
|
||||
|
||||
The same UI and command (`create_user_token`) mint tokens for any
|
||||
purpose — Daedalus, MCP clients, scripts, CI. There is no separate
|
||||
"DRF token" category.
|
||||
|
||||
---
|
||||
|
||||
## 10. UX changes in Daedalus
|
||||
|
||||
Unchanged from v1 §10.
|
||||
|
||||
---
|
||||
|
||||
## 11. Migration
|
||||
|
||||
### 11.1 State at the start of v2
|
||||
|
||||
* Mnemosyne is not in a production deployment; migrations are reset on
|
||||
schema changes and the project assumes a clean DB on the next
|
||||
release.
|
||||
* Daedalus has already migrated to `Authorization: Bearer <plaintext>`
|
||||
and is configured to use a per-user token; the v1 DRF-token shim is
|
||||
no longer used at runtime.
|
||||
* No live Pallas deployments authenticate via per-turn JWT (the path
|
||||
is removed).
|
||||
|
||||
### 11.2 Order of operations
|
||||
|
||||
1. **Mnemosyne v2 deploys.** New `UserTokenAuthentication`, owner-scoped
|
||||
REST endpoints, retired per-turn JWT validation, removed
|
||||
`authtoken` app. Operator mints a `UserToken` for Daedalus's
|
||||
Mnemosyne account before deploy.
|
||||
2. **Daedalus's config swap.** Operator points Daedalus at the new
|
||||
`UserToken` plaintext. (If Daedalus was still sending
|
||||
`Authorization: Token …`, switch to `Authorization: Bearer …` at
|
||||
the same time.)
|
||||
3. **Existing Teams.** None expected at the v2 cutover (migrations are
|
||||
reset). If any existed, `Team.owner` would need backfill; not in
|
||||
scope.
|
||||
|
||||
### 11.3 Rollback
|
||||
Mnemosyne v2 is a coordinated cutover with Daedalus's bearer-header
|
||||
swap. Rolling Mnemosyne back to v1 without rolling Daedalus back too
|
||||
means Daedalus's `Authorization: Bearer …` won't be recognised on
|
||||
`/library/api/*` (v1 only accepted `Token`). Plan the deploy as a
|
||||
single window.
|
||||
|
||||
---
|
||||
|
||||
## 12. Deprecated / removed in v2
|
||||
|
||||
### Mnemosyne
|
||||
* `rest_framework.authtoken` (removed from `INSTALLED_APPS`).
|
||||
Generated migration drops the `authtoken_token` table on next migrate;
|
||||
on a reset schema there's nothing to drop.
|
||||
* `rest_framework.authentication.TokenAuthentication` and
|
||||
`BasicAuthentication` (removed from
|
||||
`REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"]`).
|
||||
* "API Token" card on `/profile/settings/` (removed). The whole
|
||||
`api_token_regenerate` view + URL are gone.
|
||||
* `mcp_server.management.commands.ensure_service_user` (deleted).
|
||||
* `daedalus-service` user (no longer provisioned by Mnemosyne; no
|
||||
longer assumed by any endpoint).
|
||||
* `MCP_JWT_SERVICE_USERNAME` setting (no longer read by
|
||||
`_resolve_jwt_actor`).
|
||||
* Per-turn JWT path in
|
||||
[`mcp_server/auth.py`](../mnemosyne/mcp_server/auth.py) — accepted
|
||||
shapes shrink to `typ=team` only. `_JTI_CACHE` is now exercised by
|
||||
no live path; scheduled for cleanup.
|
||||
* `MCPToken` (renamed to `UserToken`); `MCPTokenManager`,
|
||||
`MCPTokenAdmin`, `MCPTokenCreateForm`, `MCPTokenEditForm` (renamed
|
||||
in lockstep). The `mcp_…` masked-token prefix becomes `tok_…`.
|
||||
* `create_mcp_token` management command (renamed `create_user_token`).
|
||||
* `/profile/mcp-tokens/` URL prefix (renamed `/profile/tokens/`); URL
|
||||
names `mcp-token-*` (renamed `token-*`).
|
||||
|
||||
### Daedalus
|
||||
* `vault_mnemosyne_daedalus_service_password` (no longer needed; the
|
||||
service user is gone).
|
||||
* Any code path that distinguished DRF-`Token` from MCP-`Bearer` — one
|
||||
bearer header for everything now.
|
||||
|
||||
### Pallas
|
||||
No changes from v1.
|
||||
|
||||
---
|
||||
|
||||
## 13. Security
|
||||
|
||||
### 13.1 Token lifetimes
|
||||
* **`UserToken`**: until revoked (user) or `expires_at`. Rotation is
|
||||
manual via the `/profile/tokens/` dashboard.
|
||||
* **Team JWT**: 10 years. Revocation via `Team.active`,
|
||||
`Team.active_jti`, or key rotation.
|
||||
|
||||
### 13.2 Revocation levers
|
||||
1. `PUT /teams/{id}/workspaces/` with `[]` — team sees nothing, JWT
|
||||
still validates. Useful for pausing without redistributing tokens.
|
||||
2. `DELETE /teams/{id}/` — team inactive, all its JWTs rejected.
|
||||
3. `POST /teams/{id}/rotate/` — `active_jti` changes; leaked JWT
|
||||
stops working.
|
||||
4. **Revoke a `UserToken`** — `/profile/tokens/{id}/revoke/` flips
|
||||
`is_active=False`; immediate effect for both `/mcp/` and REST.
|
||||
5. `MCPSigningKey.retire()` — nuclear option for team JWTs.
|
||||
|
||||
### 13.3 At-rest protection
|
||||
* `UserToken.token_hash`: SHA-256 of plaintext; plaintext never
|
||||
stored.
|
||||
* `MCPSigningKey.secret_hex`: 256-bit hex secret stored in Mnemosyne
|
||||
DB only.
|
||||
* `PallasInstance.team_jwt_encrypted`: Fernet-encrypted by Daedalus.
|
||||
|
||||
### 13.4 Audit attribution
|
||||
Every authenticated request resolves to a real Mnemosyne user:
|
||||
|
||||
* Opaque `UserToken` → `token.user`.
|
||||
* Team JWT → `team.owner`.
|
||||
|
||||
Both flow through to usage accounting (`LLMUsage`, search metrics) and
|
||||
the audit log. The synthetic `daedalus-service` actor is gone; nothing
|
||||
in the audit trail is attributed to a non-user account.
|
||||
|
||||
Notable audit events:
|
||||
|
||||
* `team_create created team_id=… name=…` — fresh team registered.
|
||||
* `team_create idempotent_hit team_id=…` — same-owner re-POST.
|
||||
* `team_create owner_conflict team_id=… caller=…` — id collision.
|
||||
* `team_rotate team_id=… new_jti=…` — explicit rotation.
|
||||
* `team_rotate upserted_missing team_id=… owner=…` — rotate created a
|
||||
missing team on the fly. Useful drift signal: Daedalus and
|
||||
Mnemosyne fell out of sync on team provisioning.
|
||||
* `team_delete team_id=…` — soft-delete.
|
||||
|
||||
### 13.5 Isolation model
|
||||
Unchanged from v1 §13.5.
|
||||
|
||||
---
|
||||
|
||||
## 14. Testing
|
||||
|
||||
### 14.1 Mnemosyne test surface (relevant to v2)
|
||||
* `resolve_mcp_jwt` rejects `iss=daedalus` / non-`team` payloads.
|
||||
* `_resolve_jwt_actor` resolves to `team.owner`; rejects per-turn JWTs
|
||||
and inactive owners. See
|
||||
[`test_auth.py::ResolveJWTActorTest`](../mnemosyne/mcp_server/tests/test_auth.py).
|
||||
* `UserTokenAuthentication` issues 401 + `WWW-Authenticate: Bearer`
|
||||
for anonymous and rejected-token cases; 200 for valid bearer; stashes
|
||||
the `UserToken` on `request.auth`. See
|
||||
[`test_drf_auth.py`](../mnemosyne/mcp_server/tests/test_drf_auth.py).
|
||||
* `Team` endpoints scope by `owner`; cross-user GET/DELETE/PUT return
|
||||
404; same-id different-owner POST/rotate returns 409. `rotate`
|
||||
upserts a missing team owned by the caller. See
|
||||
[`test_teams_api.py`](../mnemosyne/mcp_server/tests/test_teams_api.py).
|
||||
* Ingest endpoints (`POST /library/api/ingest/`,
|
||||
`GET/POST /library/api/jobs/…`) scope by `Library.owner_username`.
|
||||
Cross-user writes/reads return 404; list silently filters. The
|
||||
Cypher-touching paths require Neo4j, so the scoping is exercised by
|
||||
the manual e2e plan in §14.3 rather than unit tests.
|
||||
* `UserToken` model: hash-at-rest, `tok_…` masked prefix,
|
||||
`allowed_libraries` round-trip. See
|
||||
[`test_token.py`](../mnemosyne/mcp_server/tests/test_token.py),
|
||||
[`test_models.py`](../mnemosyne/mcp_server/tests/test_models.py).
|
||||
|
||||
### 14.2 Daedalus test surface
|
||||
Unchanged from v1 §14.2 except:
|
||||
* HTTP client uses `Authorization: Bearer …` against every Mnemosyne
|
||||
endpoint.
|
||||
* Provisioning command depends on a configured `UserToken`, not the
|
||||
retired `daedalus-service` Basic-auth credential.
|
||||
|
||||
### 14.3 Integration
|
||||
* End-to-end: MCP client with `UserToken` → search scoped to
|
||||
`token.allowed_libraries`.
|
||||
* End-to-end: Pallas with team JWT → search scoped to team's attached
|
||||
workspaces.
|
||||
* End-to-end: Daedalus REST call with `UserToken` → workspace
|
||||
mutation succeeds only for the owning user; cross-user attempts get
|
||||
404.
|
||||
* End-to-end: ingest as one user, then a *different* user attempts
|
||||
`POST /library/api/ingest/`, `GET /jobs/{id}/`, `POST /jobs/{id}/retry/`
|
||||
and `GET /jobs/?library_uid=<theirs>` — first three return 404, the
|
||||
list returns an empty array.
|
||||
* End-to-end: anonymous REST call → 401 + `WWW-Authenticate: Bearer`.
|
||||
* End-to-end: `POST /mcp_server/api/teams/{fresh-uuid}/rotate/` on a
|
||||
team Mnemosyne has never seen → 200 + JWT, `Team` row created with
|
||||
`owner=request.user`. Second rotate on the same id → 200 with a
|
||||
fresh `active_jti`. Rotate on an id owned by a different user → 409.
|
||||
|
||||
---
|
||||
|
||||
## 15. Phased delivery
|
||||
|
||||
| # | Phase | Surface | Status |
|
||||
|---|---|---|---|
|
||||
| 1 | Design v1 | [`DAEDALUS_PALLAS_INTEGRATION_v1.md`](DAEDALUS_PALLAS_INTEGRATION_v1.md) | Superseded |
|
||||
| 2 | Mnemosyne core | `LibraryMembership`, `MCPToken`, `Team`, `TeamWorkspaceAssignment`, `/mcp_server/api/teams/`, team JWT mint | Implemented (v1) |
|
||||
| 3 | Pallas cleanup | Remove `_fastagent_patch.py` internals | Implemented (v1) |
|
||||
| 4 | Daedalus integration | Lifecycle hooks, reconciler, `provision_teams`, attached-teams UI | Implemented (v1) |
|
||||
| 5 | Per-user REST authorization | `Team.owner`, `Library.owner_username`, owner-scope on all Daedalus-facing endpoints, `_resolve_jwt_actor` → `team.owner` | Implemented (v2) |
|
||||
| 6 | Token consolidation | Rename `MCPToken` → `UserToken`, `UserTokenAuthentication` DRF class, drop `authtoken` + DRF Token UI, retire per-turn JWT, `Bearer`-first auth stack | Implemented (v2) |
|
||||
| 7 | Documentation | This file; updates to [`mnemosyne_integration.md`](mnemosyne_integration.md) and [`deploy.md`](deploy.md) | Implemented (v2) |
|
||||
|
||||
---
|
||||
|
||||
## 16. Open items (v2)
|
||||
|
||||
* `_JTI_CACHE` in [`auth.py`](../mnemosyne/mcp_server/auth.py) is dead
|
||||
code (the per-turn replay path is gone). Cleanup commit pending; not
|
||||
blocking.
|
||||
* `BasicAuthentication` is removed from the DRF default stack. If any
|
||||
internal tooling relied on it, that path is now broken and will need
|
||||
an explicit re-add to the relevant viewset's `authentication_classes`
|
||||
rather than the global default.
|
||||
|
||||
---
|
||||
|
||||
## 17. Cross-references
|
||||
|
||||
* Mnemosyne MCP auth: [`mnemosyne/mcp_server/auth.py`](../mnemosyne/mcp_server/auth.py).
|
||||
* Mnemosyne DRF auth class: [`mnemosyne/mcp_server/drf_auth.py`](../mnemosyne/mcp_server/drf_auth.py).
|
||||
* Mnemosyne token model: [`mnemosyne/mcp_server/models.py`](../mnemosyne/mcp_server/models.py) (`UserToken`).
|
||||
* Mnemosyne team REST: [`mnemosyne/mcp_server/api/teams.py`](../mnemosyne/mcp_server/api/teams.py).
|
||||
* Mnemosyne workspace REST: [`mnemosyne/library/api/workspaces.py`](../mnemosyne/library/api/workspaces.py).
|
||||
* Token self-service dashboard: [`mnemosyne/mcp_server/views.py`](../mnemosyne/mcp_server/views.py), [`urls.py`](../mnemosyne/mcp_server/urls.py).
|
||||
* `create_user_token` management command: [`mnemosyne/mcp_server/management/commands/create_user_token.py`](../mnemosyne/mcp_server/management/commands/create_user_token.py).
|
||||
* v1 design (superseded but kept for history): [`DAEDALUS_PALLAS_INTEGRATION_v1.md`](DAEDALUS_PALLAS_INTEGRATION_v1.md).
|
||||
22
docs/Makefile
Normal file
22
docs/Makefile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Minimal Sphinx Makefile.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = _build
|
||||
|
||||
.PHONY: help clean html livehtml Makefile
|
||||
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR) $(SOURCEDIR)/reference/apps
|
||||
|
||||
html:
|
||||
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
livehtml:
|
||||
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
|
||||
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank
|
||||
}
|
||||
```
|
||||
|
||||
> **`LLMApi.base_url` convention.** Every Mnemosyne service client
|
||||
> (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`)
|
||||
> treats `base_url` as the **OpenAI-style `/v1` root** and appends a
|
||||
> path-only segment: `/embeddings`, `/rerank`, `/chat/completions`.
|
||||
> So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1`
|
||||
> serves both the embedding and the reranker endpoints — no per-purpose
|
||||
> duplication needed.
|
||||
>
|
||||
> Get this wrong (e.g. set `base_url=http://pan.helu.ca:8400` with no
|
||||
> `/v1`, or have a client prepend `/v1` locally) and you get a
|
||||
> double-prefixed URL like `…/v1/v1/rerank` that 404s silently —
|
||||
> `SearchService._rerank` catches the exception, the UI shows
|
||||
> "Re-rank: Skipped", and the search falls back to raw RRF order.
|
||||
> Check `results.reranker_skip_reason` on the search page for the
|
||||
> specific error.
|
||||
|
||||
## Deliverables
|
||||
|
||||
### 1. Search Service (`library/services/search.py`)
|
||||
|
||||
807
docs/Pattern_SSO-Allauth-Casdoor_V1-02.md
Normal file
807
docs/Pattern_SSO-Allauth-Casdoor_V1-02.md
Normal file
@@ -0,0 +1,807 @@
|
||||
# SSO with Allauth & Casdoor Pattern v1.02
|
||||
|
||||
Standardizes OIDC-based Single Sign-On using Django Allauth and Casdoor, covering adapter customization, user provisioning, group mapping, superuser protection, and configurable local-login fallback. Used by the `core` Django application.
|
||||
|
||||
## 🐾 Red Panda Approval™
|
||||
|
||||
This pattern follows Red Panda Approval standards.
|
||||
|
||||
---
|
||||
|
||||
## Why a Pattern, Not a Shared Implementation
|
||||
|
||||
Every Django project that adopts SSO has different identity-provider configurations, claim schemas, permission models, and organizational structures:
|
||||
|
||||
- A **project management** app needs role claims mapped to project-scoped permissions
|
||||
- An **e-commerce** app needs tenant/store claims with purchase-limit groups
|
||||
- An **RFP tool** (Spelunker) needs organization + group claims mapped to View Only / Staff / SME / Admin groups
|
||||
|
||||
Instead, this pattern defines:
|
||||
|
||||
- **Required components** — every implementation must have
|
||||
- **Required settings** — Django & Allauth configuration values
|
||||
- **Standard conventions** — group names, claim mappings, redirect URL format
|
||||
- **Extension guidelines** — for domain-specific provisioning logic
|
||||
|
||||
---
|
||||
|
||||
## Required Components
|
||||
|
||||
Every SSO implementation following this pattern must provide these files (the SSL patch is optional):
|
||||
|
||||
| Component | Location | Purpose |
|
||||
|-----------|----------|---------|
|
||||
| Social account adapter | `<app>/adapters.py` | User provisioning, group mapping, superuser protection |
|
||||
| Local account adapter | `<app>/adapters.py` | Disable local signup, authentication logging |
|
||||
| Management command | `<app>/management/commands/create_sso_groups.py` | Idempotent group + permission creation |
|
||||
| Login template | `templates/account/login.html` | SSO button + conditional local login form |
|
||||
| SSO signup template | `templates/socialaccount/signup.html` | Email confirmation step for first-time SSO users |
|
||||
| Context processor | `<app>/context_processors.py` | Expose `CASDOOR_ENABLED` / `ALLOW_LOCAL_LOGIN` to templates |
|
||||
| SSL patch (optional) | `<app>/ssl_patch.py` | Development-only SSL bypass |
|
||||
|
||||
### Minimum settings.py configuration
|
||||
|
||||
```python
|
||||
# INSTALLED_APPS — required entries
|
||||
INSTALLED_APPS = [
|
||||
# ... standard Django apps ...
|
||||
'allauth',
|
||||
'allauth.account',
|
||||
'allauth.socialaccount',
|
||||
'allauth.socialaccount.providers.openid_connect',
|
||||
'<your_app>',
|
||||
]
|
||||
|
||||
# MIDDLEWARE — Allauth middleware is required
|
||||
MIDDLEWARE = [
|
||||
# ... standard Django middleware ...
|
||||
'allauth.account.middleware.AccountMiddleware',
|
||||
]
|
||||
|
||||
# AUTHENTICATION_BACKENDS — both local and SSO
|
||||
AUTHENTICATION_BACKENDS = [
|
||||
'django.contrib.auth.backends.ModelBackend',
|
||||
'allauth.account.auth_backends.AuthenticationBackend',
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Standard Values / Conventions
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Every deployment must set these environment variables (or `.env` entries):
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `CASDOOR_ENABLED` | Yes | — | Enable/disable SSO (`true`/`false`) |
|
||||
| `CASDOOR_ORIGIN` | Yes | — | Casdoor backend URL for OIDC discovery |
|
||||
| `CASDOOR_ORIGIN_FRONTEND` | Yes | — | Casdoor frontend URL (may differ behind reverse proxy) |
|
||||
| `CASDOOR_CLIENT_ID` | Yes | — | OAuth client ID from Casdoor application |
|
||||
| `CASDOOR_CLIENT_SECRET` | Yes | — | OAuth client secret from Casdoor application |
|
||||
| `CASDOOR_ORG_NAME` | Yes | — | Default organization slug in Casdoor |
|
||||
| `ALLOW_LOCAL_LOGIN` | No | `false` | Show local login form for non-superusers |
|
||||
| `CASDOOR_SSL_VERIFY` | No | `true` | SSL verification (`true`, `false`, or CA-bundle path) |
|
||||
|
||||
### Redirect URL Convention
|
||||
|
||||
The Allauth OIDC callback URL follows a fixed format. Register this path, prefixed with your site's origin (e.g. `https://app.example.com`), as the redirect URL in Casdoor:
|
||||
|
||||
```
|
||||
/accounts/oidc/<provider_id>/login/callback/
|
||||
```
|
||||
|
||||
For Spelunker with `provider_id = casdoor`:
|
||||
|
||||
```
|
||||
/accounts/oidc/casdoor/login/callback/
|
||||
```
|
||||
|
||||
> **Important:** The path segment is `oidc`, not `openid_connect`.
|
||||
|
||||
### Standard Group Mapping
|
||||
|
||||
Casdoor group names map to Django groups with consistent naming:
|
||||
|
||||
| Casdoor Group | Django Group | `is_staff` | Permissions |
|
||||
|---------------|-------------|------------|-------------|
|
||||
| `view_only` | `View Only` | `False` | `view_*` |
|
||||
| `staff` | `Staff` | `True` | `view_*`, `add_*`, `change_*` |
|
||||
| `sme` | `SME` | `True` | `view_*`, `add_*`, `change_*` |
|
||||
| `admin` | `Admin` | `True` | `view_*`, `add_*`, `change_*`, `delete_*` |
|
||||
|
||||
### Standard OIDC Claim Mapping
|
||||
|
||||
| Casdoor Claim | Django Field | Notes |
|
||||
|---------------|-------------|-------|
|
||||
| `email` | `User.username`, `User.email` | Full email used as username |
|
||||
| `given_name` | `User.first_name` | — |
|
||||
| `family_name` | `User.last_name` | — |
|
||||
| `name` | Parsed into first/last | Fallback when given/family absent |
|
||||
| `organization` | Organization lookup/create | Via adapter |
|
||||
| `groups` | Django Group membership | Via adapter mapping |
|
||||
|
||||
---
|
||||
|
||||
## Recommended Settings
|
||||
|
||||
Most implementations should include these Allauth settings:
|
||||
|
||||
```python
|
||||
# Authentication mode
ACCOUNT_LOGIN_METHODS = {'email'}
ACCOUNT_SIGNUP_FIELDS = ['email*', 'password1*', 'password2*']
ACCOUNT_EMAIL_VERIFICATION = 'optional'
ACCOUNT_SESSION_REMEMBER = True
ACCOUNT_LOGIN_ON_PASSWORD_RESET = True
ACCOUNT_UNIQUE_EMAIL = True

# Redirects
LOGIN_REDIRECT_URL = '/dashboard/'
ACCOUNT_LOGOUT_REDIRECT_URL = '/'
LOGIN_URL = '/accounts/login/'

# Social account behavior
SOCIALACCOUNT_AUTO_SIGNUP = True
SOCIALACCOUNT_EMAIL_VERIFICATION = 'none'
SOCIALACCOUNT_QUERY_EMAIL = True
SOCIALACCOUNT_STORE_TOKENS = True
SOCIALACCOUNT_ADAPTER = '<app>.adapters.CasdoorAccountAdapter'
ACCOUNT_ADAPTER = '<app>.adapters.LocalAccountAdapter'

# Session management
SESSION_COOKIE_AGE = 28800  # 8 hours
SESSION_SAVE_EVERY_REQUEST = True

# Account linking — auto-connect SSO to an existing local account with
# the same verified email instead of raising a conflict error.
# AUTO_CONNECT only takes effect when email authentication itself is
# enabled, so both flags are required. Only enable this when the IdP is
# trusted to verify email addresses.
SOCIALACCOUNT_EMAIL_AUTHENTICATION = True
SOCIALACCOUNT_EMAIL_AUTHENTICATION_AUTO_CONNECT = True
|
||||
```
|
||||
|
||||
### Multi-Factor Authentication (Recommended)
|
||||
|
||||
Add `allauth.mfa` for TOTP/WebAuthn second-factor support:
|
||||
|
||||
```python
|
||||
INSTALLED_APPS += ['allauth.mfa']
|
||||
MFA_ADAPTER = 'allauth.mfa.adapter.DefaultMFAAdapter'
|
||||
```
|
||||
|
||||
MFA is enforced per-user inside Django; Casdoor may also enforce its own MFA upstream.
|
||||
|
||||
### Rate Limiting on Local Login (Recommended)
|
||||
|
||||
Protect the local login form from brute-force attacks with `django-axes` or similar:
|
||||
|
||||
```python
|
||||
# pip install django-axes
INSTALLED_APPS += ['axes']

# AxesStandaloneBackend must come first so a locked-out request is
# rejected before any other backend gets to check credentials.
AUTHENTICATION_BACKENDS = [
    'axes.backends.AxesStandaloneBackend',
    'django.contrib.auth.backends.ModelBackend',
    'allauth.account.auth_backends.AuthenticationBackend',
]

# AxesMiddleware renders the lockout response for failed logins;
# django-axes expects it to be the LAST entry in MIDDLEWARE.
MIDDLEWARE += ['axes.middleware.AxesMiddleware']

AXES_FAILURE_LIMIT = 5    # Lock after 5 failures
AXES_COOLOFF_TIME = 1     # 1-hour cooloff
AXES_LOCKOUT_PARAMETERS = ['ip_address', 'username']

# NOTE(review): allauth's login form posts the identifier as ``login``
# rather than ``username``; the django-axes docs describe extra wiring
# for allauth — confirm against the versions in use.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Social Account Adapter
|
||||
|
||||
The social account adapter is the core of the pattern. It handles user provisioning on SSO login, maps claims to Django fields, enforces superuser protection, and assigns groups.
|
||||
|
||||
```python
|
||||
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
|
||||
from allauth.core.exceptions import ImmediateHttpResponse
|
||||
from django.contrib.auth.models import User, Group
|
||||
from django.contrib import messages
|
||||
from django.shortcuts import redirect
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CasdoorAccountAdapter(DefaultSocialAccountAdapter):
    """Provision and re-sync Django users from Casdoor OIDC logins.

    Responsibilities:

    * always allow SSO-initiated signup;
    * block SSO login for superusers (they must authenticate locally);
    * map OIDC claims onto ``User`` fields;
    * mirror the ``organization`` and ``groups`` claims into the
      project's organization model and Django groups on every login.

    Group names are compared case-insensitively everywhere, so
    ``is_staff`` and group membership can never disagree.
    """

    # Casdoor group name (lower-case) -> Django group name.
    GROUP_MAPPING = {
        'view_only': 'View Only',
        'staff': 'Staff',
        'sme': 'SME',
        'admin': 'Admin',
    }

    # Casdoor groups (lower-case) whose members receive ``is_staff``.
    STAFF_GROUPS = frozenset({'staff', 'sme', 'admin'})

    @staticmethod
    def _normalize_groups(group_names):
        """Return the ``groups`` claim as a list of lower-cased strings.

        Tolerates a missing or ``null`` claim, a single string instead
        of a list, and non-string entries (which are skipped).
        """
        if not group_names:
            return []
        if isinstance(group_names, str):
            group_names = [group_names]
        return [name.lower() for name in group_names if isinstance(name, str)]

    @classmethod
    def _is_staff(cls, group_names):
        """Return True when any claimed group grants staff access."""
        return any(
            name in cls.STAFF_GROUPS
            for name in cls._normalize_groups(group_names)
        )

    def is_open_for_signup(self, request, sociallogin):
        """Always allow SSO-initiated signup."""
        return True

    def pre_social_login(self, request, sociallogin):
        """
        Runs on every SSO login (new and returning users).

        1. Blocks superusers — they must use local auth.
        2. Re-syncs organization and group claims for returning users
           so that IdP changes are reflected immediately.

        Raises:
            ImmediateHttpResponse: redirect to the login page when the
                matched account is a superuser.
        """
        if not sociallogin.user.id:
            # First-time login: the user row does not exist yet, so
            # provisioning is left to save_user().
            return

        user = sociallogin.user

        # --- Superuser gate ---
        if user.is_superuser:
            logger.warning(
                "SSO login blocked for superuser %s. "
                "Superusers must use local authentication.",
                user.username,
            )
            messages.error(
                request,
                "Superuser accounts must use local authentication."
            )
            raise ImmediateHttpResponse(redirect('account_login'))

        # --- Re-sync claims for returning users ---
        extra_data = sociallogin.account.extra_data

        org_identifier = extra_data.get('organization', '')
        if org_identifier:
            self._assign_organization(user, org_identifier)

        groups = extra_data.get('groups', [])
        self._assign_groups(user, groups)

        user.is_staff = self._is_staff(groups)
        user.save(update_fields=['is_staff'])

    def populate_user(self, request, sociallogin, data):
        """Map Casdoor claims to Django User fields.

        ``data`` is the field dict allauth hands to the adapter. Values
        already set by the parent implementation are kept when the raw
        claim keys (``given_name`` / ``family_name``) are absent,
        instead of being overwritten with empty strings.
        """
        user = super().populate_user(request, sociallogin, data)

        # ``or ''`` guards against an explicit None, which would
        # otherwise be assigned as the username.
        email = data.get('email') or ''
        user.username = email
        user.email = email

        user.first_name = data.get('given_name') or user.first_name or ''
        user.last_name = data.get('family_name') or user.last_name or ''

        # Fallback: parse full 'name' claim
        if not user.first_name and not user.last_name:
            full_name = data.get('name') or ''
            if full_name:
                first, _, rest = full_name.partition(' ')
                user.first_name = first
                user.last_name = rest

        # Security: SSO users are never superusers
        user.is_superuser = False

        # Set is_staff from group membership (re-derived from the raw
        # claims in save_user() once the account's extra_data is known).
        user.is_staff = self._is_staff(data.get('groups'))

        return user

    def save_user(self, request, sociallogin, form=None):
        """Save user and handle organization + group mapping.

        ``is_staff`` is recomputed from the same ``groups`` claim that
        pre_social_login() uses, so a user's first login and later
        logins yield the same staff status.
        """
        user = super().save_user(request, sociallogin, form)
        extra_data = sociallogin.account.extra_data

        org_identifier = extra_data.get('organization', '')
        if org_identifier:
            self._assign_organization(user, org_identifier)

        groups = extra_data.get('groups', [])
        self._assign_groups(user, groups)

        is_staff = self._is_staff(groups)
        if user.is_staff != is_staff:
            user.is_staff = is_staff
            user.save(update_fields=['is_staff'])

        return user

    def _assign_organization(self, user, org_identifier):
        """Assign (or create) organization from the OIDC claim."""
        # Domain-specific — see Extension Examples below
        raise NotImplementedError("Override per project")

    def _assign_groups(self, user, group_names):
        """Map Casdoor groups to Django groups.

        The user's group membership is replaced wholesale: every
        existing group is removed first, then one Django group is added
        per recognised Casdoor group. Unrecognised names are ignored.
        """
        # NOTE: clear() also drops groups that were assigned locally
        # (outside the IdP); the IdP is treated as the source of truth.
        user.groups.clear()
        for casdoor_group in self._normalize_groups(group_names):
            django_group_name = self.GROUP_MAPPING.get(casdoor_group)
            if django_group_name:
                group, _ = Group.objects.get_or_create(name=django_group_name)
                user.groups.add(group)
                logger.info(
                    "Added %s to group %s", user.username, django_group_name
                )
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Local Account Adapter
|
||||
|
||||
Prevents local registration and logs authentication failures:
|
||||
|
||||
```python
|
||||
from allauth.account.adapter import DefaultAccountAdapter
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LocalAccountAdapter(DefaultAccountAdapter):
    """Account adapter for the local (non-SSO) login path.

    Closes self-service registration and records failed local logins.
    """

    def is_open_for_signup(self, request):
        """Refuse local signup; accounts arrive via SSO or an admin."""
        return False

    def authentication_failed(self, request, **kwargs):
        """Emit a warning for the failed attempt, then defer to allauth."""
        # REMOTE_ADDR is read as-is from the request; it is None when
        # the key is absent.
        remote_addr = request.META.get('REMOTE_ADDR')
        logger.warning(f"Local authentication failed from {remote_addr}")
        super().authentication_failed(request, **kwargs)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## OIDC Provider Configuration
|
||||
|
||||
Register Casdoor as an OpenID Connect provider in `settings.py`:
|
||||
|
||||
```python
|
||||
SOCIALACCOUNT_PROVIDERS = {
|
||||
'openid_connect': {
|
||||
'APPS': [
|
||||
{
|
||||
'provider_id': 'casdoor',
|
||||
'name': 'Casdoor SSO',
|
||||
'client_id': CASDOOR_CLIENT_ID,
|
||||
'secret': CASDOOR_CLIENT_SECRET,
|
||||
'settings': {
|
||||
'server_url': f'{CASDOOR_ORIGIN}/.well-known/openid-configuration',
|
||||
},
|
||||
}
|
||||
],
|
||||
'OAUTH_PKCE_ENABLED': True,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Management Command — Group Creation
|
||||
|
||||
An idempotent management command ensures groups and permissions exist:
|
||||
|
||||
```python
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.contrib.auth.models import Group, Permission
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Create the Django groups that Casdoor SSO groups map onto.

    Idempotent: groups are fetched or created, and permissions are
    added to (never removed from) each group, so the command can be
    re-run safely after new models are introduced.
    """

    help = 'Create Django groups for Casdoor SSO integration'

    def handle(self, *args, **options):
        """Ensure every SSO group exists and holds its model permissions.

        Permission codenames that do not exist yet (for example because
        migrations have not run) are reported as a warning instead of
        being skipped silently.
        """
        groups_config = {
            'View Only': {'permissions': ['view']},
            'Staff': {'permissions': ['view', 'add', 'change']},
            'SME': {'permissions': ['view', 'add', 'change']},
            'Admin': {'permissions': ['view', 'add', 'change', 'delete']},
        }

        # Add your domain-specific model names here
        models_to_permission = [
            'vendor', 'document', 'rfp', 'rfpquestion',
        ]

        for group_name, config in groups_config.items():
            group, created = Group.objects.get_or_create(name=group_name)
            status = 'Created' if created else 'Exists'
            self.stdout.write(f'{status}: {group_name}')

            wanted = {
                f'{perm_prefix}_{model}'
                for perm_prefix in config['permissions']
                for model in models_to_permission
            }

            # A codename is only unique per content type, so look up with
            # filter(): get() raises MultipleObjectsReturned when two
            # apps define a model with the same name.
            perms = list(Permission.objects.filter(codename__in=wanted))
            group.permissions.add(*perms)

            missing = sorted(wanted - {perm.codename for perm in perms})
            if missing:
                self.stdout.write(self.style.WARNING(
                    f'{group_name}: permissions not found: '
                    f'{", ".join(missing)}'
                ))

        self.stdout.write(self.style.SUCCESS('SSO groups created successfully'))
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Login Template
|
||||
|
||||
The login template shows an SSO button when Casdoor is enabled and conditionally reveals the local login form:
|
||||
|
||||
```html
|
||||
{% load socialaccount %}
|
||||
|
||||
<!-- SSO Login Button (POST form for CSRF protection) -->
|
||||
{% if CASDOOR_ENABLED %}
|
||||
<form method="post" action="{% provider_login_url 'casdoor' %}">
|
||||
{% csrf_token %}
|
||||
<button type="submit">Sign in with SSO</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
|
||||
<!-- Local Login Form (conditional) -->
|
||||
{% if ALLOW_LOCAL_LOGIN or user.is_superuser %}
|
||||
<form method="post" action="{% url 'account_login' %}">
|
||||
{% csrf_token %}
|
||||
{{ form.as_p }}
|
||||
<button type="submit">Sign In Locally</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
```
|
||||
|
||||
> **Why POST?** Using an `<a href>` GET link to initiate the OAuth flow skips CSRF
|
||||
> validation. Allauth's `{% provider_login_url %}` is designed for use inside a
|
||||
> `<form method="post">` so the CSRF token is verified before the redirect.
|
||||
|
||||
---
|
||||
|
||||
## SSO Signup Template
|
||||
|
||||
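When a new SSO user has no existing account and allauth cannot complete the signup automatically (for example, the IdP omitted the email or it conflicts with an existing account), allauth redirects them to `accounts/3rdparty/signup/` to confirm their email before the account is created. Without a custom template this page renders with no styling.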
|
||||
|
||||
Create `templates/socialaccount/signup.html` extending the project base:
|
||||
|
||||
```html
|
||||
{% extends "<app>/base.html" %}
|
||||
|
||||
{% block title %}Complete Sign Up — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-2xl justify-center mb-2">Complete Sign Up</h2>
|
||||
<p class="text-center text-base-content/70 mb-4">
|
||||
Confirm your email address to finish signing in with SSO.
|
||||
</p>
|
||||
|
||||
{% if form.errors %}
|
||||
<div class="alert alert-error mb-4">
|
||||
<span>Please correct the errors below.</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post" action="{{ action_url }}">
|
||||
{% csrf_token %}
|
||||
|
||||
<div class="form-control mb-6">
|
||||
<label class="label" for="id_email">
|
||||
<span class="label-text">Email</span>
|
||||
</label>
|
||||
<input type="email" name="email" id="id_email"
|
||||
class="input input-bordered w-full{% if form.email.errors %} input-error{% endif %}"
|
||||
value="{{ form.email.value|default:'' }}"
|
||||
autocomplete="email" required>
|
||||
{% if form.email.errors %}
|
||||
<label class="label">
|
||||
<span class="label-text-alt text-error">{{ form.email.errors|join:", " }}</span>
|
||||
</label>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="form-control mt-2">
|
||||
<button type="submit" class="btn btn-primary w-full">Complete Sign Up</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
```
|
||||
|
||||
Key context variables allauth provides to this template:
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `form` | `SignupForm` with a single `email` field pre-populated from the OIDC claim |
|
||||
| `action_url` | POST target (`/accounts/3rdparty/signup/`) — always use this, not a hard-coded path |
|
||||
| `sociallogin` | The in-progress social login object (rarely needed in the template) |
|
||||
|
||||
> **Why this page exists:** `SOCIALACCOUNT_AUTO_SIGNUP = True` skips it when the IdP provides a valid email. It only appears when allauth cannot confirm the email (e.g. the IdP omitted it or there is a conflict with an existing account).
|
||||
|
||||
---
|
||||
|
||||
## Context Processor
|
||||
|
||||
Exposes SSO settings to every template:
|
||||
|
||||
```python
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
def user_preferences(request):
|
||||
context = {}
|
||||
|
||||
# Always expose SSO flags for the login page
|
||||
context['CASDOOR_ENABLED'] = getattr(settings, 'CASDOOR_ENABLED', False)
|
||||
context['ALLOW_LOCAL_LOGIN'] = getattr(settings, 'ALLOW_LOCAL_LOGIN', False)
|
||||
|
||||
return context
|
||||
```
|
||||
|
||||
Register in `settings.py`:
|
||||
|
||||
```python
|
||||
TEMPLATES = [{
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
# ... standard processors ...
|
||||
'<app>.context_processors.user_preferences',
|
||||
],
|
||||
},
|
||||
}]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## SSL Bypass (Development Only)
|
||||
|
||||
For sandbox environments with self-signed certificates, an optional SSL patch disables verification at the `requests` library level:
|
||||
|
||||
```python
|
||||
import os, logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def apply_ssl_bypass():
|
||||
ssl_verify = os.environ.get('CASDOOR_SSL_VERIFY', 'true').lower()
|
||||
if ssl_verify != 'false':
|
||||
return
|
||||
|
||||
logger.warning("SSL verification DISABLED — sandbox only")
|
||||
|
||||
import urllib3
|
||||
from requests.adapters import HTTPAdapter
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
_original_send = HTTPAdapter.send
|
||||
|
||||
def _patched_send(self, request, stream=False, timeout=None,
|
||||
verify=True, cert=None, proxies=None):
|
||||
return _original_send(self, request, stream=stream,
|
||||
timeout=timeout, verify=False,
|
||||
cert=cert, proxies=proxies)
|
||||
|
||||
HTTPAdapter.send = _patched_send
|
||||
|
||||
apply_ssl_bypass()
|
||||
```
|
||||
|
||||
Load it at the top of `settings.py` **before** any library imports that make HTTP calls:
|
||||
|
||||
```python
|
||||
_ssl_verify = os.environ.get('CASDOOR_SSL_VERIFY', 'true').lower()
|
||||
if _ssl_verify == 'false':
|
||||
import <app>.ssl_patch # noqa: F401
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Logout Flow
|
||||
|
||||
By default, allauth's `account_logout` view destroys the local Django session but does **not** terminate the upstream Casdoor session. The user remains logged in at the IdP and will be silently re-authenticated on their next visit.
|
||||
|
||||
### Options
|
||||
|
||||
| Strategy | Behaviour | Implementation |
|
||||
|----------|-----------|----------------|
|
||||
| **Local-only logout** (default) | Destroys Django session; IdP session survives | No extra work |
|
||||
| **IdP redirect logout** | Redirects to Casdoor's `/api/logout` after local logout | Override `ACCOUNT_LOGOUT_REDIRECT_URL` to point at Casdoor |
|
||||
| **OIDC back-channel logout** | Casdoor notifies Django to invalidate sessions | Requires Casdoor back-channel support + a Django webhook endpoint |
|
||||
|
||||
### Recommended: IdP redirect logout
|
||||
|
||||
```python
|
||||
# settings.py
|
||||
ACCOUNT_LOGOUT_REDIRECT_URL = (
|
||||
f'{CASDOOR_ORIGIN}/api/logout'
|
||||
f'?post_logout_redirect_uri=https://your-app.example.com/'
|
||||
)
|
||||
```
|
||||
|
||||
This ensures the Casdoor session cookie is cleared before the user returns to your app.
|
||||
|
||||
---
|
||||
|
||||
## Domain Extension Examples
|
||||
|
||||
### Spelunker (RFP Tool)
|
||||
|
||||
Spelunker's adapter creates organizations on first encounter and links them to user profiles:
|
||||
|
||||
```python
|
||||
def _assign_organization(self, user, org_identifier):
|
||||
from django.db import models
|
||||
from django.utils.text import slugify
|
||||
from core.models import Organization
|
||||
|
||||
try:
|
||||
org = Organization.objects.filter(
|
||||
models.Q(slug=org_identifier) | models.Q(name=org_identifier)
|
||||
).first()
|
||||
|
||||
if not org:
|
||||
org = Organization.objects.create(
|
||||
name=org_identifier,
|
||||
slug=slugify(org_identifier),
|
||||
type='for-profit',
|
||||
legal_country='CA',
|
||||
status='active',
|
||||
)
|
||||
logger.info(f"Created organization: {org.name}")
|
||||
|
||||
if hasattr(user, 'profile'):
|
||||
logger.info(f"Assigned {user.username} → {org.name}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Organization assignment error: {e}")
|
||||
```
|
||||
|
||||
### Multi-Tenant SaaS App
|
||||
|
||||
A multi-tenant app might restrict users to a single tenant and enforce tenant isolation:
|
||||
|
||||
```python
|
||||
def _assign_organization(self, user, org_identifier):
|
||||
from tenants.models import Tenant
|
||||
|
||||
tenant = Tenant.objects.filter(external_id=org_identifier).first()
|
||||
if not tenant:
|
||||
raise ValueError(f"Unknown tenant: {org_identifier}")
|
||||
|
||||
user.tenant = tenant
|
||||
user.save(update_fields=['tenant'])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
- ❌ Don't allow SSO to grant `is_superuser` — always force `is_superuser = False` in `populate_user`
|
||||
- ❌ Don't *log-and-continue* for superuser SSO attempts — raise `ImmediateHttpResponse` to actually block the login
|
||||
- ❌ Don't disable local login for superusers — they need emergency access when SSO is unavailable
|
||||
- ❌ Don't rely on SSO username claims — use email as the canonical identifier
|
||||
- ❌ Don't hard-code the OIDC provider URL — always read from environment variables
|
||||
- ❌ Don't skip the management command — groups and permissions must be idempotent and repeatable
|
||||
- ❌ Don't use `CASDOOR_SSL_VERIFY=false` in production — only for sandbox environments with self-signed certificates
|
||||
- ❌ Don't forget PKCE — always set `OAUTH_PKCE_ENABLED: True` for Authorization Code flow
|
||||
- ❌ Don't sync groups only on first login — re-sync in `pre_social_login` so IdP changes take effect immediately
|
||||
- ❌ Don't use a GET link (`<a href>`) to start the OAuth flow — use a POST form so CSRF protection applies
|
||||
- ❌ Don't assume Django logout kills the IdP session — configure an IdP redirect or back-channel logout
|
||||
- ❌ Don't leave the local login endpoint unprotected — add rate limiting (e.g. `django-axes`) to prevent brute-force attacks
|
||||
|
||||
---
|
||||
|
||||
## Settings
|
||||
|
||||
All Django settings this pattern recognizes:
|
||||
|
||||
```python
|
||||
# settings.py
|
||||
|
||||
# --- SSO Provider ---
|
||||
CASDOOR_ENABLED = env.bool('CASDOOR_ENABLED') # Master SSO toggle
|
||||
CASDOOR_ORIGIN = env('CASDOOR_ORIGIN') # OIDC discovery base URL
|
||||
CASDOOR_ORIGIN_FRONTEND = env('CASDOOR_ORIGIN_FRONTEND') # Frontend URL (may differ)
|
||||
CASDOOR_CLIENT_ID = env('CASDOOR_CLIENT_ID') # OAuth client ID
|
||||
CASDOOR_CLIENT_SECRET = env('CASDOOR_CLIENT_SECRET') # OAuth client secret
|
||||
CASDOOR_ORG_NAME = env('CASDOOR_ORG_NAME') # Default organization
|
||||
# CASDOOR_SSL_VERIFY is NOT a Django setting — it is read directly from the
|
||||
# environment at the top of settings.py (before any imports) to apply SSL
|
||||
# bypass via ssl_patch.py or set REQUESTS_CA_BUNDLE for a custom CA.
|
||||
# See the SSL Bypass section above for the correct implementation.
|
||||
|
||||
# --- Login Behavior ---
|
||||
ALLOW_LOCAL_LOGIN = env.bool('ALLOW_LOCAL_LOGIN', default=False) # Show local form
|
||||
|
||||
# --- Allauth ---
|
||||
SOCIALACCOUNT_ADAPTER = '<app>.adapters.CasdoorAccountAdapter'
|
||||
ACCOUNT_ADAPTER = '<app>.adapters.LocalAccountAdapter'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
Standard test cases every implementation should cover:
|
||||
|
||||
```python
|
||||
from django.test import TestCase, override_settings
|
||||
from unittest.mock import MagicMock
|
||||
from django.contrib.auth.models import User, Group
|
||||
from <app>.adapters import CasdoorAccountAdapter, LocalAccountAdapter
|
||||
|
||||
|
||||
class CasdoorAdapterTest(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.adapter = CasdoorAccountAdapter()
|
||||
|
||||
def test_signup_always_open(self):
|
||||
"""SSO signup must always be permitted."""
|
||||
self.assertTrue(self.adapter.is_open_for_signup(MagicMock(), MagicMock()))
|
||||
|
||||
def test_superuser_never_set_via_sso(self):
|
||||
"""populate_user must force is_superuser=False."""
|
||||
sociallogin = MagicMock()
|
||||
data = {'email': 'admin@example.com', 'groups': ['admin']}
|
||||
user = self.adapter.populate_user(MagicMock(), sociallogin, data)
|
||||
self.assertFalse(user.is_superuser)
|
||||
|
||||
def test_email_used_as_username(self):
|
||||
"""Username must be the full email address."""
|
||||
sociallogin = MagicMock()
|
||||
data = {'email': 'jane@example.com'}
|
||||
user = self.adapter.populate_user(MagicMock(), sociallogin, data)
|
||||
self.assertEqual(user.username, 'jane@example.com')
|
||||
|
||||
def test_staff_flag_from_groups(self):
|
||||
"""is_staff must be True when user belongs to staff/sme/admin."""
|
||||
sociallogin = MagicMock()
|
||||
for group in ['staff', 'sme', 'admin']:
|
||||
data = {'email': 'user@example.com', 'groups': [group]}
|
||||
user = self.adapter.populate_user(MagicMock(), sociallogin, data)
|
||||
self.assertTrue(user.is_staff, f"is_staff should be True for group '{group}'")
|
||||
|
||||
def test_name_fallback_parsing(self):
|
||||
"""When given_name/family_name absent, parse 'name' claim."""
|
||||
sociallogin = MagicMock()
|
||||
data = {'email': 'user@example.com', 'name': 'Jane Doe'}
|
||||
user = self.adapter.populate_user(MagicMock(), sociallogin, data)
|
||||
self.assertEqual(user.first_name, 'Jane')
|
||||
self.assertEqual(user.last_name, 'Doe')
|
||||
|
||||
def test_group_mapping(self):
|
||||
"""Casdoor groups must map to correctly named Django groups."""
|
||||
Group.objects.create(name='View Only')
|
||||
Group.objects.create(name='Staff')
|
||||
user = User.objects.create_user('test@example.com', 'test@example.com')
|
||||
self.adapter._assign_groups(user, ['view_only', 'staff'])
|
||||
group_names = set(user.groups.values_list('name', flat=True))
|
||||
self.assertEqual(group_names, {'View Only', 'Staff'})
|
||||
|
||||
def test_superuser_sso_login_blocked(self):
|
||||
"""pre_social_login must raise ImmediateHttpResponse for superusers."""
|
||||
from allauth.core.exceptions import ImmediateHttpResponse
|
||||
user = User.objects.create_superuser(
|
||||
'admin@example.com', 'admin@example.com', 'pass'
|
||||
)
|
||||
sociallogin = MagicMock()
|
||||
sociallogin.user = user
|
||||
sociallogin.user.id = user.id
|
||||
with self.assertRaises(ImmediateHttpResponse):
|
||||
self.adapter.pre_social_login(MagicMock(), sociallogin)
|
||||
|
||||
def test_groups_resync_on_returning_login(self):
|
||||
"""pre_social_login must re-sync groups for existing users."""
|
||||
Group.objects.create(name='Admin')
|
||||
Group.objects.create(name='Staff')
|
||||
user = User.objects.create_user('user@example.com', 'user@example.com')
|
||||
user.groups.add(Group.objects.get(name='Staff'))
|
||||
|
||||
sociallogin = MagicMock()
|
||||
sociallogin.user = user
|
||||
sociallogin.user.id = user.id
|
||||
sociallogin.account.extra_data = {
|
||||
'groups': ['admin'],
|
||||
'organization': '',
|
||||
}
|
||||
self.adapter.pre_social_login(MagicMock(), sociallogin)
|
||||
group_names = set(user.groups.values_list('name', flat=True))
|
||||
self.assertEqual(group_names, {'Admin'})
|
||||
|
||||
|
||||
class LocalAdapterTest(TestCase):
|
||||
|
||||
def test_local_signup_disabled(self):
|
||||
"""Local signup must always be disabled."""
|
||||
adapter = LocalAccountAdapter()
|
||||
self.assertFalse(adapter.is_open_for_signup(MagicMock()))
|
||||
```
|
||||
521
docs/Pattern_Sphinx-Documentation_V1-00.md
Normal file
521
docs/Pattern_Sphinx-Documentation_V1-00.md
Normal file
@@ -0,0 +1,521 @@
|
||||
# Sphinx Documentation Pattern v1.0.0
|
||||
|
||||
Standardizes how Django projects build, configure, and deploy Sphinx documentation under a single `settings.py` — using the `TESTING` env-var flag to relax required-secret checks so docs build cleanly in CI without a real `.env`.
|
||||
|
||||
## 🐾 Red Panda Approval™
|
||||
|
||||
This pattern follows Red Panda Approval standards.
|
||||
|
||||
---
|
||||
|
||||
## Why a Pattern, Not a Shared Implementation
|
||||
|
||||
Every Django project has its own:
|
||||
|
||||
- **Required env vars** — one project needs `MCP_JWT_SECRET`, another needs `SLACK_TOKEN`, a third needs neither.
|
||||
- **App layout** — `apps/` vs. top-level packages; some projects ship one app, others fifteen.
|
||||
- **Autodoc-poisoning attributes** — DRF projects have class-level `queryset = Model.objects.filter(...)`; pure-Django projects may not.
|
||||
- **Deploy target** — different hosts, ports, paths, and SSH key names per environment.
|
||||
|
||||
A shared library can't paper over those differences. Instead, this pattern defines:
|
||||
|
||||
- **Required interface** — the four building blocks every project must have (two in `settings.py`, plus `docs/source/conf.py` and the CI workflow).
|
||||
- **Recommended behaviours** — what most projects should include.
|
||||
- **Extension guidelines** — what to add or skip per project.
|
||||
- **Standard Sphinx extension set** — for consistency across projects.
|
||||
|
||||
---
|
||||
|
||||
## Required Interface
|
||||
|
||||
The non-negotiable minimum every Django project must provide.
|
||||
|
||||
### 1. `settings.py` — TESTING-gated safe defaults
|
||||
|
||||
Every required env var (those without a `default=`) must have a `TESTING`-mode fallback. Read `TESTING` **first**, then branch every required `env('X')` call:
|
||||
|
||||
```python
|
||||
# Test mode flag — read first so it can relax required-env-var checks below.
|
||||
TESTING = env.bool('TESTING', default=False)
|
||||
|
||||
DEBUG = env.bool('DEBUG', default=False)
|
||||
|
||||
# In TESTING mode (unit tests, docs build) required keys fall back to safe
|
||||
# dummies so the settings module imports without a real .env. In production
|
||||
# they remain required — missing values fail loud.
|
||||
if TESTING:
|
||||
SECRET_KEY = env('SECRET_KEY', default='testing-insecure-key')
|
||||
ALLOWED_HOSTS = env.list('ALLOWED_HOSTS', default=['testserver', 'localhost', '127.0.0.1'])
|
||||
CSRF_TRUSTED_ORIGINS = env.list('CSRF_TRUSTED_ORIGINS', default=['http://localhost'])
|
||||
# ...any other required secrets get a 'testing-insecure-*' default here
|
||||
else:
|
||||
SECRET_KEY = env('SECRET_KEY')
|
||||
ALLOWED_HOSTS = env.list('ALLOWED_HOSTS')
|
||||
CSRF_TRUSTED_ORIGINS = env.list('CSRF_TRUSTED_ORIGINS')
|
||||
# ...and the production no-default form here
|
||||
```
|
||||
|
||||
Rule: **every** required env var read in `settings.py` (anything that uses `env('X')` without `default=`) gets paired branches like above. Production fails loud on missing; TESTING falls back.
|
||||
|
||||
### 2. Database choice gated on `TESTING`
|
||||
|
||||
```python
|
||||
if TESTING:
|
||||
# Test/docs build: in-memory SQLite. No real DB needed.
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': ':memory:',
|
||||
}
|
||||
}
|
||||
elif env('APP_DB_NAME', default=None):
|
||||
# Production: PostgreSQL (or whatever the project uses)
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.postgresql',
|
||||
'NAME': env('APP_DB_NAME'),
|
||||
'USER': env('APP_DB_USER'),
|
||||
'PASSWORD': env('APP_DB_PASSWORD'),
|
||||
'HOST': env('DB_HOST'),
|
||||
'PORT': env('DB_PORT'),
|
||||
}
|
||||
}
|
||||
else:
|
||||
# Local development: SQLite file
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': BASE_DIR / 'db.sqlite3',
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. `docs/source/conf.py` — boot Django in TESTING mode + neuter QuerySet repr
|
||||
|
||||
```python
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
|
||||
# Adjust this path to point at your Django package directory.
|
||||
sys.path.insert(0, os.path.abspath('../../<project_package>'))
|
||||
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', '<project_package>.settings')
|
||||
|
||||
# Load real .env if present (local dev). In CI there is none and that's fine.
|
||||
_repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
_env_file = os.path.join(_repo_root, '.env')
|
||||
if os.path.exists(_env_file):
|
||||
with open(_env_file) as _f:
|
||||
for _line in _f:
|
||||
_line = _line.strip()
|
||||
if not _line or _line.startswith('#') or '=' not in _line:
|
||||
continue
|
||||
_key, _val = _line.split('=', 1)
|
||||
os.environ.setdefault(_key.strip(), _val.strip())
|
||||
|
||||
# Force TESTING mode so settings.py uses its safe dummy defaults and the
|
||||
# in-memory SQLite database. The docs build never serves traffic or touches
|
||||
# real data, so the production "fail loud on missing secret" contract does
|
||||
# not apply here.
|
||||
os.environ['TESTING'] = 'true'
|
||||
|
||||
django.setup()
|
||||
|
||||
# Sphinx 9 autodoc calls repr() on every class attribute it documents.
|
||||
# Django's QuerySet.__repr__ executes a SELECT against the database — which
|
||||
# documentation has no business doing. Intercept object_description so
|
||||
# QuerySet instances render as a static string instead.
|
||||
from django.db.models.query import QuerySet # noqa: E402
|
||||
import sphinx.util.inspect as _sphinx_inspect # noqa: E402
|
||||
|
||||
_orig_object_description = _sphinx_inspect.object_description
|
||||
|
||||
|
||||
def _safe_object_description(obj, *args, **kwargs):
|
||||
if isinstance(obj, QuerySet):
|
||||
return f'<QuerySet [{obj.model.__name__}]>'
|
||||
return _orig_object_description(obj, *args, **kwargs)
|
||||
|
||||
|
||||
_sphinx_inspect.object_description = _safe_object_description
|
||||
|
||||
# ── Sphinx configuration below ────────────────────────────────────────────
|
||||
project = '<Project Name>'
|
||||
copyright = '<year>, <Project Team>'
|
||||
author = '<Project Team>'
|
||||
release = '1.0'
|
||||
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx_autodoc_typehints',
|
||||
'sphinxcontrib.httpdomain',
|
||||
'sphinxcontrib.mermaid',
|
||||
'myst_parser',
|
||||
]
|
||||
|
||||
source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'}
|
||||
|
||||
myst_enable_extensions = ['colon_fence', 'deflist', 'tasklist', 'attrs_inline']
|
||||
myst_heading_anchors = 4
|
||||
|
||||
autodoc_default_options = {
|
||||
'members': True,
|
||||
'member-order': 'bysource',
|
||||
'special-members': '__init__',
|
||||
'undoc-members': True,
|
||||
'exclude-members': '__weakref__',
|
||||
}
|
||||
autodoc_inherit_docstrings = False
|
||||
napoleon_use_ivar = True
|
||||
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
html_static_path = ['_static']
|
||||
html_theme_options = {
|
||||
'navigation_depth': 4,
|
||||
'collapse_navigation': False,
|
||||
'sticky_navigation': True,
|
||||
'includehidden': True,
|
||||
'titles_only': False,
|
||||
}
|
||||
```
|
||||
|
||||
### 4. `.gitea/workflows/docs.yml` — build + failure-debug + deploy
|
||||
|
||||
The failure-debug trio (`continue-on-error` + log dump + explicit fail) is **required** — without it, the Sphinx `ValueError` traceback in `/tmp/sphinx-err-*.log` is invisible in the Gitea UI and the build is effectively undiagnosable.
|
||||
|
||||
```yaml
|
||||
name: Build & Deploy Docs
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '<project_package>/**'
|
||||
- 'docs/**'
|
||||
- 'pyproject.toml'
|
||||
- '.gitea/workflows/docs.yml'
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install package + docs deps
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install -e ".[docs]"
|
||||
|
||||
- name: Read version from pyproject.toml
|
||||
id: version
|
||||
run: |
|
||||
VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ─── Failure-debug trio (REQUIRED) ─────────────────────────────────
|
||||
- name: Build HTML
|
||||
id: build_html
|
||||
run: |
|
||||
cd docs
|
||||
./regenerate_docs.sh
|
||||
continue-on-error: true
|
||||
|
||||
- name: Print Sphinx error log on failure
|
||||
if: steps.build_html.outcome == 'failure'
|
||||
run: |
|
||||
echo "=== Sphinx error log ==="
|
||||
cat /tmp/sphinx-err-*.log 2>/dev/null || echo "(no sphinx error log found)"
|
||||
|
||||
- name: Fail if build failed
|
||||
if: steps.build_html.outcome == 'failure'
|
||||
run: exit 1
|
||||
# ───────────────────────────────────────────────────────────────────
|
||||
|
||||
- name: Install rsync + openssh
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends rsync openssh-client
|
||||
|
||||
- name: Configure SSH
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
printf '%s\n' "${{ secrets.DOCS_DEPLOY_KEY }}" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan -p ${{ vars.DOCS_HOST_PORT }} ${{ vars.DOCS_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Test SSH connectivity
|
||||
run: |
|
||||
ssh -o BatchMode=yes -o ConnectTimeout=10 \
|
||||
-p ${{ vars.DOCS_HOST_PORT }} -i ~/.ssh/id_ed25519 \
|
||||
git@${{ vars.DOCS_HOST }} "id && echo 'SSH OK'"
|
||||
|
||||
- name: Rsync to versioned path
|
||||
run: |
|
||||
rsync -av --delete \
|
||||
-e "ssh -p ${{ vars.DOCS_HOST_PORT }} -i ~/.ssh/id_ed25519" \
|
||||
docs/_build/html/ \
|
||||
git@${{ vars.DOCS_HOST }}:/var/www/docs/<project_slug>/${{ steps.version.outputs.version }}/
|
||||
|
||||
- name: Rsync to latest
|
||||
run: |
|
||||
rsync -av --delete \
|
||||
-e "ssh -p ${{ vars.DOCS_HOST_PORT }} -i ~/.ssh/id_ed25519" \
|
||||
docs/_build/html/ \
|
||||
git@${{ vars.DOCS_HOST }}:/var/www/docs/<project_slug>/latest/
|
||||
|
||||
- name: Regenerate versions index
|
||||
run: |
|
||||
ssh -p ${{ vars.DOCS_HOST_PORT }} -i ~/.ssh/id_ed25519 git@${{ vars.DOCS_HOST }} \
|
||||
'python3 - <<PY
|
||||
import pathlib
|
||||
root = pathlib.Path("/var/www/docs/<project_slug>")
|
||||
versions = sorted(
|
||||
(p.name for p in root.iterdir() if p.is_dir()),
|
||||
reverse=True,
|
||||
)
|
||||
html = ["<!DOCTYPE html><html><head><title><Project> Docs</title></head><body>",
|
||||
"<h1><Project> Documentation</h1><ul>"]
|
||||
for v in versions:
|
||||
html.append(f"<li><a href=\"{v}/\">{v}</a></li>")
|
||||
html.append("</ul></body></html>")
|
||||
(root / "index.html").write_text("\n".join(html))
|
||||
PY'
|
||||
```
|
||||
|
||||
Required Gitea secrets/variables:
|
||||
|
||||
- `secrets.DOCS_DEPLOY_KEY` — SSH private key authorised on the deploy host.
|
||||
- `vars.DOCS_HOST` — deploy host (e.g. `docs.example.com`).
|
||||
- `vars.DOCS_HOST_PORT` — SSH port (typically `22`).
|
||||
|
||||
---
|
||||
|
||||
## Standard Sphinx Extensions
|
||||
|
||||
Use this exact extension set for consistency across projects:
|
||||
|
||||
```python
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc', # Pull docs from Python docstrings
|
||||
'sphinx.ext.viewcode', # "[source]" links to highlighted source
|
||||
'sphinx.ext.napoleon', # Google / NumPy style docstring support
|
||||
'sphinx.ext.intersphinx', # Cross-link to other projects' Sphinx docs
|
||||
'sphinx_autodoc_typehints', # Render PEP 484 type hints in docs
|
||||
'sphinxcontrib.httpdomain', # ".. http:get::" etc. for REST APIs
|
||||
'sphinxcontrib.mermaid', # Mermaid diagrams in Markdown / RST
|
||||
'myst_parser', # Markdown source files alongside RST
|
||||
]
|
||||
```
|
||||
|
||||
And the matching `pyproject.toml` extras group:
|
||||
|
||||
```toml
|
||||
[project.optional-dependencies]
|
||||
docs = [
|
||||
"sphinx",
|
||||
"sphinx-rtd-theme",
|
||||
"sphinx-autodoc-typehints",
|
||||
"sphinx-autobuild",
|
||||
"sphinxcontrib-httpdomain",
|
||||
"sphinxcontrib-mermaid",
|
||||
"myst-parser",
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recommended Behaviours
|
||||
|
||||
Behaviours that most projects should include but are not strictly required:
|
||||
|
||||
- **Live rebuild during authoring** — `make livehtml` (via `sphinx-autobuild`) for hot-reload editing.
|
||||
- **One-shot regen script** — `docs/regenerate_docs.sh` runs `make clean`, `sphinx-apidoc` over every app, then `make html`. Drives both local development and the CI pipeline.
|
||||
- **Mermaid for diagrams** — text-based, diffable, lives in the `.md` / `.rst` source. Avoid binary diagram assets.
|
||||
- **Static images in `source/_static/`** — referenced with relative paths.
|
||||
- **Hand-written prose in Markdown (MyST)** alongside autogenerated reference docs in RST. The two coexist via `myst_parser` + `source_suffix`.
|
||||
- **Project root `CLAUDE.md` (or equivalent) names docs as the single source of truth** — discourage parallel READMEs that drift.
|
||||
|
||||
---
|
||||
|
||||
## Pattern Variant 1: DRF / QuerySet Autodoc Poisoning
|
||||
|
||||
**Problem.** Sphinx 9 autodoc renders class attributes by calling `repr()` on the live object. Django's `QuerySet.__repr__` triggers `_fetch_all()`, which opens a database connection and runs a `SELECT`. For DRF viewsets like:
|
||||
|
||||
```python
|
||||
class CurrencyViewSet(viewsets.ReadOnlyModelViewSet):
|
||||
queryset = Currency.objects.filter(is_active=True) # ← autodoc tries to execute this
|
||||
serializer_class = CurrencySerializer
|
||||
```
|
||||
|
||||
…the docs build crashes with `psycopg.OperationalError: failed to resolve host 'postgres'` (or whatever DB hostname is configured). TESTING mode alone does not fix this: the in-memory SQLite database has no tables, so the same `repr()` call still fails when it runs the `SELECT`.
|
||||
|
||||
**Solution.** Monkey-patch `sphinx.util.inspect.object_description` in `conf.py` to short-circuit QuerySets before `repr()` is called:
|
||||
|
||||
```python
|
||||
from django.db.models.query import QuerySet
|
||||
import sphinx.util.inspect as _sphinx_inspect
|
||||
|
||||
_orig_object_description = _sphinx_inspect.object_description
|
||||
|
||||
|
||||
def _safe_object_description(obj, *args, **kwargs):
|
||||
if isinstance(obj, QuerySet):
|
||||
return f'<QuerySet [{obj.model.__name__}]>'
|
||||
return _orig_object_description(obj, *args, **kwargs)
|
||||
|
||||
|
||||
_sphinx_inspect.object_description = _safe_object_description
|
||||
```
|
||||
|
||||
This must run **after** `django.setup()` (so `QuerySet` can be imported) but **before** Sphinx starts processing documents.
|
||||
|
||||
---
|
||||
|
||||
## Pattern Variant 2: Settings-Driven TESTING Mode
|
||||
|
||||
**Problem.** Docs build needs to import `settings.py` but has no real `.env` in CI. Production-mode `env('SECRET_KEY')` calls (no default) raise `ImproperlyConfigured` and the build crashes before Sphinx even starts.
|
||||
|
||||
**Solution.** Read `TESTING` first in `settings.py`, then gate every required `env('X')` behind it:
|
||||
|
||||
```python
|
||||
TESTING = env.bool('TESTING', default=False)
|
||||
if TESTING:
|
||||
SECRET_KEY = env('SECRET_KEY', default='testing-insecure-key')
|
||||
else:
|
||||
SECRET_KEY = env('SECRET_KEY')
|
||||
```
|
||||
|
||||
`conf.py` flips the switch:
|
||||
|
||||
```python
|
||||
os.environ['TESTING'] = 'true'
|
||||
django.setup()
|
||||
```
|
||||
|
||||
**Bonus.** This also fixes a latent bug where `python manage.py test` would fail in any environment without `.env`. With `TESTING=true` set, the same defaults that unblock the docs build now unblock the test suite — one mechanism, two payoffs.
|
||||
|
||||
---
|
||||
|
||||
## Pattern Variant 3: Gitea Actions Deploy Workflow
|
||||
|
||||
The workflow has four logical phases:
|
||||
|
||||
1. **Setup** — checkout, Python, `pip install -e ".[docs]"`, read version from `pyproject.toml`.
|
||||
2. **Build with failure visibility** — the three-step trio shown above. The `continue-on-error: true` on the build step plus `if: steps.build_html.outcome == 'failure'` on the log-dump and fail steps ensures the Sphinx traceback reaches the Gitea log even when the build crashes.
|
||||
3. **SSH setup** — write the deploy key to `~/.ssh/id_ed25519`, scan the host into `known_hosts`, verify connectivity.
|
||||
4. **Deploy** — rsync to `/var/www/docs/<project_slug>/<version>/`, rsync to `…/latest/`, regenerate the versions index page on the remote host via a heredoc Python script.
|
||||
|
||||
The deploy host is expected to serve `/var/www/docs/` over HTTPS via nginx or similar. Each pushed version gets its own directory; `latest/` is a copy of the most recent build. The versions index lists every directory (including `latest/`) in reverse alphabetical order.
|
||||
|
||||
---
|
||||
|
||||
## Domain Extension Examples
|
||||
|
||||
### Project without DRF / class-level QuerySets
|
||||
|
||||
If your project has no class-level `queryset = Model.objects.filter(...)` attributes, the `_safe_object_description` monkey-patch is unnecessary and can be omitted. The `TESTING=true` switch is still required because `settings.py` still has required env vars.
|
||||
|
||||
### Project with extra required secrets
|
||||
|
||||
Add each extra key to the TESTING branch in `settings.py`:
|
||||
|
||||
```python
|
||||
if TESTING:
|
||||
SECRET_KEY = env('SECRET_KEY', default='testing-insecure-key')
|
||||
SLACK_TOKEN = env('SLACK_TOKEN', default='testing-insecure-slack')
|
||||
STRIPE_API_KEY = env('STRIPE_API_KEY', default='testing-insecure-stripe')
|
||||
else:
|
||||
SECRET_KEY = env('SECRET_KEY')
|
||||
SLACK_TOKEN = env('SLACK_TOKEN')
|
||||
STRIPE_API_KEY = env('STRIPE_API_KEY')
|
||||
```
|
||||
|
||||
No changes needed to `conf.py` — the single `TESTING=true` flip covers them all.
|
||||
|
||||
### Project on a non-Postgres database (MySQL, MariaDB)
|
||||
|
||||
No special handling needed. The `if TESTING:` branch in `settings.py` switches to in-memory SQLite regardless of what production uses. The MySQL driver is never imported during a docs build.
|
||||
|
||||
---
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
- ❌ **Don't load `.env.example` as a runtime fallback.** It's a documentation file with placeholder values like `DB_HOST=postgres` — those placeholders will poison the docs build by making `settings.py` believe Postgres is available.
|
||||
- ❌ **Don't override `settings.DATABASES` after `django.setup()`.** Django's `ConnectionHandler.databases` is a `@cached_property` populated during app loading; mutating `settings.DATABASES` afterwards has no effect.
|
||||
- ❌ **Don't add a separate `settings_docs.py`.** Env-var toggles are the project convention. A separate settings module fragments the config surface and forces every dev to remember which settings file applies in which context.
|
||||
- ❌ **Don't hand-edit `docs/source/reference/apps/`.** That tree is regenerated by `sphinx-apidoc` on every CI run. Hand-edits get overwritten.
|
||||
- ❌ **Don't suppress build errors in CI without dumping `/tmp/sphinx-err-*.log` first.** Sphinx writes its full traceback there and nowhere else; without the dump, the Gitea UI shows a one-line `ValueError` with no useful context.
|
||||
- ❌ **Don't use `os.environ.setdefault('TESTING', 'true')` in `conf.py`.** A user with `TESTING=false` in their local `.env` will see the setdefault skipped and hit production-mode behaviour during docs build. Use plain `os.environ['TESTING'] = 'true'` so it always wins.
|
||||
|
||||
---
|
||||
|
||||
## Settings
|
||||
|
||||
Document the `TESTING` env var contract:
|
||||
|
||||
```python
|
||||
# settings.py
|
||||
TESTING = env.bool('TESTING', default=False)
|
||||
# When true, gates safe-default branches for:
|
||||
# - Required secrets (SECRET_KEY and any other env('X') with no default)
|
||||
# - Required lists (ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS)
|
||||
# - DATABASES → in-memory SQLite
|
||||
# - CACHES → dummy backend
|
||||
# - DRF throttling → disabled
|
||||
# - MIGRATION_MODULES → disabled (no DB schema)
|
||||
# - PASSWORD_HASHERS → fast hashers
|
||||
# - LOGGING → minimal
|
||||
#
|
||||
# Set true for: pytest, manage.py test, docs build.
|
||||
# Set false (or unset) for: production, local dev with real services.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
Four verification recipes every project should run before pushing.
|
||||
|
||||
### 1. Local build with real `.env`
|
||||
|
||||
```bash
|
||||
cd docs
|
||||
make clean && make html
|
||||
```
|
||||
|
||||
Expected: `build succeeded.` with zero warnings. Open `_build/html/index.html` to spot-check rendering.
|
||||
|
||||
### 2. CI simulation (no `.env`)
|
||||
|
||||
```bash
|
||||
mv .env .env.bak
|
||||
cd docs && make clean && make html
|
||||
cd .. && mv .env.bak .env
|
||||
```
|
||||
|
||||
Expected: `build succeeded.` again. `settings.py` uses its TESTING-mode dummies; the in-memory SQLite has no tables, but autodoc never queries it because the monkey-patch short-circuits the QuerySet `repr()`.
|
||||
|
||||
### 3. Latent test-suite bug check
|
||||
|
||||
```bash
|
||||
mv .env .env.bak
|
||||
python manage.py test --keepdb 2>&1 | head -5
|
||||
mv .env.bak .env
|
||||
```
|
||||
|
||||
Expected: tests start running normally (not `ImproperlyConfigured: Set the SECRET_KEY environment variable`). This confirms the TESTING-mode defaults are wired into `settings.py` correctly — the docs build and the test suite share the same fallback mechanism.
|
||||
|
||||
### 4. CI dry-run (Gitea Actions)
|
||||
|
||||
Push to a feature branch. The workflow's failure-debug trio means any crash surfaces with a full traceback in the Gitea Actions log. Read the trace, fix the cause, push again.
|
||||
@@ -1,324 +0,0 @@
|
||||
## 🐾 Red Panda Approval™
|
||||
|
||||
This project follows Red Panda Approval standards — our gold standard for Django application quality. Code must be elegant, reliable, and maintainable to earn the approval of our adorable red panda judges.
|
||||
|
||||
### The 5 Sacred Django Criteria
|
||||
1. **Fresh Migration Test** — Clean migrations from empty database
|
||||
2. **Elegant Simplicity** — No unnecessary complexity
|
||||
3. **Observable & Debuggable** — Proper logging and error handling
|
||||
4. **Consistent Patterns** — Follow Django conventions
|
||||
5. **Actually Works** — Passes all checks and serves real user needs
|
||||
|
||||
## Environment Standards
|
||||
- Virtual environment: ~/env/PROJECT/bin/activate
|
||||
- Use pyproject.toml for project configuration (no setup.py, no requirements.txt)
|
||||
- Python version: specified in pyproject.toml
|
||||
- Dependencies: floor-pinned with ceiling (e.g. `Django>=5.2,<6.0`)
|
||||
|
||||
### Dependency Pinning
|
||||
|
||||
```toml
|
||||
# Correct — floor pin with ceiling
|
||||
dependencies = [
|
||||
"Django>=5.2,<6.0",
|
||||
"djangorestframework>=3.14,<4.0",
|
||||
"cryptography>=41.0,<45.0",
|
||||
]
|
||||
|
||||
# Wrong — exact pins in library packages
|
||||
dependencies = [
|
||||
"Django==5.2.7", # too strict, breaks downstream
|
||||
]
|
||||
```
|
||||
|
||||
Exact pins (`==`) are only appropriate in application-level lock files, not in reusable library packages.
|
||||
|
||||
## Directory Structure
|
||||
myproject/ # Git repository root
|
||||
├── .gitignore
|
||||
├── README.md
|
||||
├── pyproject.toml # Project configuration (moved to repo root)
|
||||
├── docker-compose.yml
|
||||
├── .env # Docker Compose environment
|
||||
│ # DB_ENGINE=postgresql
|
||||
│ # APP_DB_NAME=angelia2
|
||||
│ # APP_DB_USER=angelia
|
||||
│ # APP_DB_PASSWORD=changeme
|
||||
│ # DB_HOST=db
|
||||
│ # DB_PORT=5432
|
||||
├── .env.example
|
||||
│
|
||||
├── project/ # Django project root (manage.py lives here)
|
||||
│ ├── manage.py
|
||||
│ ├── Dockerfile
|
||||
│ ├── .env # Local development environment
|
||||
│ │ # DB_ENGINE=sqlite
|
||||
│ ├── .env.example
|
||||
│ │
|
||||
│ ├── config/ # Django configuration module
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── settings.py
|
||||
│ │ ├── urls.py
|
||||
│ │ ├── wsgi.py
|
||||
│ │ └── asgi.py
|
||||
│ │
|
||||
│ ├── accounts/ # Django app
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── models.py
|
||||
│ │ ├── views.py
|
||||
│ │ └── urls.py
|
||||
│ │
|
||||
│ ├── blog/ # Django app
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── models.py
|
||||
│ │ ├── views.py
|
||||
│ │ └── urls.py
|
||||
│ │
|
||||
│ ├── static/
|
||||
│ │ ├── css/
|
||||
│ │ └── js/
|
||||
│ │
|
||||
│ └── templates/
|
||||
│ └── base.html
|
||||
│
|
||||
├── web/ # Nginx configuration
|
||||
│ └── nginx.conf
|
||||
│
|
||||
├── db/ # PostgreSQL configuration
|
||||
│ └── postgresql.conf
|
||||
│
|
||||
└── docs/ # Project documentation
|
||||
└── index.md
|
||||
|
||||
## Settings Structure
|
||||
- Use a single settings.py file
|
||||
- Use django-environ or python-dotenv for environment variables
|
||||
- Never commit .env files to version control
|
||||
- Provide .env.example with all required variables documented
|
||||
- Create .gitignore file
|
||||
- Create a .dockerignore file
|
||||
|
||||
## Environment Variables
|
||||
|
||||
### PostgreSQL settings (only if DB_ENGINE=postgresql)
|
||||
```
|
||||
APP_DB_NAME=angelia2
|
||||
APP_DB_USER=angelia
|
||||
APP_DB_PASSWORD=changeme
|
||||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
```
|
||||
|
||||
## Code Organization
|
||||
- Imports: PEP 8 ordering (stdlib, third-party, local)
|
||||
- Type hints on function parameters
|
||||
- CSS: External .css files only (no inline styles, no embedded `<style>` tags)
|
||||
- JS: External .js files only (no inline handlers, no embedded `<script>` blocks)
|
||||
- Maximum file length: 1000 lines
|
||||
- If a file exceeds 500 lines, consider splitting by domain concept
|
||||
|
||||
## Database Conventions
|
||||
- Migrations run cleanly from empty database
|
||||
- Never edit deployed migrations
|
||||
- Use meaningful migration names: --name add_email_to_profile
|
||||
- One logical change per migration when possible
|
||||
- Test migrations both forward and backward
|
||||
|
||||
### Development vs Production
|
||||
- Development: SQLite
|
||||
- Production: PostgreSQL
|
||||
|
||||
## Caching
|
||||
- Expensive queries are cached
|
||||
- Cache keys follow naming convention
|
||||
- TTLs are appropriate (not infinite)
|
||||
- Invalidation is documented
|
||||
- Key Naming Pattern: {app}:{model}:{identifier}:{field}
|
||||
|
||||
## Model Naming
|
||||
- Model names: singular PascalCase (User, BlogPost, OrderItem)
|
||||
- Correct English pluralization on related names
|
||||
- All models have created_at and updated_at
|
||||
- All models define __str__ and get_absolute_url
|
||||
- TextChoices used for status fields
|
||||
- related_name defined on ForeignKey fields
|
||||
- Related names: plural snake_case with proper English pluralization
|
||||
|
||||
## Forms
|
||||
- Use ModelForm with explicit fields list (never __all__)
|
||||
|
||||
## Field Naming
|
||||
- Foreign keys: singular without _id suffix (author, category, parent)
|
||||
- Boolean fields: use prefixes (is_active, has_permission, can_edit)
|
||||
- Date fields: use suffixes (created_at, updated_at, published_on)
|
||||
- Avoid abbreviations (use description, not desc)
|
||||
|
||||
## Required Model Fields
|
||||
- All models should include:
|
||||
- created_at = models.DateTimeField(auto_now_add=True)
|
||||
- updated_at = models.DateTimeField(auto_now=True)
|
||||
- Consider adding:
|
||||
- id = models.UUIDField(primary_key=True) for public-facing models
|
||||
- is_active = models.BooleanField(default=True) for soft deletes
|
||||
|
||||
## Indexing
|
||||
- Add db_index=True to frequently queried fields
|
||||
- Use Meta.indexes for composite indexes
|
||||
- Document why each index exists
|
||||
|
||||
## Queries
|
||||
- Use select_related() for foreign keys
|
||||
- Use prefetch_related() for reverse relations and M2M
|
||||
- Avoid queries in loops (N+1 problem)
|
||||
- Use .only() and .defer() for large models
|
||||
- Add comments explaining complex querysets
|
||||
|
||||
## Docstrings
|
||||
- Use Sphinx style docstrings
|
||||
- Document all public functions, classes, and modules
|
||||
- Skip docstrings for obvious one-liners and standard Django overrides
|
||||
|
||||
## Views
|
||||
- Use Function-Based Views (FBVs) exclusively
|
||||
- Explicit logic is preferred over implicit inheritance
|
||||
- Extract shared logic into utility functions
|
||||
|
||||
## URLs & Identifiers
|
||||
|
||||
- Public URLs use short UUIDs (12 characters) via `shortuuid`
|
||||
- Never expose sequential IDs in URLs (security/enumeration risk)
|
||||
- Internal references may use standard UUIDs or PKs
|
||||
|
||||
## URL Patterns
|
||||
- Resource-based URLs (RESTful style)
|
||||
- Namespaced URL names per app
|
||||
- Trailing slashes (Django default)
|
||||
- Flat structure preferred over deep nesting
|
||||
|
||||
## Background Tasks
|
||||
- All tasks are run synchronously unless the design specifies background tasks are needed for long operations
|
||||
- Long operations use Celery tasks
|
||||
- Track task progress in Memcached using the key pattern {app}:task:{task_id}:progress
|
||||
- Tasks are idempotent
|
||||
- Tasks include retry logic
|
||||
- Tasks live in app/tasks.py
|
||||
- RabbitMQ is the Message Broker
|
||||
- Flower Monitoring: Use for debugging failed tasks
|
||||
|
||||
## Testing
|
||||
- Framework: Django TestCase (not pytest)
|
||||
- Separate test files per module: test_models.py, test_views.py, test_forms.py
|
||||
|
||||
## Frontend Standards
|
||||
|
||||
### New Projects (DaisyUI + Tailwind)
|
||||
- DaisyUI 4 via CDN for component classes
|
||||
- Tailwind CSS via CDN for utility classes
|
||||
- Theme management via Themis (DaisyUI `data-theme` attribute)
|
||||
- All apps extend `themis/base.html` for consistent navigation
|
||||
- No inline styles or scripts
|
||||
|
||||
### Existing Projects (Bootstrap 5)
|
||||
- Bootstrap 5 via CDN
|
||||
- Bootstrap Icons via CDN
|
||||
- Bootswatch for theme variants (if applicable)
|
||||
- django-bootstrap5 and crispy-bootstrap5 for form rendering
|
||||
|
||||
## Preferred Packages
|
||||
|
||||
### Core Django
|
||||
- django>=5.2,<6.0
|
||||
- django-environ — Environment variables
|
||||
|
||||
### Authentication & Security
|
||||
- django-allauth — User management
|
||||
- django-allauth-2fa — Two-factor authentication
|
||||
|
||||
### API Development
|
||||
- djangorestframework>=3.14,<4.0 — REST APIs
|
||||
- drf-spectacular — OpenAPI/Swagger documentation
|
||||
|
||||
### Encryption
|
||||
- cryptography — Fernet encryption for secrets/API keys
|
||||
|
||||
### Background Tasks
|
||||
- celery — Async task queue
|
||||
- django-celery-progress — Progress bars
|
||||
- flower — Celery monitoring
|
||||
|
||||
### Caching
|
||||
- pymemcache — Memcached backend
|
||||
|
||||
### Database
|
||||
- psycopg[binary] — PostgreSQL adapter
|
||||
- shortuuid — Short UUIDs for public URLs
|
||||
|
||||
### Production
|
||||
- gunicorn — WSGI server
|
||||
|
||||
### Shared Apps
|
||||
- django-heluca-themis — User preferences, themes, key management, navigation
|
||||
|
||||
### Deprecated / Removed
|
||||
- ~~pytz~~ — Use stdlib `zoneinfo` (Python 3.9+, Django 4+)
|
||||
- ~~Pillow~~ — Only add if your app needs ImageField
|
||||
- ~~django-heluca-core~~ — Replaced by Themis
|
||||
- ~~dj-database-url~~ — Use individual Django DB env vars instead
|
||||
|
||||
## Anti-Patterns to Avoid
|
||||
|
||||
### Models
|
||||
- Don't use `Model.objects.get()` without handling `DoesNotExist`
|
||||
- Don't use `null=True` on `CharField` or `TextField` (use `blank=True, default=""`)
|
||||
- Don't use `related_name='+'` unless you have a specific reason
|
||||
- Don't override `save()` for business logic (use signals or service functions)
|
||||
- Don't use `auto_now=True` on fields you might need to manually set
|
||||
- Don't use `ForeignKey` without specifying `on_delete` explicitly
|
||||
- Don't use `Meta.ordering` on large tables (specify ordering in queries)
|
||||
|
||||
### Queries
|
||||
- Don't query inside loops (N+1 problem)
|
||||
- Don't use `.all()` when you need a subset
|
||||
- Don't use raw SQL unless absolutely necessary
|
||||
- Don't forget `select_related()` and `prefetch_related()`
|
||||
|
||||
### Views
|
||||
- Don't put business logic in views
|
||||
- Don't use `request.POST.get()` without validation (use forms)
|
||||
- Don't return sensitive data in error messages
|
||||
- Don't forget `login_required` decorator on protected views
|
||||
|
||||
### Forms
|
||||
- Don't use `fields = '__all__'` in ModelForm
|
||||
- Don't trust client-side validation alone
|
||||
- Don't use `exclude` in ModelForm (use explicit `fields`)
|
||||
|
||||
### Templates
|
||||
- Don't use `{{ variable }}` for URLs (use `{% url %}` tag)
|
||||
- Don't put logic in templates
|
||||
- Don't use inline CSS or JavaScript (external files only)
|
||||
- Don't forget `{% csrf_token %}` in forms
|
||||
|
||||
### Security
|
||||
- Don't store secrets in `settings.py` (use environment variables)
|
||||
- Don't commit `.env` files to version control
|
||||
- Don't use `DEBUG=True` in production
|
||||
- Don't expose sequential IDs in public URLs
|
||||
- Don't use `mark_safe()` on user-supplied content
|
||||
- Don't disable CSRF protection
|
||||
|
||||
### Imports & Code Style
|
||||
- Don't use `from module import *`
|
||||
- Don't use mutable default arguments
|
||||
- Don't use bare `except:` clauses
|
||||
- Don't ignore linter warnings without documented reason
|
||||
|
||||
### Migrations
|
||||
- Don't edit migrations that have been deployed
|
||||
- Don't use `RunPython` without a reverse function
|
||||
- Don't add non-nullable fields without a default value
|
||||
|
||||
### Celery Tasks
|
||||
- Don't pass model instances to tasks (pass IDs and re-fetch)
|
||||
- Don't assume tasks run immediately
|
||||
- Don't forget retry logic for external service calls
|
||||
614
docs/Red Panda Standards_Django_V1-02.md
Normal file
614
docs/Red Panda Standards_Django_V1-02.md
Normal file
@@ -0,0 +1,614 @@
|
||||
## 🐾 Red Panda Approval™
|
||||
|
||||
This project follows Red Panda Approval standards — our gold standard for Django application quality. Code must be elegant, reliable, and maintainable to earn the approval of our adorable red panda judges.
|
||||
|
||||
### The 5 Sacred Django Criteria
|
||||
1. **Fresh Migration Test** — Clean migrations from empty database
|
||||
2. **Elegant Simplicity** — No unnecessary complexity
|
||||
3. **Observable & Debuggable** — Proper logging and error handling
|
||||
4. **Consistent Patterns** — Follow Django conventions
|
||||
5. **Actually Works** — Passes all checks and serves real user needs
|
||||
|
||||
### Changelog
|
||||
- **V1-02** — Added Docker Compose environment-scoping standard (per-service `environment:` blocks), nginx reverse-proxy reference config (Docker DNS resolver, `X-Forwarded-Proto` preservation, access-log filtering, internal-network allowlists), and Memcached deployment note (bind to `0.0.0.0`, not `localhost`).
|
||||
- **V1-01** — Initial published standards.
|
||||
|
||||
## Environment Standards
|
||||
- Virtual environment: ~/env/PROJECT/bin/activate
|
||||
- Use pyproject.toml for project configuration (no setup.py, no requirements.txt)
|
||||
- Python version: specified in pyproject.toml
|
||||
- Dependencies: floor-pinned with ceiling (e.g. `Django>=5.2,<6.0`)
|
||||
|
||||
### Dependency Pinning
|
||||
|
||||
```toml
|
||||
# Correct — floor pin with ceiling
|
||||
dependencies = [
|
||||
"Django>=5.2,<6.0",
|
||||
"djangorestframework>=3.14,<4.0",
|
||||
"cryptography>=41.0,<45.0",
|
||||
]
|
||||
|
||||
# Wrong — exact pins in library packages
|
||||
dependencies = [
|
||||
"Django==5.2.7", # too strict, breaks downstream
|
||||
]
|
||||
```
|
||||
|
||||
Exact pins (`==`) are only appropriate in application-level lock files, not in reusable library packages.
|
||||
|
||||
## Directory Structure
|
||||
myproject/ # Git repository root
|
||||
├── .gitignore
|
||||
├── README.md
|
||||
├── pyproject.toml # Project configuration (moved to repo root)
|
||||
├── docker-compose.yaml # Per-service environment scoping (see below)
|
||||
├── .env # Docker Compose interpolation source — NOT committed
|
||||
├── .env.example # Template listing every `${VAR}` with which service consumes it
|
||||
│
|
||||
├── project/ # Django project root (manage.py lives here)
|
||||
│ ├── manage.py
|
||||
│ ├── Dockerfile
|
||||
│ ├── .env # Local bare-Python dev environment (runserver, celery, etc.)
|
||||
│ │ # Only read by bare-Python runs; NOT by the compose stack
|
||||
│ ├── .env.example
|
||||
│ │
|
||||
│ ├── config/ # Django configuration module
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── settings.py
|
||||
│ │ ├── urls.py
|
||||
│ │ ├── wsgi.py
|
||||
│ │ └── asgi.py
|
||||
│ │
|
||||
│ ├── accounts/ # Django app
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── models.py
|
||||
│ │ ├── views.py
|
||||
│ │ └── urls.py
|
||||
│ │
|
||||
│ ├── blog/ # Django app
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── models.py
|
||||
│ │ ├── views.py
|
||||
│ │ └── urls.py
|
||||
│ │
|
||||
│ ├── static/
|
||||
│ │ ├── css/
|
||||
│ │ └── js/
|
||||
│ │
|
||||
│ └── templates/
|
||||
│ └── base.html
|
||||
│
|
||||
├── nginx/ # Nginx configuration (see Nginx Reverse Proxy below)
|
||||
│ └── PROJECT.conf
|
||||
│
|
||||
├── db/ # PostgreSQL configuration (if customised)
|
||||
│ └── postgresql.conf
|
||||
│
|
||||
└── docs/ # Project documentation
|
||||
└── index.md
|
||||
|
||||
## Settings Structure
|
||||
- Use a single settings.py file
|
||||
- Use django-environ or python-dotenv for environment variables
|
||||
- Never commit .env files to version control
|
||||
- Provide .env.example with all required variables documented
|
||||
- Create .gitignore file
|
||||
- Create a .dockerignore file
|
||||
|
||||
## Environment Variables
|
||||
|
||||
### PostgreSQL settings (only if DB_ENGINE=postgresql)
|
||||
```
|
||||
APP_DB_NAME=angelia2
|
||||
APP_DB_USER=angelia
|
||||
APP_DB_PASSWORD=changeme
|
||||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
```
|
||||
|
||||
## Docker Compose — Per-Service Environment Scoping
|
||||
|
||||
> **New in V1-02.** The monolithic `env_file:` pattern is deprecated.
|
||||
|
||||
### Rule: every service declares only the env vars it actually needs
|
||||
|
||||
In `docker-compose.yaml`, each service uses an `environment:` block listing only the variables that service consumes, with values interpolated from `.env` (at the repo root) using `${VAR}` syntax. Do **not** use `env_file: .env` shared across services.
|
||||
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: git.helu.ca/r/myproject:latest
|
||||
command: ["web"]
|
||||
environment:
|
||||
# Django core
|
||||
- DJANGO_SETTINGS_MODULE=myproject.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
|
||||
- CSRF_TRUSTED_ORIGINS=${CSRF_TRUSTED_ORIGINS}
|
||||
# Postgres
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# ...
|
||||
|
||||
worker:
|
||||
image: git.helu.ca/r/myproject:latest
|
||||
command: ["worker"]
|
||||
environment:
|
||||
- DJANGO_SETTINGS_MODULE=myproject.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
# ...
|
||||
# NO ALLOWED_HOSTS, CSRF_TRUSTED_ORIGINS, EMAIL_* — worker doesn't serve HTTP
|
||||
```
|
||||
|
||||
### Why this matters
|
||||
|
||||
1. **Least privilege / blast radius.** A compromised MCP container shouldn't see Celery broker credentials or encryption keys. A Celery worker shouldn't see `ALLOWED_HOSTS` or CSRF config — it doesn't serve HTTP. When every service shares one env file, a misconfigured secret takes down the whole stack instead of just the services that need that secret.
|
||||
|
||||
2. **Self-documenting surface.** Reading `docker-compose.yaml` immediately tells you what each container depends on. With `env_file:`, every container has access to every secret and you can't tell from the compose file which service uses which variable.
|
||||
|
||||
3. **Ansible rendering.** The compose file can be converted to a Jinja2 template and rendered per-host by an Ansible role, with secrets pulled from the vault. The `${VAR}` pattern is the natural interface between compose and Ansible.
|
||||
|
||||
4. **Parsing correctness.** Docker Compose's `env_file:` parser does **not** strip inline `# comments`, honours CRLF `\r` as part of values, and handles quoting differently than `python-decouple`/`django-environ`. An `.env` that works with bare-Python `manage.py runserver` can silently feed a mangled URL (e.g. `CELERY_BROKER_URL` with a trailing `\r` or stray comment) to a container. Shell-style `${VAR}` interpolation avoids this because the value is unescaped by the shell the same way every time.
|
||||
|
||||
### `.env.example` template convention
|
||||
|
||||
Annotate each variable with which service(s) consume it:
|
||||
|
||||
```bash
|
||||
# --- Django core ------------------------------------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
SECRET_KEY=change-me-to-a-real-secret-key
|
||||
DEBUG=False
|
||||
|
||||
# --- PostgreSQL ------------------------------------------------------------
|
||||
# Consumed by: app, mcp, worker
|
||||
APP_DB_NAME=myproject
|
||||
APP_DB_USER=myproject
|
||||
APP_DB_PASSWORD=change-me
|
||||
|
||||
# --- Celery / RabbitMQ -----------------------------------------------------
|
||||
# Consumed by: app (producer), worker (consumer). NOT mcp.
|
||||
# Percent-encode any password chars with URL meaning: @ : / # % + ? & = and space
|
||||
CELERY_BROKER_URL=amqp://myproject:change-me@oberon.incus:5672/myproject
|
||||
```
|
||||
|
||||
### Diagnostic: "what did Django actually parse?"
|
||||
|
||||
When a service misbehaves on startup (broker 403, DB auth error, unreachable cache), the fastest diagnostic is to print what settings.py actually resolved to — that removes every layer of env-file / interpolation / URL-encoding ambiguity:
|
||||
|
||||
```bash
|
||||
docker compose run --rm --no-deps worker \
|
||||
python -c "from django.conf import settings; print(repr(settings.CELERY_BROKER_URL))"
|
||||
|
||||
docker compose run --rm --no-deps app \
|
||||
python -c "from django.conf import settings; print(settings.DATABASES['default'])"
|
||||
```
|
||||
|
||||
The `repr(...)` form surfaces CRLF, trailing whitespace, stray quotes, and characters that should have been percent-encoded but weren't.
|
||||
|
||||
### Broker URL gotcha (documented for every new project)
|
||||
|
||||
RabbitMQ connection URLs must percent-encode any password character with URL meaning (`@ : / # % + ? & =` and space). Kombu's URL parser is strict — an unencoded `#` in the password is read as the start of a URL fragment, and an unencoded `@` shifts the username/host boundary, both causing `ACCESS_REFUSED - Login was refused using authentication mechanism PLAIN` at worker startup. Bare-Python tests that pass the password as a kwarg rather than a URL won't exhibit this and can mask the bug.
|
||||
|
||||
## Nginx Reverse Proxy
|
||||
|
||||
> **New in V1-02.** Standard reference config for any Red Panda project running behind HAProxy on Titania.
|
||||
|
||||
Deployed as a sidecar container in the compose stack, fronting the Django app (gunicorn) and — where applicable — an MCP or streaming service. HAProxy handles TLS termination; nginx is plain HTTP on the internal network.
|
||||
|
||||
### Required building blocks
|
||||
|
||||
1. **Docker DNS resolver + variable-based `proxy_pass`.** `upstream` blocks resolve container hostnames **once at startup** and cache the IP forever. When `docker compose restart app` assigns a new IP, nginx returns 502 until fully reloaded. Use:
|
||||
|
||||
```nginx
|
||||
resolver 127.0.0.11 valid=10s;
|
||||
server {
|
||||
set $backend_app http://app:8000;
|
||||
location / {
|
||||
proxy_pass $backend_app; # variable → re-resolve each request
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **`$proxy_x_forwarded_proto` map.** Inside the compose network `$scheme` is always `http` because HAProxy already terminated TLS. Passing `$scheme` to Django breaks `request.is_secure()`, secure cookies, and `build_absolute_uri()`. Preserve the HAProxy header:
|
||||
|
||||
```nginx
|
||||
map $http_x_forwarded_proto $proxy_x_forwarded_proto {
|
||||
default $http_x_forwarded_proto;
|
||||
"" $scheme;
|
||||
}
|
||||
# Then in every proxy block:
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
```
|
||||
|
||||
3. **Access-log suppression for probe paths.** HAProxy and Prometheus probe every 15–30 s; logging them floods Loki. The `nginx:alpine` image has a default http-level `access_log`, so a server-level `access_log` is *additive*, not replacing. You need both:
|
||||
|
||||
```nginx
|
||||
map $request_uri $loggable {
|
||||
default 1;
|
||||
~^/live(/|\?|$) 0;
|
||||
~^/ready(/|\?|$) 0;
|
||||
~^/metrics(/|\?|$) 0;
|
||||
~^/healthz(/|\?|$) 0;
|
||||
~^/nginx_status(/|\?|$) 0;
|
||||
~^/mcp/health(/|\?|$) 0;
|
||||
}
|
||||
|
||||
access_log off; # defeat inherited default
|
||||
access_log /dev/stdout combined if=$loggable; # then install filtered version
|
||||
```
|
||||
|
||||
4. **Internal-network allowlist for all probe + metric endpoints.** RFC1918 + loopback, applied to `/live/`, `/ready/`, `/healthz`, `/metrics`, and `/nginx_status`:
|
||||
|
||||
```nginx
|
||||
location = /metrics {
|
||||
allow 127.0.0.0/8; # loopback
|
||||
allow 10.0.0.0/8; # RFC1918 — primary internal (Incus, HAProxy)
|
||||
allow 172.16.0.0/12; # RFC1918 — Docker bridge networks
|
||||
allow 192.168.0.0/16; # RFC1918
|
||||
deny all;
|
||||
proxy_pass $backend_app;
|
||||
# ...
|
||||
}
|
||||
```
|
||||
|
||||
All four ranges (loopback plus the three RFC1918 blocks) must be present — omitting `172.16.0.0/12` silently breaks scrapes from a Prometheus container on the default Docker bridge.
|
||||
|
||||
5. **Security headers on the catch-all**, marked `always` so they apply to upstream 4xx/5xx:
|
||||
|
||||
```nginx
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
```
|
||||
|
||||
Stronger policies (CSP, Referrer-Policy, HSTS) are set at HAProxy so they're consistent across every backend.
|
||||
|
||||
6. **Catch-all proxies to Django.** nginx should intercept only the paths that need special handling (`/static/`, `/media/`, `/mcp/`, `/healthz`, `/metrics`, `/nginx_status`, the probes). Everything else flows through to Django, which returns its own themed 404 for unrouted paths — not nginx's bare default page.
|
||||
|
||||
### Minimal reference config
|
||||
|
||||
```nginx
|
||||
resolver 127.0.0.11 valid=10s;
|
||||
|
||||
map $http_x_forwarded_proto $proxy_x_forwarded_proto {
|
||||
default $http_x_forwarded_proto;
|
||||
"" $scheme;
|
||||
}
|
||||
|
||||
map $request_uri $loggable {
|
||||
default 1;
|
||||
~^/live(/|\?|$) 0;
|
||||
~^/ready(/|\?|$) 0;
|
||||
~^/metrics(/|\?|$) 0;
|
||||
~^/healthz(/|\?|$) 0;
|
||||
~^/nginx_status(/|\?|$) 0;
|
||||
}
|
||||
|
||||
access_log off;
|
||||
access_log /dev/stdout combined if=$loggable;
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
server_name _;
|
||||
|
||||
client_max_body_size 64m;
|
||||
|
||||
set $backend_app http://app:8000;
|
||||
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
|
||||
location /static/ {
|
||||
alias /var/www/static/;
|
||||
access_log off;
|
||||
expires 30d;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
location /media/ {
|
||||
alias /var/www/media/;
|
||||
access_log off;
|
||||
expires 7d;
|
||||
}
|
||||
|
||||
# Internal-only endpoints — allowlist applied uniformly
|
||||
location = /live/ { include /etc/nginx/snippets/internal-only.conf; proxy_pass $backend_app; include /etc/nginx/snippets/proxy-headers.conf; access_log off; }
|
||||
location = /ready/ { include /etc/nginx/snippets/internal-only.conf; proxy_pass $backend_app; include /etc/nginx/snippets/proxy-headers.conf; access_log off; }
|
||||
location = /metrics { include /etc/nginx/snippets/internal-only.conf; proxy_pass $backend_app; include /etc/nginx/snippets/proxy-headers.conf; access_log off; }
|
||||
location = /nginx_status {
|
||||
include /etc/nginx/snippets/internal-only.conf;
|
||||
stub_status on;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass $backend_app;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $proxy_x_forwarded_proto;
|
||||
proxy_redirect off;
|
||||
proxy_read_timeout 300s;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> Projects may inline the `proxy_set_header` block rather than using snippets; both are acceptable. The important thing is that every `proxy_pass` has the same four headers (`Host`, `X-Real-IP`, `X-Forwarded-For`, `X-Forwarded-Proto` using `$proxy_x_forwarded_proto`).
|
||||
|
||||
## Memcached
|
||||
|
||||
> **New in V1-02.** Memcached is a standard Red Panda dependency. Every Django service uses it for session storage, task-progress tracking, and cheap key-value caching.
|
||||
|
||||
- Package: `pymemcache` (via `django.core.cache.backends.memcached.PyMemcacheCache`)
|
||||
- Key prefix: per-app, configured via env var (e.g. `KVDB_PREFIX=mnemosyne`)
|
||||
- Cache-key pattern: `{app}:{model}:{identifier}:{field}`
|
||||
|
||||
### Deployment convention
|
||||
|
||||
Memcached runs as a service on the application host (in Ouranos: a package install per Incus container). Configure it to bind to **all interfaces**, not just `localhost`, so:
|
||||
|
||||
- Containers on the same host can reach it via the host's LAN name (e.g. `puck.incus:11211`).
|
||||
- Other hosts in the lab can reach it for multi-host debugging.
|
||||
|
||||
```bash
|
||||
# /etc/memcached.conf on the Docker host
|
||||
-l 0.0.0.0
|
||||
-p 11211
|
||||
-U 0
|
||||
```
|
||||
|
||||
The bare-Python "`localhost:11211` works" default is a dev-only convenience — it breaks as soon as Django moves into a container, because inside the container `127.0.0.1` is the container itself. The production `.env` must use the LAN-resolvable hostname:
|
||||
|
||||
```
|
||||
KVDB_LOCATION=puck.incus:11211
|
||||
KVDB_PREFIX=myproject
|
||||
```
|
||||
|
||||
### Health-check reachability
|
||||
|
||||
The Django readiness probe (`GET /ready/`) must verify Memcached is reachable. If the probe returns 503 and the log shows no cause, hit the endpoint directly to read the JSON body, which names the broken dependency:
|
||||
|
||||
```bash
|
||||
docker compose exec app curl -sS -o - -w "\nHTTP %{http_code}\n" http://localhost:8000/ready/
|
||||
```
|
||||
|
||||
## Code Organization
|
||||
- Imports: PEP 8 ordering (stdlib, third-party, local)
|
||||
- Type hints on function parameters
|
||||
- CSS: External .css files only (no inline styles, no embedded `<style>` tags)
|
||||
- JS: External .js files only (no inline handlers, no embedded `<script>` blocks)
|
||||
- Maximum file length: 1000 lines
|
||||
- If a file exceeds 500 lines, consider splitting by domain concept
|
||||
|
||||
## Database Conventions
|
||||
- Migrations run cleanly from empty database
|
||||
- Never edit deployed migrations
|
||||
- Use meaningful migration names: --name add_email_to_profile
|
||||
- One logical change per migration when possible
|
||||
- Test migrations both forward and backward
|
||||
|
||||
### Development vs Production
|
||||
- Development: SQLite
|
||||
- Production: PostgreSQL
|
||||
|
||||
## Caching
|
||||
- Expensive queries are cached
|
||||
- Cache keys follow naming convention
|
||||
- TTLs are appropriate (not infinite)
|
||||
- Invalidation is documented
|
||||
- Key Naming Pattern: {app}:{model}:{identifier}:{field}
|
||||
- See the **Memcached** section above for deployment details
|
||||
|
||||
## Model Naming
|
||||
- Model names: singular PascalCase (User, BlogPost, OrderItem)
|
||||
- Correct English pluralization on related names
|
||||
- All models have created_at and updated_at
|
||||
- All models define __str__ and get_absolute_url
|
||||
- TextChoices used for status fields
|
||||
- related_name defined on ForeignKey fields
|
||||
- Related names: plural snake_case with proper English pluralization
|
||||
|
||||
## Forms
|
||||
- Use ModelForm with explicit fields list (never __all__)
|
||||
|
||||
## Field Naming
|
||||
- Foreign keys: singular without _id suffix (author, category, parent)
|
||||
- Boolean fields: use prefixes (is_active, has_permission, can_edit)
|
||||
- Date fields: use suffixes (created_at, updated_at, published_on)
|
||||
- Avoid abbreviations (use description, not desc)
|
||||
|
||||
## Required Model Fields
|
||||
- All models should include:
|
||||
- created_at = models.DateTimeField(auto_now_add=True)
|
||||
- updated_at = models.DateTimeField(auto_now=True)
|
||||
- Consider adding:
|
||||
- id = models.UUIDField(primary_key=True) for public-facing models
|
||||
- is_active = models.BooleanField(default=True) for soft deletes
|
||||
|
||||
## Indexing
|
||||
- Add db_index=True to frequently queried fields
|
||||
- Use Meta.indexes for composite indexes
|
||||
- Document why each index exists
|
||||
|
||||
## Queries
|
||||
- Use select_related() for foreign keys
|
||||
- Use prefetch_related() for reverse relations and M2M
|
||||
- Avoid queries in loops (N+1 problem)
|
||||
- Use .only() and .defer() for large models
|
||||
- Add comments explaining complex querysets
|
||||
|
||||
## Docstrings
|
||||
- Use Sphinx style docstrings
|
||||
- Document all public functions, classes, and modules
|
||||
- Skip docstrings for obvious one-liners and standard Django overrides
|
||||
|
||||
## Views
|
||||
- Use Function-Based Views (FBVs) exclusively
|
||||
- Explicit logic is preferred over implicit inheritance
|
||||
- Extract shared logic into utility functions
|
||||
|
||||
## URLs & Identifiers
|
||||
|
||||
- Public URLs use short UUIDs (12 characters) via `shortuuid`
|
||||
- Never expose sequential IDs in URLs (security/enumeration risk)
|
||||
- Internal references may use standard UUIDs or PKs
|
||||
|
||||
## URL Patterns
|
||||
- Resource-based URLs (RESTful style)
|
||||
- Namespaced URL names per app
|
||||
- Trailing slashes (Django default)
|
||||
- Flat structure preferred over deep nesting
|
||||
|
||||
## Background Tasks
|
||||
- All tasks are run synchronously unless the design specifies background tasks are needed for long operations
|
||||
- Long operations use Celery tasks
|
||||
- Track task progress in Memcached using the key pattern: {app}:task:{task_id}:progress
|
||||
- Tasks are idempotent
|
||||
- Tasks include retry logic
|
||||
- Tasks live in app/tasks.py
|
||||
- RabbitMQ is the Message Broker
|
||||
- Flower Monitoring: Use for debugging failed tasks
|
||||
- Per-service env scoping: the Celery worker container consumes `CELERY_BROKER_URL` + `LLM_API_SECRETS_ENCRYPTION_KEY` + `DAEDALUS_S3_*` but NOT `ALLOWED_HOSTS`/`CSRF_TRUSTED_ORIGINS`/`MCP_REQUIRE_AUTH`/`EMAIL_*` (see Docker Compose section)
|
||||
|
||||
## Testing
|
||||
- Framework: Django TestCase (not pytest)
|
||||
- Separate test files per module: test_models.py, test_views.py, test_forms.py
|
||||
|
||||
## Frontend Standards
|
||||
|
||||
### New Projects (DaisyUI + Tailwind)
|
||||
- DaisyUI 4 via CDN for component classes
|
||||
- Tailwind CSS via CDN for utility classes
|
||||
- Theme management via Themis (DaisyUI `data-theme` attribute)
|
||||
- All apps extend `themis/base.html` for consistent navigation
|
||||
- No inline styles or scripts
|
||||
|
||||
### Existing Projects (Bootstrap 5)
|
||||
- Bootstrap 5 via CDN
|
||||
- Bootstrap Icons via CDN
|
||||
- Bootswatch for theme variants (if applicable)
|
||||
- django-bootstrap5 and crispy-bootstrap5 for form rendering
|
||||
|
||||
## Preferred Packages
|
||||
|
||||
### Core Django
|
||||
- django>=5.2,<6.0
|
||||
- django-environ — Environment variables
|
||||
|
||||
### Authentication & Security
|
||||
- django-allauth — User management
|
||||
- django-allauth-2fa — Two-factor authentication
|
||||
|
||||
### API Development
|
||||
- djangorestframework>=3.14,<4.0 — REST APIs
|
||||
- drf-spectacular — OpenAPI/Swagger documentation
|
||||
|
||||
### Encryption
|
||||
- cryptography — Fernet encryption for secrets/API keys
|
||||
|
||||
### Background Tasks
|
||||
- celery — Async task queue
|
||||
- django-celery-progress — Progress bars
|
||||
- flower — Celery monitoring
|
||||
|
||||
### Caching
|
||||
- pymemcache — Memcached backend
|
||||
|
||||
### Database
|
||||
- psycopg[binary] — PostgreSQL adapter
|
||||
- shortuuid — Short UUIDs for public URLs
|
||||
|
||||
### Production
|
||||
- gunicorn — WSGI server
|
||||
|
||||
### Shared Apps
|
||||
- django-heluca-themis — User preferences, themes, key management, navigation
|
||||
|
||||
### Deprecated / Removed
|
||||
- ~~pytz~~ — Use stdlib `zoneinfo` (Python 3.9+, Django 4+)
|
||||
- ~~Pillow~~ — Only add if your app needs ImageField
|
||||
- ~~django-heluca-core~~ — Replaced by Themis
|
||||
- ~~dj-database-url~~ — Use individual Django DB env vars instead
|
||||
|
||||
## Anti-Patterns to Avoid
|
||||
|
||||
### Docker Compose
|
||||
- Don't share a single `env_file:` across services (see per-service scoping above)
|
||||
- Don't put secrets in the compose file's `environment:` block as literals — use `${VAR}` interpolation
|
||||
- Don't commit a populated `.env` — only `.env.example`
|
||||
|
||||
### Nginx
|
||||
- Don't use `upstream` blocks for container hostnames without `resolver` + variable `proxy_pass` (nginx caches the IP and returns 502 after container restart)
|
||||
- Don't pass `$scheme` as `X-Forwarded-Proto` when behind an external TLS terminator — use `$proxy_x_forwarded_proto` via the map pattern
|
||||
- Don't rely on server-level `access_log` to override the image default — explicitly `access_log off;` first
|
||||
- Don't allowlist only `10.0.0.0/8` for `/metrics` — also include `172.16.0.0/12` for Docker bridge sources
|
||||
|
||||
### Memcached
|
||||
- Don't bind to `127.0.0.1` only on a host that runs Docker services — containers can't reach it
|
||||
- Don't use `KVDB_LOCATION=127.0.0.1:11211` in a containerised `.env` (127.0.0.1 is the container itself)
|
||||
|
||||
### Models
|
||||
- Don't use `Model.objects.get()` without handling `DoesNotExist`
|
||||
- Don't use `null=True` on `CharField` or `TextField` (use `blank=True, default=""`)
|
||||
- Don't use `related_name='+'` unless you have a specific reason
|
||||
- Don't override `save()` for business logic (use signals or service functions)
|
||||
- Don't use `auto_now=True` on fields you might need to manually set
|
||||
- Don't use `ForeignKey` without specifying `on_delete` explicitly
|
||||
- Don't use `Meta.ordering` on large tables (specify ordering in queries)
|
||||
|
||||
### Queries
|
||||
- Don't query inside loops (N+1 problem)
|
||||
- Don't use `.all()` when you need a subset
|
||||
- Don't use raw SQL unless absolutely necessary
|
||||
- Don't forget `select_related()` and `prefetch_related()`
|
||||
|
||||
### Views
|
||||
- Don't put business logic in views
|
||||
- Don't use `request.POST.get()` without validation (use forms)
|
||||
- Don't return sensitive data in error messages
|
||||
- Don't forget `login_required` decorator on protected views
|
||||
|
||||
### Forms
|
||||
- Don't use `fields = '__all__'` in ModelForm
|
||||
- Don't trust client-side validation alone
|
||||
- Don't use `exclude` in ModelForm (use explicit `fields`)
|
||||
|
||||
### Templates
|
||||
- Don't use `{{ variable }}` for URLs (use `{% url %}` tag)
|
||||
- Don't put logic in templates
|
||||
- Don't use inline CSS or JavaScript (external files only)
|
||||
- Don't forget `{% csrf_token %}` in forms
|
||||
|
||||
### Security
|
||||
- Don't store secrets in `settings.py` (use environment variables)
|
||||
- Don't commit `.env` files to version control
|
||||
- Don't use `DEBUG=True` in production
|
||||
- Don't expose sequential IDs in public URLs
|
||||
- Don't use `mark_safe()` on user-supplied content
|
||||
- Don't disable CSRF protection
|
||||
|
||||
### Imports & Code Style
|
||||
- Don't use `from module import *`
|
||||
- Don't use mutable default arguments
|
||||
- Don't use bare `except:` clauses
|
||||
- Don't ignore linter warnings without a documented reason
|
||||
|
||||
### Migrations
|
||||
- Don't edit migrations that have been deployed
|
||||
- Don't use `RunPython` without a reverse function
|
||||
- Don't add non-nullable fields without a default value
|
||||
|
||||
### Celery Tasks
|
||||
- Don't pass model instances to tasks (pass IDs and re-fetch)
|
||||
- Don't assume tasks run immediately
|
||||
- Don't forget retry logic for external service calls
|
||||
- Don't forget to percent-encode special characters in `CELERY_BROKER_URL` (`@ : / # % + ? & =` and space)
|
||||
400
docs/deploy.md
Normal file
400
docs/deploy.md
Normal file
@@ -0,0 +1,400 @@
|
||||
# Mnemosyne — Ansible Deployment Reference
|
||||
|
||||
This document gives the Ansible author everything needed to write and maintain the
|
||||
Mnemosyne deployment role. All implementation decisions are already locked in
|
||||
`docker-compose.yaml` and `nginx/mnemosyne.conf`; this document explains the
|
||||
*why* behind each decision and provides the authoritative list of variables,
|
||||
one-time steps, and verification checks.
|
||||
|
||||
---
|
||||
|
||||
## 1. Host & Stack Overview
|
||||
|
||||
| Item | Value |
|
||||
|------|-------|
|
||||
| Deploy target | `puck.incus` (Incus container, 10.10.0.0/24) |
|
||||
| Compose project directory | `/srv/mnemosyne` |
|
||||
| Image registry | `git.helu.ca/r/mnemosyne:latest` |
|
||||
| Public host port | **23181** (nginx → HAProxy on Titania → `https://mnemosyne.ouranos.helu.ca`) |
|
||||
| Internal app port | `app:8000` (Django/gunicorn) |
|
||||
| Internal MCP port | `mcp:8001` (FastMCP/uvicorn) |
|
||||
|
||||
The four compose services (`app`, `mcp`, `worker`, `web`) all run from the same
|
||||
image. A one-shot `static-init` service seeds the nginx static-file volume on
|
||||
every `up` so static-file changes propagate automatically on deploy without
|
||||
manual intervention.
|
||||
|
||||
---
|
||||
|
||||
## 2. External Dependencies (NOT managed by this role)
|
||||
|
||||
These services must exist before Mnemosyne can start. The role only consumes
|
||||
credentials; it does not provision these hosts.
|
||||
|
||||
| Service | Host | Notes |
|
||||
|---------|------|-------|
|
||||
| PostgreSQL | `portia.incus:5432` | Database `mnemosyne`, user `mnemosyne` |
|
||||
| Neo4j | `umbriel.incus:7687` | Bolt protocol. **Must be dedicated to Mnemosyne** — do not share with Spelunker or any other graph workload (see README §Note on Neo4j). HTTP browser on `umbriel.incus:25555`. |
|
||||
| RabbitMQ | `oberon.incus:5672` | vhost `mnemosyne`, user `mnemosyne` |
|
||||
| MinIO (Mnemosyne bucket) | `nyx.helu.ca:8555` | Bucket `mnemosyne-content`. Credentials scoped read+write. |
|
||||
| MinIO (Daedalus bucket) | `nyx.helu.ca:8555` | Bucket `daedalus`. **Read-only** cross-bucket credentials for the ingest worker. |
|
||||
| Memcached | `oberon.incus:11211` | Shared; prefix `mnemosyne` avoids collisions. |
|
||||
| Embedder (Qwen3-VL-Embedding) | Configured via `EMBEDDING_*` vars in settings | GPU host on Nyx; not managed here. |
|
||||
| Reranker (Synesis) | Configured via `RERANKER_*` vars in settings | GPU host on Nyx; not managed here. |
|
||||
|
||||
---
|
||||
|
||||
## 3. Role Tasks
|
||||
|
||||
### 3.1 Directory & file layout
|
||||
|
||||
```
|
||||
/srv/mnemosyne/
|
||||
├── docker-compose.yaml ← copied from repo (or symlinked via git pull)
|
||||
├── nginx/
|
||||
│ └── mnemosyne.conf ← copied from repo nginx/mnemosyne.conf
|
||||
└── .env ← rendered from Jinja2 template + vault secrets
|
||||
```
|
||||
|
||||
The role should:
|
||||
1. Create `/srv/mnemosyne/` and `nginx/` (owner: `root`, mode `0750`).
|
||||
2. Render `.env` from the vault-sourced Jinja2 template (mode `0600`, owner `root`).
|
||||
3. Copy (or `git pull`) `docker-compose.yaml` and `nginx/mnemosyne.conf` from the repo.
|
||||
|
||||
### 3.2 Pull & start
|
||||
|
||||
```yaml
|
||||
- name: Pull latest image
|
||||
community.docker.docker_compose_v2:
|
||||
project_src: /srv/mnemosyne
|
||||
pull: always
|
||||
|
||||
- name: Bring stack up
|
||||
community.docker.docker_compose_v2:
|
||||
project_src: /srv/mnemosyne
|
||||
state: present
|
||||
```
|
||||
|
||||
This triggers `static-init` automatically on every `up` — no separate handler needed.
|
||||
|
||||
### 3.3 One-time setup (run once on first deploy, idempotent thereafter)
|
||||
|
||||
These management commands are safe to re-run; they do nothing if the target state
|
||||
already exists. Run them as a post-start task gated on a `creates:` sentinel or
|
||||
an explicit `when: mnemosyne_first_deploy` flag.
|
||||
|
||||
```bash
|
||||
# Apply Django ORM migrations (PostgreSQL schema)
|
||||
docker compose -f /srv/mnemosyne/docker-compose.yaml run --rm app migrate
|
||||
|
||||
# Create Neo4j vector + full-text indexes and load library-type defaults
|
||||
docker compose -f /srv/mnemosyne/docker-compose.yaml \
|
||||
run --rm app setup
|
||||
|
||||
# Seed the MCPSigningKey used to sign long-lived Pallas team JWTs.
|
||||
# --retire-other deactivates any previously-active key. The hex
|
||||
# emitted to stdout is persisted in Mnemosyne's database and is
|
||||
# not re-injected from the vault — no operator action required
|
||||
# beyond running this command once per fresh deployment.
|
||||
docker compose -f /srv/mnemosyne/docker-compose.yaml \
|
||||
run --rm app \
|
||||
python manage.py seed_signing_key --kid daedalus-1 --retire-other
|
||||
|
||||
# Create Django groups for SSO role mapping (View Only / Staff / SME / Admin).
|
||||
# Safe to re-run — idempotent.
|
||||
docker compose -f /srv/mnemosyne/docker-compose.yaml \
|
||||
run --rm app \
|
||||
python manage.py create_sso_groups
|
||||
```
|
||||
|
||||
The `seed_signing_key` command prints the generated secret once to stdout — it
|
||||
is safe to discard that output after the command succeeds. Mnemosyne persists
|
||||
the active key inside `MCPSigningKey` and reads it directly when minting each
|
||||
team JWT; Daedalus never sees this value. To rotate, re-run the command with
|
||||
`--retire-other` and then rotate every Pallas team JWT via the Daedalus admin
|
||||
UI so consumers pick up bearers signed with the new key.
|
||||
|
||||
---
|
||||
|
||||
## 4. Environment Variables (`.env` template)
|
||||
|
||||
All variables are consumed by `docker-compose.yaml` for interpolation into the
|
||||
relevant service `environment:` blocks. The per-service scoping is defined in
|
||||
`docker-compose.yaml`; the `.env` file just provides values.
|
||||
|
||||
### Django core — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Example / default | Notes |
|
||||
|----------|-------------------|-------|
|
||||
| `SECRET_KEY` | `{{ vault_mnemosyne_secret_key }}` | Fernet-safe; never rotate without re-encrypting stored API keys first |
|
||||
| `DEBUG` | `False` | |
|
||||
| `TIME_ZONE` | `UTC` | |
|
||||
| `LANGUAGE_CODE` | `en-us` | |
|
||||
|
||||
### HTTP surface — `app` (CSRF), `app` + `mcp` (ALLOWED_HOSTS)
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `ALLOWED_HOSTS` | `localhost,127.0.0.1,mnemosyne.ouranos.helu.ca` |
|
||||
| `CSRF_TRUSTED_ORIGINS` | `https://mnemosyne.ouranos.helu.ca` |
|
||||
|
||||
### PostgreSQL — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `APP_DB_NAME` | `mnemosyne` |
|
||||
| `APP_DB_USER` | `mnemosyne` |
|
||||
| `APP_DB_PASSWORD` | `{{ vault_mnemosyne_db_password }}` |
|
||||
| `DB_HOST` | `portia.incus` |
|
||||
| `DB_PORT` | `5432` |
|
||||
|
||||
### Neo4j — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `NEOMODEL_NEO4J_BOLT_URL` | `bolt://neo4j:{{ vault_neo4j_password }}@umbriel.incus:7687` |
|
||||
|
||||
> **URL-encode the password** if it contains `@ : / # % + ? & =` or a space.
|
||||
> The Bolt URL parser is strict.
|
||||
|
||||
### Memcached — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `KVDB_LOCATION` | `oberon.incus:11211` |
|
||||
| `KVDB_PREFIX` | `mnemosyne` |
|
||||
|
||||
### S3 / MinIO (Mnemosyne bucket) — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `USE_LOCAL_STORAGE` | `False` |
|
||||
| `AWS_ACCESS_KEY_ID` | `{{ vault_mnemosyne_s3_key }}` |
|
||||
| `AWS_SECRET_ACCESS_KEY` | `{{ vault_mnemosyne_s3_secret }}` |
|
||||
| `AWS_STORAGE_BUCKET_NAME` | `mnemosyne-content` |
|
||||
| `AWS_S3_ENDPOINT_URL` | `https://nyx.helu.ca:8555` |
|
||||
| `AWS_S3_USE_SSL` | `True` |
|
||||
| `AWS_S3_VERIFY` | `False` (self-signed cert on Nyx) |
|
||||
| `AWS_S3_REGION_NAME` | `us-east-1` |
|
||||
|
||||
### Daedalus S3 (cross-bucket reads) — `worker` only
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `DAEDALUS_S3_ENDPOINT_URL` | `https://nyx.helu.ca:8555` |
|
||||
| `DAEDALUS_S3_ACCESS_KEY_ID` | `{{ vault_daedalus_s3_read_key }}` |
|
||||
| `DAEDALUS_S3_SECRET_ACCESS_KEY` | `{{ vault_daedalus_s3_read_secret }}` |
|
||||
| `DAEDALUS_S3_BUCKET_NAME` | `daedalus` |
|
||||
| `DAEDALUS_S3_REGION_NAME` | `us-east-1` |
|
||||
| `DAEDALUS_S3_USE_SSL` | `True` |
|
||||
| `DAEDALUS_S3_VERIFY` | `True` |
|
||||
|
||||
### Celery / RabbitMQ — `app` (producer), `worker` (consumer)
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `CELERY_BROKER_URL` | `amqp://mnemosyne:{{ vault_rabbitmq_password \| urlencode }}@oberon.incus:5672/mnemosyne` |
|
||||
| `CELERY_RESULT_BACKEND` | `rpc://` |
|
||||
| `CELERY_TASK_ALWAYS_EAGER` | `False` |
|
||||
|
||||
> **Percent-encode** the RabbitMQ password in the broker URL if it contains any
|
||||
> URL-special characters. Use Ansible's `urlencode` filter or pre-encode in the
|
||||
> vault variable. An unencoded password is the most common cause of
|
||||
> `PLAIN 403 ACCESS_REFUSED` at worker startup.
|
||||
|
||||
### Worker tuning — `worker` only
|
||||
|
||||
| Variable | Default | Notes |
|
||||
|----------|---------|-------|
|
||||
| `CELERY_QUEUES` | `celery,embedding,batch` | Override per host for dedicated queue workers |
|
||||
| `CELERY_CONCURRENCY` | `2` | Number of worker processes |
|
||||
|
||||
### MCP server — `mcp` only
|
||||
|
||||
| Variable | Production value |
|
||||
|----------|-----------------|
|
||||
| `MCP_REQUIRE_AUTH` | `True` |
|
||||
|
||||
### SSO / Casdoor — `app` only
|
||||
|
||||
| Variable | Example / default | Notes |
|
||||
|----------|-------------------|-------|
|
||||
| `CASDOOR_ENABLED` | `True` | Set `False` to disable SSO and show only local login |
|
||||
| `CASDOOR_ORIGIN` | `https://casdoor.ouranos.helu.ca` | Backend URL used for OIDC discovery (`/.well-known/openid-configuration`) |
|
||||
| `CASDOOR_ORIGIN_FRONTEND` | `https://casdoor.ouranos.helu.ca` | Frontend URL shown to the browser (may differ behind a reverse proxy) |
|
||||
| `CASDOOR_CLIENT_ID` | `{{ vault_mnemosyne_casdoor_client_id }}` | OAuth client ID from the Casdoor application |
|
||||
| `CASDOOR_CLIENT_SECRET` | `{{ vault_mnemosyne_casdoor_client_secret }}` | OAuth client secret from the Casdoor application |
|
||||
| `CASDOOR_ORG_NAME` | `ouranos` | Default organisation slug in Casdoor |
|
||||
| `CASDOOR_SSL_VERIFY` | `true` | `true` in production; `false` only in sandboxes with self-signed certs |
|
||||
| `ALLOW_LOCAL_LOGIN` | `False` | Show the local username/password form to non-superusers. Superusers always see it regardless of this flag. |
|
||||
|
||||
Register the OIDC callback URL in the Casdoor application before enabling SSO:
|
||||
|
||||
```
|
||||
https://mnemosyne.ouranos.helu.ca/accounts/oidc/casdoor/login/callback/
|
||||
```
|
||||
|
||||
### LLM API encryption — `app`, `worker`
|
||||
|
||||
| Variable | Notes |
|
||||
|----------|-------|
|
||||
| `LLM_API_SECRETS_ENCRYPTION_KEY` | Fernet key. Generate once: `python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"`. Never rotate without re-encrypting all stored provider keys first. |
|
||||
|
||||
### Email — `app` only
|
||||
|
||||
| Variable | Example |
|
||||
|----------|---------|
|
||||
| `EMAIL_HOST` | `oberon.incus` |
|
||||
| `EMAIL_PORT` | `22025` |
|
||||
| `EMAIL_USE_TLS` | `False` |
|
||||
|
||||
### Embedding pipeline — `worker` only
|
||||
|
||||
| Variable | Default |
|
||||
|----------|---------|
|
||||
| `EMBEDDING_BATCH_SIZE` | `8` |
|
||||
| `EMBEDDING_TIMEOUT` | `120` |
|
||||
|
||||
### Search & re-ranker — `app`, `mcp`
|
||||
|
||||
| Variable | Default |
|
||||
|----------|---------|
|
||||
| `SEARCH_VECTOR_TOP_K` | `50` |
|
||||
| `SEARCH_FULLTEXT_TOP_K` | `30` |
|
||||
| `SEARCH_GRAPH_MAX_DEPTH` | `2` |
|
||||
| `SEARCH_RRF_K` | `60` |
|
||||
| `SEARCH_DEFAULT_LIMIT` | `20` |
|
||||
| `RERANKER_MAX_CANDIDATES` | `32` |
|
||||
| `RERANKER_TIMEOUT` | `30` |
|
||||
|
||||
### Logging — `app`, `mcp`, `worker`
|
||||
|
||||
| Variable | Default |
|
||||
|----------|---------|
|
||||
| `LOGGING_LEVEL` | `INFO` |
|
||||
| `DJANGO_LOGGING_LEVEL` | `WARNING` |
|
||||
| `CELERY_LOGGING_LEVEL` | `INFO` |
|
||||
|
||||
---
|
||||
|
||||
## 5. Health Probes & Verification
|
||||
|
||||
After `docker compose up -d`, wait for all services to report healthy:
|
||||
|
||||
```bash
|
||||
docker compose -f /srv/mnemosyne/docker-compose.yaml ps
|
||||
```
|
||||
|
||||
Expected: `app`, `mcp`, `worker`, `web` all `healthy`; `static-init` `exited (0)`.
|
||||
|
||||
### Per-service probes
|
||||
|
||||
| Service | Healthcheck command | Expected |
|
||||
|---------|---------------------|----------|
|
||||
| `app` | `curl -f http://localhost:8000/live/` | 200 |
|
||||
| `mcp` | `curl -f http://localhost:8001/mcp/health` | 200 JSON |
|
||||
| `web` | `curl -f http://localhost/live/` | 200 (proxied to app) |
|
||||
| `worker` | `celery -A mnemosyne inspect ping -d celery@$HOSTNAME` | `pong` |
|
||||
|
||||
### External checks (from inside the 10.10.0.0/24 network)
|
||||
|
||||
```bash
|
||||
# Django liveness (via nginx)
|
||||
curl -f http://puck.incus:23181/live/
|
||||
|
||||
# Django readiness (Postgres + Memcached)
|
||||
curl -f http://puck.incus:23181/ready/
|
||||
|
||||
# MCP health (proxied from /healthz → mcp:8001/mcp/health)
|
||||
curl -f http://puck.incus:23181/healthz
|
||||
|
||||
# Prometheus metrics (internal only)
|
||||
curl http://puck.incus:23181/metrics | head -5
|
||||
```
|
||||
|
||||
### Verify Daedalus auth (per-user API token)
|
||||
|
||||
Daedalus now authenticates as a Mnemosyne user via a `UserToken` minted
|
||||
at `/profile/tokens/`. To smoke-test from a deploy host:
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer <user-token-plaintext>" \
|
||||
https://mnemosyne.ouranos.helu.ca/library/api/workspaces/ws_smoke/ \
|
||||
-o /dev/null -w "%{http_code}"
|
||||
# Expect: 200 if the workspace exists for that user, 404 otherwise.
|
||||
# An anonymous request gets 401 with `WWW-Authenticate: Bearer`.
|
||||
```
|
||||
|
||||
### Verify MCP connectivity (from a client with a valid UserToken)
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer <token>" \
|
||||
https://mnemosyne.ouranos.helu.ca/mcp/health
|
||||
# Expect: {"status": "ok", ...}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Upgrade Procedure
|
||||
|
||||
A standard upgrade (new image pushed to `git.helu.ca/r/mnemosyne:latest`):
|
||||
|
||||
```bash
|
||||
cd /srv/mnemosyne
|
||||
docker compose pull
|
||||
docker compose up -d # static-init re-seeds; running containers replaced
|
||||
docker compose run --rm app migrate # no-op if no new migrations
|
||||
```
|
||||
|
||||
The `static-init` service runs to completion on every `up`, propagating static
|
||||
file changes without manual volume reset.
|
||||
|
||||
---
|
||||
|
||||
## 7. Rollback
|
||||
|
||||
```bash
|
||||
# Pin to a specific digest
|
||||
docker pull git.helu.ca/r/mnemosyne@sha256:<digest>
|
||||
# Edit docker-compose.yaml image: line to use the digest, then:
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Alternatively, tag good images in the registry before each deploy and reference
|
||||
the tag.
|
||||
|
||||
---
|
||||
|
||||
## 8. HAProxy / Titania Configuration Notes
|
||||
|
||||
Titania terminates TLS and forwards to `puck.incus:23181`. The nginx config
|
||||
preserves `X-Forwarded-Proto: https` so Django's `request.is_secure()`, secure
|
||||
cookies, and `build_absolute_uri()` work correctly.
|
||||
|
||||
The HAProxy `health_path` for this backend should be `/healthz` (not `/live/` or
|
||||
`/ready/`) — `/healthz` short-circuits directly to the FastMCP health endpoint
|
||||
without touching Django, so it can confirm the MCP server is up even if Django
|
||||
is momentarily unhealthy.
|
||||
|
||||
If HAProxy checks don't follow redirects, use `/live/` and `/ready/` **with** the
|
||||
trailing slash. The un-slashed forms (`/live`, `/ready`) trigger Django's
|
||||
`APPEND_SLASH` 301 redirect, which health checkers that don't follow redirects
|
||||
will report as a failure.
|
||||
|
||||
---
|
||||
|
||||
## 9. Vault Variables Summary
|
||||
|
||||
| Vault variable | Used in `.env` as |
|
||||
|----------------|-------------------|
|
||||
| `vault_mnemosyne_secret_key` | `SECRET_KEY` |
|
||||
| `vault_mnemosyne_db_password` | `APP_DB_PASSWORD` |
|
||||
| `vault_neo4j_password` | embedded in `NEOMODEL_NEO4J_BOLT_URL` |
|
||||
| `vault_mnemosyne_s3_key` | `AWS_ACCESS_KEY_ID` |
|
||||
| `vault_mnemosyne_s3_secret` | `AWS_SECRET_ACCESS_KEY` |
|
||||
| `vault_daedalus_s3_read_key` | `DAEDALUS_S3_ACCESS_KEY_ID` |
|
||||
| `vault_daedalus_s3_read_secret` | `DAEDALUS_S3_SECRET_ACCESS_KEY` |
|
||||
| `vault_rabbitmq_password` | embedded in `CELERY_BROKER_URL` |
|
||||
| `vault_mnemosyne_llm_encryption_key` | `LLM_API_SECRETS_ENCRYPTION_KEY` |
|
||||
| `vault_mnemosyne_casdoor_client_id` | `CASDOOR_CLIENT_ID` |
|
||||
| `vault_mnemosyne_casdoor_client_secret` | `CASDOOR_CLIENT_SECRET` |
|
||||
@@ -294,31 +294,37 @@ graph LR
|
||||
</div>
|
||||
|
||||
<div class="alert alert-warning border-start border-4 border-warning">
|
||||
<h4><i class="bi bi-lightning"></i> Neo4j Vector Indexes</h4>
|
||||
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
|
||||
CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
|
||||
<h4><i class="bi bi-lightning"></i> Neo4j Indexes (managed by <code>setup_neo4j_indexes</code>)</h4>
|
||||
<p>Run manually after the first <code>docker compose up</code>, once the system embedding model has been configured in <code>/admin/llm_manager/llmmodel/</code>: <code>docker compose exec app python manage.py setup_neo4j_indexes</code>. Vector dimensions come from the model's <code>vector_dimensions</code> field — the command hard-fails if no such row exists, which is why it is <em>not</em> bundled into the <code>init</code> sidecar (doing so would make the admin unreachable on first boot). Current production model: <strong>Pan Synesis · qwen3-vl-embedding-2b · 2048d</strong>.</p>
|
||||
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (dimensions read from system embedding model)
|
||||
CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
|
||||
ON (c.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Concept embeddings for semantic concept search
|
||||
CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
|
||||
CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
|
||||
ON (con.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Image multimodal embeddings
|
||||
CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
|
||||
CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
|
||||
ON (ie.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Full-text index for keyword/BM25-style search
|
||||
CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]</code></pre>
|
||||
// Full-text indexes (BM25-style keyword search)
|
||||
CREATE FULLTEXT INDEX chunk_text_fulltext FOR (c:Chunk) ON EACH [c.text_preview]
|
||||
CREATE FULLTEXT INDEX concept_name_fulltext FOR (c:Concept) ON EACH [c.name]
|
||||
CREATE FULLTEXT INDEX item_title_fulltext FOR (i:Item) ON EACH [i.title]
|
||||
CREATE FULLTEXT INDEX library_name_fulltext FOR (l:Library) ON EACH [l.name]</code></pre>
|
||||
<p class="mb-0 mt-3"><strong>Changing the embedding model or dimensions is a re-embedding event.</strong> Drop + recreate the vector indexes (<code>setup_neo4j_indexes --drop</code>) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.</p>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
<!-- SECTION: CONTENT TYPES -->
|
||||
@@ -521,10 +527,11 @@ flowchart TD
|
||||
<div class="card-body">
|
||||
<p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
|
||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
|
||||
'chunk_embedding', 30,
|
||||
'chunk_embedding_index', 30,
|
||||
$query_vector
|
||||
) YIELD node, score
|
||||
WHERE score > $threshold</code></pre>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -548,9 +555,10 @@ RETURN c2, i2</code></pre>
|
||||
<div class="card-body">
|
||||
<p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
|
||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
|
||||
'chunk_fulltext',
|
||||
'chunk_text_fulltext',
|
||||
$query_text
|
||||
) YIELD node, score</code></pre>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Mnemosyne Integration — Daedalus & Pallas Reference
|
||||
|
||||
This document summarises the Mnemosyne-specific implementation required for integration with the Daedalus & Pallas architecture. The full specification lives in [`daedalus/docs/mnemosyne_integration.md`](../../daedalus/docs/mnemosyne_integration.md).
|
||||
This document describes Mnemosyne's role in the Daedalus + Pallas architecture and what's actually built today. The Daedalus-side spec lives in [`daedalus/docs/mnemosyne_integration.md`](../../daedalus/docs/mnemosyne_integration.md).
|
||||
|
||||
---
|
||||
|
||||
@@ -8,49 +8,62 @@ This document summarises the Mnemosyne-specific implementation required for inte
|
||||
|
||||
Mnemosyne exposes two interfaces for the wider Ouranos ecosystem:
|
||||
|
||||
1. **MCP Server** (port 22091) — consumed by Pallas agents for synchronous search, browse, and retrieval operations
|
||||
2. **REST Ingest API** — consumed by the Daedalus backend for asynchronous file ingestion and embedding job lifecycle management
|
||||
1. **REST API** (`/library/api/*`) — consumed by the Daedalus backend for workspace lifecycle and asynchronous file ingestion. Daedalus authenticates as the owning Mnemosyne user with a per-user `UserToken` (`Authorization: Bearer <plaintext>`, minted at `/profile/tokens/`). Phase 1, **implemented**.
|
||||
2. **MCP Server** (port 8001 inside the `mcp` container, published at `/mcp/` via nginx on host port 23181) — exposes search, browse, and retrieval tools. Phase 5 of Mnemosyne's own roadmap, **implemented** with workspace-scoped access control via long-lived team JWTs. Consumed by Pallas FastAgents in production (Daedalus integration Phase 2, **implemented** — see [Phase 3 of this doc](#3-phase-3-long-lived-team-jwt-access-control-for-pallas-instances)).
|
||||
|
||||
### Phase status
|
||||
|
||||
| Phase | What | Status |
|
||||
|-------|------|--------|
|
||||
| 1. REST workspace + ingest API for Daedalus | `POST /workspaces/`, `DELETE /workspaces/{id}/`, `POST /ingest/`, `GET /jobs/{id}/` | **Implemented** |
|
||||
| 2. MCP Server (Mnemosyne roadmap Phase 5) | `search`, `get_chunk`, `list_libraries`, `list_collections`, `list_items`, `get_health` | **Implemented** (workspace_id scoping enforced in Cypher) |
|
||||
| 3. Long-lived team JWT access control for Pallas instances | Mnemosyne mints a 10-year HS256 JWT per Pallas instance (Team); Daedalus stores it encrypted and the operator pastes the plaintext into `fastagent.secrets.yaml`. Mnemosyne scopes search to the team's assigned workspaces via `TeamWorkspaceAssignment`. | **Implemented** |
|
||||
|
||||
---
|
||||
|
||||
## 1. MCP Server (Phase 5)
|
||||
## 1. MCP Server
|
||||
|
||||
### Port & URL
|
||||
|
||||
| Service | Port | URL |
|
||||
|---------|------|-----|
|
||||
| Mnemosyne MCP | 22091 | `http://puck.incus:22091/mcp` |
|
||||
| Health check | 22091 | `http://puck.incus:22091/mcp/health` |
|
||||
| Endpoint | Internal (container) | Public (via nginx on host port 23181) |
|
||||
|---|---|---|
|
||||
| Django REST API | `http://app:8000/` | `https://mnemosyne.ouranos.helu.ca/` |
|
||||
| MCP server | `http://mcp:8001/mcp/` | `https://mnemosyne.ouranos.helu.ca/mcp/` |
|
||||
| MCP health | `http://mcp:8001/mcp/health` | `https://mnemosyne.ouranos.helu.ca/healthz` |
|
||||
| Django liveness | `http://app:8000/live/` | internal only |
|
||||
| Django readiness | `http://app:8000/ready/` | internal only |
|
||||
|
||||
### Project Structure
|
||||
### Project structure (as built)
|
||||
|
||||
Following the [Django MCP Pattern](Pattern_Django-MCP_V1-00.md):
|
||||
Follows the [Django MCP Pattern](Pattern_Django-MCP_V1-00.md):
|
||||
|
||||
```
|
||||
mnemosyne/mnemosyne/mcp_server/
|
||||
├── __init__.py
|
||||
├── server.py # FastMCP instance + tool registration
|
||||
├── asgi.py # Starlette ASGI mount at /mcp
|
||||
├── middleware.py # MCPAuthMiddleware (disabled for internal use)
|
||||
├── auth.py # MCPAuthMiddleware
|
||||
├── context.py # get_mcp_user(), get_mcp_token()
|
||||
└── tools/
|
||||
├── __init__.py
|
||||
├── search.py # register_search_tools(mcp) → search_knowledge, search_by_category
|
||||
├── browse.py # register_browse_tools(mcp) → list_libraries, list_collections, get_item, get_concepts
|
||||
├── search.py # register_search_tools(mcp) → search, get_chunk
|
||||
├── discovery.py # register_discovery_tools(mcp) → list_libraries, list_collections, list_items
|
||||
└── health.py # register_health_tools(mcp) → get_health
|
||||
```
|
||||
|
||||
### Tools to Implement
|
||||
The ASGI mount lives at `mnemosyne/mnemosyne/asgi.py` (project-level) — it composes the FastMCP app at `/mcp/` with a 307 redirect from bare `/mcp` so MCP clients that omit the trailing slash still land correctly.
|
||||
|
||||
### Tools (as implemented)
|
||||
|
||||
| Tool | Module | Description |
|
||||
|------|--------|-------------|
|
||||
| `search_knowledge` | `search.py` | Hybrid vector + full-text + graph search → re-rank → return chunks with citations |
|
||||
| `search_by_category` | `search.py` | Same as above, scoped to a specific `library_type` |
|
||||
| `list_libraries` | `browse.py` | List all libraries with type, description, counts |
|
||||
| `list_collections` | `browse.py` | List collections within a library |
|
||||
| `get_item` | `browse.py` | Retrieve item detail with chunk previews and concept links |
|
||||
| `get_concepts` | `browse.py` | Traverse concept graph from a starting concept or item |
|
||||
| `get_health` | `health.py` | Check Neo4j, S3, embedding model reachability |
|
||||
| `search` | `search.py` | Hybrid vector + full-text + concept-graph search → fusion → optional Synesis re-rank. Accepts `library_uid`, `library_type`, `collection_uid`, and (system-injected, undocumented to LLM) `workspace_id` for scoping. |
|
||||
| `get_chunk` | `search.py` | Fetch full text of a chunk by uid (typically obtained from `search`). Honors workspace_id scoping. |
|
||||
| `list_libraries` | `discovery.py` | List libraries with uid, name, library_type, description. Workspace_id-aware. |
|
||||
| `list_collections` | `discovery.py` | List collections, optionally filtered by parent library. Workspace_id-aware. |
|
||||
| `list_items` | `discovery.py` | List items with chunk_count, image_count, embedding_status. Workspace_id-aware. |
|
||||
| `get_health` | `health.py` | Check Neo4j, S3, embedding model reachability. Used by Pallas health pollers. |
|
||||
|
||||
The `workspace_id` parameter is present on every search/discovery tool but is **deliberately undocumented in the LLM-facing tool description** — it's a system-injected field the calling LLM should never know about. A workspace-scoped query returns ONLY that workspace's content; an unscoped query (workspace_id is NULL) returns ONLY global libraries. There is no mode that mixes the two — see `library/services/search.py`, `_WORKSPACE_SCOPE_CLAUSE`.
|
||||
|
||||
### MCP Resources
|
||||
|
||||
@@ -61,17 +74,16 @@ mnemosyne/mnemosyne/mcp_server/
|
||||
|
||||
### Deployment
|
||||
|
||||
Separate Uvicorn process alongside Django's Gunicorn:
|
||||
Production runs as four containers from a single image via `docker-compose.yaml`. The nginx `web` container is the only publicly-exposed service, listening on **host port 23181**, which HAProxy on Titania reverse-proxies as `https://mnemosyne.ouranos.helu.ca`.
|
||||
|
||||
```bash
|
||||
# Django WSGI (existing)
|
||||
gunicorn --bind :22090 --workers 3 mnemosyne.wsgi
|
||||
| Container | Internal port | Role |
|
||||
|-----------|--------------|------|
|
||||
| `app` | 8000 | Django REST API + admin (gunicorn) |
|
||||
| `mcp` | 8001 | FastMCP ASGI server (uvicorn) |
|
||||
| `worker` | — | Celery worker (embedding/ingest/batch) |
|
||||
| `web` | 80 → host **23181** | nginx reverse proxy + static files |
|
||||
|
||||
# MCP ASGI (new)
|
||||
uvicorn mcp_server.asgi:app --host 0.0.0.0 --port 22091 --workers 1
|
||||
```
|
||||
|
||||
Auth is disabled (`MCP_REQUIRE_AUTH=False`) since all traffic is internal (10.10.0.0/24).
|
||||
Auth is controlled by `MCP_REQUIRE_AUTH` in `.env`. Production sets it to `True`; the internal validator and ad-hoc testing may use `False` on an isolated network.
|
||||
|
||||
### ⚠️ DEBUG LOG Points — MCP Server
|
||||
|
||||
@@ -91,22 +103,30 @@ Auth is disabled (`MCP_REQUIRE_AUTH=False`) since all traffic is internal (10.10
|
||||
|
||||
---
|
||||
|
||||
## 2. REST Ingest API
|
||||
## 2. REST API for Daedalus
|
||||
|
||||
### New Endpoints
|
||||
All endpoints require an `Authorization: Bearer <plaintext>` header carrying a `UserToken` issued to the Mnemosyne user who owns the workspace (minted at `/profile/tokens/`). Workspaces are scoped to their creating user via the `Library.owner_username` property; cross-user access returns 404. Anonymous requests receive 401 with `WWW-Authenticate: Bearer`. These endpoints are consumed by the Daedalus FastAPI backend only — not by any frontend.
|
||||
|
||||
### Workspace lifecycle
|
||||
|
||||
| Method | Route | Purpose |
|
||||
|--------|-------|---------|
|
||||
| `POST` | `/api/v1/library/ingest` | Accept a file for ingestion + embedding |
|
||||
| `GET` | `/api/v1/library/jobs/{job_id}` | Poll job status |
|
||||
| `POST` | `/api/v1/library/jobs/{job_id}/retry` | Retry a failed job |
|
||||
| `GET` | `/api/v1/library/jobs` | List recent jobs (optional `?status=` filter) |
|
||||
| `POST` | `/library/api/workspaces/` | Create workspace Library. Body: `{workspace_id, name, library_type, description?}`. Idempotent on `workspace_id`. `library_type` frozen at create. |
|
||||
| `GET` | `/library/api/workspaces/{workspace_id}/` | Workspace status (item_count, chunk_count, library_uid). |
|
||||
| `DELETE` | `/library/api/workspaces/{workspace_id}/` | Delete workspace Library + reachable content. Concept-safe: orphan-only Concept GC; concepts referenced by other libraries survive. |
|
||||
|
||||
These endpoints are consumed by the **Daedalus FastAPI backend** only. Not by the frontend.
|
||||
### Ingest
|
||||
|
||||
### New Model: `IngestJob`
|
||||
| Method | Route | Purpose |
|
||||
|--------|-------|---------|
|
||||
| `POST` | `/library/api/ingest/` | Accept a file (already in S3) for ingestion + embedding |
|
||||
| `GET` | `/library/api/jobs/{job_id}/` | Poll job status |
|
||||
| `POST` | `/library/api/jobs/{job_id}/retry/` | Retry a failed job |
|
||||
| `GET` | `/library/api/jobs/?status=&library_uid=` | List recent jobs |
|
||||
|
||||
Add to `library/` app (Django ORM on PostgreSQL, not Neo4j):
|
||||
### Model: `IngestJob`
|
||||
|
||||
Lives in `library/models.py` (Django ORM on PostgreSQL, not Neo4j). Migration: `library/migrations/0001_initial.py`.
|
||||
|
||||
```python
|
||||
class IngestJob(models.Model):
|
||||
@@ -153,14 +173,16 @@ class IngestJob(models.Model):
|
||||
|
||||
### Ingest Request Schema
|
||||
|
||||
The target Library can be specified by either `workspace_id` (preferred for Daedalus) or `library_uid`. The idempotency key is the triple `(library, source_ref, content_hash)`: submitting the same triple returns the existing job, while a new `content_hash` for an existing `source_ref` supersedes the prior Item.
|
||||
|
||||
```json
|
||||
{
|
||||
"s3_key": "workspaces/ws_abc/files/f_def/report.pdf",
|
||||
"title": "Q4 Technical Report",
|
||||
"library_uid": "lib_technical_001",
|
||||
"collection_uid": "col_reports_2026",
|
||||
"workspace_id": "ws_abc",
|
||||
"file_type": "application/pdf",
|
||||
"file_size": 245000,
|
||||
"content_hash": "<sha256 hex, 64 chars>",
|
||||
"source": "daedalus",
|
||||
"source_ref": "ws_abc/f_def"
|
||||
}
|
||||
@@ -198,39 +220,34 @@ class IngestJob(models.Model):
|
||||
|
||||
## 3. Celery Embedding Pipeline
|
||||
|
||||
### New Task: `embed_item`
|
||||
### Task: `ingest_from_daedalus`
|
||||
|
||||
Defined in `library/tasks.py`. Routed to the `embedding` queue (per `CELERY_TASK_ROUTES["library.tasks.ingest_*"]`). Wraps the existing `EmbeddingPipeline.process_item`.
|
||||
|
||||
```python
|
||||
@shared_task(
|
||||
name="library.embed_item",
|
||||
name="library.tasks.ingest_from_daedalus",
|
||||
bind=True,
|
||||
queue="embedding",
|
||||
max_retries=3,
|
||||
default_retry_delay=60,
|
||||
autoretry_for=(S3ConnectionError, EmbeddingModelError),
|
||||
retry_backoff=True,
|
||||
retry_backoff_max=600,
|
||||
acks_late=True,
|
||||
queue="embedding",
|
||||
)
|
||||
def embed_item(self, job_id, item_uid):
|
||||
...
|
||||
def ingest_from_daedalus(self, job_id: str): ...
|
||||
```
|
||||
|
||||
### Task Flow
|
||||
### Task flow (as built)
|
||||
|
||||
1. Update job → `processing` / `fetching`
|
||||
2. Fetch file from Daedalus S3 bucket (cross-bucket read)
|
||||
3. Copy to Mnemosyne's own S3 bucket
|
||||
4. Load library type → chunking config
|
||||
5. Chunk content per strategy
|
||||
6. Store chunk text in S3
|
||||
7. Generate embeddings (Arke/vLLM batch call)
|
||||
8. Write Chunk nodes + vectors to Neo4j
|
||||
9. Extract concepts (LLM call)
|
||||
10. Build graph relationships
|
||||
11. Update job → `completed`
|
||||
1. Mark job `processing`, set `started_at`.
|
||||
2. Resolve target Library by `library_uid`.
|
||||
3. If a prior Item exists for this Library with the same `source_ref` but a *different* `content_hash`, delete it (chunks + images + embeddings) before continuing.
|
||||
4. Fetch file bytes from the Daedalus S3 bucket via `library.services.daedalus_s3.fetch_from_daedalus`.
|
||||
5. Create the `Item` neomodel node with `s3_key=items/{item_uid}/original.{ext}` and copy bytes into Mnemosyne's own bucket.
|
||||
6. Connect to a default Collection for the Library (auto-created on first ingest).
|
||||
7. Run `EmbeddingPipeline.process_item(item.uid)` — chunk per `library_type`, embed via the configured model, write Chunks + Concepts to Neo4j.
|
||||
8. Mark job `completed` with `chunks_created`, `concepts_extracted`, `embedding_model`, `completed_at`.
|
||||
|
||||
On failure at any step: update job → `failed` with error message.
|
||||
On any exception with retries remaining: re-raise via `self.retry()` (exponential backoff). On terminal failure: mark job `failed` with the exception text.
|
||||
|
||||
### ⚠️ DEBUG LOG Points — Celery Worker (Critical)
|
||||
|
||||
@@ -329,23 +346,86 @@ mnemosyne_s3_operations_total{operation,status} counter
|
||||
|
||||
## 6. Implementation Phases (Mnemosyne-specific)
|
||||
|
||||
### Phase 1 — REST Ingest API
|
||||
- [ ] Create `IngestJob` model + Django migration
|
||||
- [ ] Implement `POST /api/v1/library/ingest` endpoint
|
||||
- [ ] Implement `GET /api/v1/library/jobs/{job_id}` endpoint
|
||||
- [ ] Implement `POST /api/v1/library/jobs/{job_id}/retry` endpoint
|
||||
- [ ] Implement `GET /api/v1/library/jobs` list endpoint
|
||||
- [ ] Implement `embed_item` Celery task with full debug logging
|
||||
- [ ] Add S3 cross-bucket copy logic
|
||||
- [ ] Add ingest API serializers and URL routing
|
||||
### Phase 1 — REST API for Daedalus (workspace + ingest) ✅ Implemented
|
||||
- [x] `Library.workspace_id` + `library_type` enum (added `business`, `finance`)
|
||||
- [x] `IngestJob` Django ORM model + migration `0001_initial.py`
|
||||
- [x] `POST /library/api/workspaces/`, `GET /library/api/workspaces/{id}/`, `DELETE /library/api/workspaces/{id}/` (concept-safe)
|
||||
- [x] `POST /library/api/ingest/` with `(library, source_ref, content_hash)` idempotency
|
||||
- [x] `GET /library/api/jobs/{job_id}/`, `POST .../retry/`, `GET /library/api/jobs/`
|
||||
- [x] `library.tasks.ingest_from_daedalus` Celery task with content-hash-aware supersede logic
|
||||
- [x] `library.services.daedalus_s3` cross-bucket fetch + copy
|
||||
- [x] Per-user `UserToken` auth (`Authorization: Bearer <plaintext>`, minted at `/profile/tokens/`); workspaces scoped to the owning user via `Library.owner_username`
|
||||
|
||||
### Phase 2 — MCP Server (Phase 5 of Mnemosyne roadmap)
|
||||
- [ ] Create `mcp_server/` module following Django MCP Pattern
|
||||
- [ ] Implement `search_knowledge` tool (hybrid search + re-rank)
|
||||
- [ ] Implement `search_by_category` tool
|
||||
- [ ] Implement `list_libraries`, `list_collections`, `get_item`, `get_concepts` tools
|
||||
- [ ] Implement `get_health` tool per Pallas health spec
|
||||
- [ ] Register MCP resources (`mnemosyne://library-types`, `mnemosyne://libraries`)
|
||||
- [ ] ASGI mount + Uvicorn deployment on port 22091
|
||||
- [ ] Systemd service for MCP Uvicorn process
|
||||
- [ ] Add Prometheus metrics
|
||||
### Phase 2 — MCP Server (Mnemosyne roadmap Phase 5) ✅ Implemented
|
||||
- [x] `mcp_server/` module following the [Django MCP Pattern](Pattern_Django-MCP_V1-00.md)
|
||||
- [x] `search` tool (hybrid vector + fulltext + concept-graph + Synesis re-rank)
|
||||
- [x] `get_chunk` tool (full text by chunk_uid)
|
||||
- [x] `list_libraries`, `list_collections`, `list_items` discovery tools
|
||||
- [x] `get_health` tool (Neo4j + S3 + embedding model probes)
|
||||
- [x] Workspace_id parameter on every search/discovery tool (undocumented to LLM, scoping enforced in Cypher)
|
||||
- [x] Single-mode rule: workspace-scoped vs global, never both in one query
|
||||
- [x] ASGI mount + uvicorn deployment on port 8001 in the `mcp` container; nginx proxies `/mcp/` on host port 23181
|
||||
- [x] Prometheus metrics (`mnemosyne_mcp_*`)
|
||||
|
||||
### Phase 3 — Long-lived team JWT access control for Pallas instances ✅ Implemented
|
||||
|
||||
Each Pallas instance registered in Daedalus is mirrored as a Mnemosyne **Team**. Mnemosyne mints a long-lived (10-year) HS256 JWT for the team; the operator pastes the plaintext into the Pallas instance's `fastagent.secrets.yaml`. Every MCP call from that Pallas instance carries the team JWT as a static `Authorization: Bearer` header. Mnemosyne validates the JWT and scopes search to the workspaces assigned to that team.
|
||||
|
||||
**Mnemosyne-side components:**
|
||||
|
||||
- [x] `MCPSigningKey` model — stores active HS256 secrets keyed by `kid`. Managed via `manage.py seed_signing_key --kid <kid>`. The hex stays in Mnemosyne's DB; Daedalus never sees it.
|
||||
- [x] `Team` model — one row per Pallas instance. `id` = `PallasInstance.id` on the Daedalus side (stable UUID). `active_jti` identifies the single currently-valid JWT; rotation changes this field, immediately invalidating the old token.
|
||||
- [x] `TeamWorkspaceAssignment` model — maps a `Team` to a set of Daedalus workspace UUIDs. Updated by Daedalus via `PUT /mcp_server/api/teams/{id}/workspaces/` whenever workspace attachments change.
|
||||
- [x] `resolve_mcp_jwt(token_string)` in `mcp_server/auth.py` — validates signature, `exp`, `iss`. For team JWTs (`iss=mnemosyne`, `typ=team`): parses `sub=team:<uuid>` → `claims["team_id"]`; bypasses the per-turn JTI replay cache (team tokens are intentionally reused).
|
||||
- [x] `_libraries_for_team(team_id, jti)` — looks up the `Team` row, verifies `active=True` and `active_jti == jti`, then translates `TeamWorkspaceAssignment` rows into Library UIDs via a single Cypher query.
|
||||
- [x] `MCPAuthMiddleware.on_call_tool` — routes team JWTs through `_libraries_for_team`; routes legacy per-turn JWTs through `_scope_from_claims` (backward-compatible).
|
||||
- [x] REST control plane at `/mcp_server/api/teams/`:
|
||||
- `POST /` — create team by UUID; mints JWT, returns plaintext once.
|
||||
- `GET /{id}/` — team state (workspace_ids, active status).
|
||||
- `DELETE /{id}/` — soft-delete (`active=False`); all JWTs immediately invalid.
|
||||
- `PUT /{id}/workspaces/` — replace workspace assignment list (idempotent).
|
||||
- `POST /{id}/rotate/` — mint new JWT with new `active_jti`; returns plaintext once.
|
||||
|
||||
**Team JWT format (HS256):**
|
||||
|
||||
```json
|
||||
{
|
||||
"iss": "mnemosyne",
|
||||
"aud": "mnemosyne",
|
||||
"typ": "team",
|
||||
"sub": "team:<pallas-instance-uuid>",
|
||||
"iat": 1746000000,
|
||||
"exp": 2061360000,
|
||||
"jti": "<active_jti uuid>"
|
||||
}
|
||||
```
|
||||
|
||||
**Provisioning (once per Pallas instance):**
|
||||
|
||||
```bash
|
||||
# 1. Seed the MCPSigningKey on Mnemosyne (once per deployment, not per instance):
|
||||
docker compose exec app python manage.py seed_signing_key --kid daedalus-1 --retire-other
|
||||
# The hex stays in Mnemosyne's DB — no operator action required.
|
||||
|
||||
# 2. Register the Pallas instance in Daedalus admin UI (/admin/pallas/).
|
||||
# Daedalus calls POST /mcp_server/api/teams/ automatically.
|
||||
# The team JWT is minted and stored encrypted in Daedalus.
|
||||
|
||||
# 3. Reveal the JWT via Daedalus admin UI (one-shot):
|
||||
# GET /api/v1/pallas/{id}/team-jwt
|
||||
# Copy the returned JWT string.
|
||||
|
||||
# 4. Paste into fastagent.secrets.yaml on the Pallas host:
|
||||
# mcp:
|
||||
# servers:
|
||||
# mnemosyne:
|
||||
# headers:
|
||||
# Authorization: "Bearer <JWT>"
|
||||
|
||||
# 5. Restart the Pallas agent processes.
|
||||
|
||||
# 6. Attach workspaces in Daedalus workspace settings UI.
|
||||
# Daedalus calls PUT /mcp_server/api/teams/{id}/workspaces/ automatically.
|
||||
```
|
||||
|
||||
See the Daedalus-side spec [§9](../../daedalus/docs/mnemosyne_integration.md#9-phase-2--workspace-scoped-mcp-search-implemented) for the full operator walkthrough including JWT rotation and disaster recovery.
|
||||
|
||||
240
docs/mnemosyne_mcp.md
Normal file
240
docs/mnemosyne_mcp.md
Normal file
@@ -0,0 +1,240 @@
|
||||
# Mnemosyne MCP Server Tools
|
||||
|
||||
Mnemosyne exposes a retrieval surface via the [Model Context Protocol](https://modelcontextprotocol.io/) using [FastMCP](https://github.com/jlowin/fastmcp). The server is a **retrieval surface, not a RAG pipeline**: it returns ranked evidence and the calling LLM is responsible for synthesis and citation.
|
||||
|
||||
## Concepts
|
||||
|
||||
**Library** — the top-level container. Each library has a `library_type` that drives chunking, embedding, and re-ranking strategy:
|
||||
|
||||
| `library_type` | Content |
|
||||
|---|---|
|
||||
| `fiction` | Novels, short stories. Cover art available. |
|
||||
| `nonfiction` | General non-fiction prose. |
|
||||
| `technical` | Manuals, textbooks, docs. Diagrams and code-like content. |
|
||||
| `music` | Lyrics, liner notes, album artwork. |
|
||||
| `film` | Scripts, synopses, stills. |
|
||||
| `art` | Catalogs, descriptions, artwork itself. |
|
||||
| `journal` | Personal entries; temporal/reflective. |
|
||||
| `business` | Proposals, marketing, sales, strategy. Commercial context. |
|
||||
| `finance` | Statements, tax, market commentary. Quote figures exactly. |
|
||||
|
||||
**Collection** — a named group of items inside a library (e.g. a novel series, a multi-volume manual).
|
||||
|
||||
**Item** — an indexed document or file. Only items with `embedding_status = "completed"` appear in search results.
|
||||
|
||||
**Chunk** — a text segment of an item, stored in S3. Search returns a `text_preview` (~500 chars); use `get_chunk` to fetch the full text.
|
||||
|
||||
## Recommended Workflow
|
||||
|
||||
```
|
||||
list_libraries
|
||||
→ search(query, library_type=..., library_uid=...)
|
||||
→ get_chunk(chunk_uid) # only when text_preview is insufficient
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Tools
|
||||
|
||||
### `search`
|
||||
|
||||
Hybrid retrieval: vector + full-text + concept-graph candidates fused by RRF (Reciprocal Rank Fusion), with optional Synesis re-ranking.
|
||||
|
||||
**Parameters**
|
||||
|
||||
| Name | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `query` | `str` | required | The search query. |
|
||||
| `library_uid` | `str \| None` | `None` | Restrict to one library by UID. |
|
||||
| `library_type` | `str \| None` | `None` | Restrict by library type (see table above). |
|
||||
| `collection_uid` | `str \| None` | `None` | Restrict to one collection by UID. |
|
||||
| `limit` | `int` | `20` | Maximum candidates to return. |
|
||||
| `rerank` | `bool` | `True` | Apply Synesis re-ranking. Set `False` to skip. |
|
||||
| `include_images` | `bool` | `True` | Include matching images in the response. |
|
||||
| `search_types` | `list[str] \| None` | `["vector", "fulltext", "graph"]` | Which retrieval strategies to run. |
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "...",
|
||||
"candidates": [
|
||||
{
|
||||
"chunk_uid": "...",
|
||||
"item_uid": "...",
|
||||
"item_title": "...",
|
||||
"library_type": "...",
|
||||
"text_preview": "... (~500 chars) ...",
|
||||
"score": 0.92,
|
||||
"source": "vector|fulltext|graph"
|
||||
}
|
||||
],
|
||||
"images": [...],
|
||||
"total_candidates": 42,
|
||||
"search_time_ms": 85,
|
||||
"reranker_used": true,
|
||||
"reranker_model": "...",
|
||||
"search_types_used": ["vector", "fulltext", "graph"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `get_chunk`
|
||||
|
||||
Fetch the full text of a single chunk by its UID. Use this when the `text_preview` returned by `search` is not enough.
|
||||
|
||||
**Parameters**
|
||||
|
||||
| Name | Type | Description |
|
||||
|---|---|---|
|
||||
| `chunk_uid` | `str` | The chunk UID from a `search` result. |
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"chunk_uid": "...",
|
||||
"chunk_index": 3,
|
||||
"item_uid": "...",
|
||||
"item_title": "...",
|
||||
"library_type": "...",
|
||||
"text": "Full chunk text..."
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `list_libraries`
|
||||
|
||||
Enumerate libraries the caller is authorized to read. Use the returned `uid` or `library_type` to scope a subsequent `search`.
|
||||
|
||||
**Parameters**
|
||||
|
||||
| Name | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `limit` | `int` | `50` | Max libraries to return (capped at 200). |
|
||||
| `offset` | `int` | `0` | Pagination offset. |
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"libraries": [
|
||||
{
|
||||
"uid": "...",
|
||||
"name": "...",
|
||||
"library_type": "fiction",
|
||||
"description": "..."
|
||||
}
|
||||
],
|
||||
"limit": 50,
|
||||
"offset": 0
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `list_collections`
|
||||
|
||||
Enumerate collections, optionally filtered to a single library. Use the returned `uid` to scope `search` or `list_items` to one collection.
|
||||
|
||||
**Parameters**
|
||||
|
||||
| Name | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `library_uid` | `str \| None` | `None` | Filter to one parent library. |
|
||||
| `limit` | `int` | `50` | Max collections to return (capped at 200). |
|
||||
| `offset` | `int` | `0` | Pagination offset. |
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"collections": [
|
||||
{
|
||||
"uid": "...",
|
||||
"name": "...",
|
||||
"description": "...",
|
||||
"library_uid": "...",
|
||||
"library_name": "..."
|
||||
}
|
||||
],
|
||||
"limit": 50,
|
||||
"offset": 0
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `list_items`
|
||||
|
||||
Enumerate indexed documents/files, optionally filtered by library or collection. Check `embedding_status` before searching — only `"completed"` items appear in search results. Use `chunk_count` to gauge document size.
|
||||
|
||||
**Parameters**
|
||||
|
||||
| Name | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `collection_uid` | `str \| None` | `None` | Filter to one collection. |
|
||||
| `library_uid` | `str \| None` | `None` | Filter to one library. |
|
||||
| `limit` | `int` | `50` | Max items to return (capped at 200). |
|
||||
| `offset` | `int` | `0` | Pagination offset. |
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"uid": "...",
|
||||
"title": "...",
|
||||
"item_type": "...",
|
||||
"file_type": "...",
|
||||
"chunk_count": 120,
|
||||
"image_count": 4,
|
||||
"embedding_status": "completed"
|
||||
}
|
||||
],
|
||||
"limit": 50,
|
||||
"offset": 0
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `get_health`
|
||||
|
||||
Health check for infrastructure pollers (Pallas, Daedalus). Does not require authentication.
|
||||
|
||||
Returns a Pallas-compatible status object. `neo4j` and `s3` failures result in `"error"` (critical). A missing or unconfigured embedding model results in `"degraded"` (non-critical).
|
||||
|
||||
**Parameters:** none
|
||||
|
||||
**Response**
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok | degraded | error",
|
||||
"checks": {
|
||||
"neo4j": { "status": "ok", "duration_ms": 2.1 },
|
||||
"s3": { "status": "ok", "duration_ms": 8.4 },
|
||||
"embedding": { "status": "ok", "model": "...", "duration_ms": 0.3 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
All tools except `get_health` require a `Bearer` token in the `Authorization` header. Three credential types are accepted:
|
||||
|
||||
| Type | Issued by | Lifetime | Scope |
|
||||
|---|---|---|---|
|
||||
| **Opaque `MCPToken`** | Mnemosyne admin | Long-lived (optional expiry) | `allowed_libraries` list on the token row. Per-tool ACL available. |
|
||||
| **Per-turn JWT** (`iss=daedalus`) | Daedalus chat | ≤10 minutes | `libs` claim (list of Library UIDs). |
|
||||
| **Team JWT** (`iss=mnemosyne`, `typ=team`) | Mnemosyne | 10-year lifetime | Resolved live from `TeamWorkspaceAssignment` → Neo4j `Library.workspace_id`. Revoked via `active_jti` rotation. |
|
||||
|
||||
Every authenticated request resolves to a `resolved_libraries` list — the set of Library UIDs the caller may read. Tools enforce this list at the query layer; an empty list means the caller is authenticated but sees nothing (fail-closed). A value of `None` (no authentication) is handled the same fail-closed way.
|
||||
|
||||
The `MCP_REQUIRE_AUTH` Django setting (default `True`) controls whether unauthenticated requests are rejected.
|
||||
335
docs/ouranos.md
335
docs/ouranos.md
@@ -1,335 +0,0 @@
|
||||
# Ouranos Lab
|
||||
|
||||
Infrastructure-as-Code project managing the **Ouranos Lab** — a development sandbox at [ouranos.helu.ca](https://ouranos.helu.ca). It uses **Terraform** for container provisioning and **Ansible** for configuration management; hosts are named after the moons of Uranus.
|
||||
|
||||
---
|
||||
|
||||
## Project Overview
|
||||
|
||||
| Component | Purpose |
|
||||
|-----------|---------|
|
||||
| **Terraform** | Provisions 10 specialised Incus containers (LXC) with DNS-resolved networking, security policies, and resource dependencies |
|
||||
| **Ansible** | Deploys Docker, databases (PostgreSQL, Neo4j), observability stack (Prometheus, Grafana, Loki), and application runtimes across all hosts |
|
||||
|
||||
> **DNS Domain**: Incus resolves containers via the `.incus` domain suffix (e.g., `oberon.incus`, `portia.incus`). IPv4 addresses are dynamically assigned — always use DNS names, never hardcode IPs.
|
||||
|
||||
---
|
||||
|
||||
## Uranian Host Architecture
|
||||
|
||||
All containers are named after moons of Uranus and resolved via the `.incus` DNS suffix.
|
||||
|
||||
| Name | Role | Description | Nesting |
|
||||
|------|------|-------------|---------|
|
||||
| **ariel** | graph_database | Neo4j — Ethereal graph connections | ✔ |
|
||||
| **caliban** | agent_automation | Agent S MCP Server with MATE Desktop | ✔ |
|
||||
| **miranda** | mcp_docker_host | Dedicated Docker Host for MCP Servers | ✔ |
|
||||
| **oberon** | container_orchestration | Docker Host — MCP Switchboard, RabbitMQ, Open WebUI | ✔ |
|
||||
| **portia** | database | PostgreSQL — Relational database host | ❌ |
|
||||
| **prospero** | observability | PPLG stack — Prometheus, Grafana, Loki, PgAdmin | ❌ |
|
||||
| **puck** | application_runtime | Python App Host — JupyterLab, Django apps, Gitea Runner | ✔ |
|
||||
| **rosalind** | collaboration | Gitea, LobeChat, Nextcloud, AnythingLLM | ✔ |
|
||||
| **sycorax** | language_models | Arke LLM Proxy | ✔ |
|
||||
| **titania** | proxy_sso | HAProxy TLS termination + Casdoor SSO | ✔ |
|
||||
|
||||
### oberon — Container Orchestration
|
||||
|
||||
King of the Fairies orchestrating containers and managing MCP infrastructure.
|
||||
|
||||
- Docker engine
|
||||
- MCP Switchboard (port 22785) — Django app routing MCP tool calls
|
||||
- RabbitMQ message queue
|
||||
- Open WebUI LLM interface (port 22088, PostgreSQL backend on Portia)
|
||||
- SearXNG privacy search (port 22083, behind OAuth2-Proxy)
|
||||
- smtp4dev SMTP test server (port 22025)
|
||||
|
||||
### portia — Relational Database
|
||||
|
||||
Intelligent and resourceful — the reliability of relational databases.
|
||||
|
||||
- PostgreSQL 17 (port 5432)
|
||||
- Databases: `arke`, `anythingllm`, `gitea`, `hass`, `lobechat`, `mcp_switchboard`, `nextcloud`, `openwebui`, `periplus`, `spelunker`
|
||||
|
||||
### ariel — Graph Database
|
||||
|
||||
Air spirit — ethereal, interconnected nature mirroring graph relationships.
|
||||
|
||||
- Neo4j 5.26.0 (Docker)
|
||||
- HTTP API: port 25584
|
||||
- Bolt: port 25554
|
||||
|
||||
### puck — Application Runtime
|
||||
|
||||
Shape-shifting trickster embodying Python's versatility.
|
||||
|
||||
- Docker engine
|
||||
- JupyterLab (port 22071 via OAuth2-Proxy)
|
||||
- Gitea Runner (CI/CD agent)
|
||||
- Home Assistant (port 8123)
|
||||
- Django applications: Angelia (22281), Athena (22481), Kairos (22581), Icarlos (22681), Spelunker (22881), Peitho (22981)
|
||||
|
||||
### prospero — Observability Stack
|
||||
|
||||
Master magician observing all events.
|
||||
|
||||
- PPLG stack via Docker Compose: Prometheus, Loki, Grafana, PgAdmin
|
||||
- Internal HAProxy with OAuth2-Proxy for all dashboards
|
||||
- AlertManager with Pushover notifications
|
||||
- Prometheus metrics collection (`node-exporter`, HAProxy, Loki)
|
||||
- Loki log aggregation via Alloy (all hosts)
|
||||
- Grafana dashboard suite with Casdoor SSO integration
|
||||
|
||||
### miranda — MCP Docker Host
|
||||
|
||||
Curious bridge between worlds — hosting MCP server containers.
|
||||
|
||||
- Docker engine (API exposed on port 2375 for MCP Switchboard)
|
||||
- MCPO OpenAI-compatible MCP proxy
|
||||
- Grafana MCP Server (port 25533)
|
||||
- Gitea MCP Server (port 25535)
|
||||
- Neo4j MCP Server
|
||||
- Argos MCP Server — web search via SearXNG (port 25534)
|
||||
|
||||
### sycorax — Language Models
|
||||
|
||||
Original magical power wielding language magic.
|
||||
|
||||
- Arke LLM API Proxy (port 25540)
|
||||
- Multi-provider support (OpenAI, Anthropic, etc.)
|
||||
- Session management with Memcached
|
||||
- Database backend on Portia
|
||||
|
||||
### caliban — Agent Automation
|
||||
|
||||
Autonomous computer agent learning through environmental interaction.
|
||||
|
||||
- Docker engine
|
||||
- Agent S MCP Server (MATE desktop, AT-SPI automation)
|
||||
- Kernos MCP Shell Server (port 22021)
|
||||
- GPU passthrough for vision tasks
|
||||
- RDP access (port 25521)
|
||||
|
||||
### rosalind — Collaboration Services
|
||||
|
||||
Witty and resourceful moon for PHP, Go, and Node.js runtimes.
|
||||
|
||||
- Gitea self-hosted Git (port 22082, SSH on 22022)
|
||||
- LobeChat AI chat interface (port 22081)
|
||||
- Nextcloud file sharing and collaboration (port 22083)
|
||||
- AnythingLLM document AI workspace (port 22084)
|
||||
- Nextcloud data on dedicated Incus storage volume
|
||||
|
||||
### titania — Proxy & SSO Services
|
||||
|
||||
Queen of the Fairies managing access control and authentication.
|
||||
|
||||
- HAProxy 3.x with TLS termination (port 443)
|
||||
- Let's Encrypt wildcard certificate via certbot DNS-01 (Namecheap)
|
||||
- HTTP to HTTPS redirect (port 80)
|
||||
- Gitea SSH proxy (port 22022)
|
||||
- Casdoor SSO (port 22081, local PostgreSQL)
|
||||
- Prometheus metrics at `:8404/metrics`
|
||||
|
||||
---
|
||||
|
||||
## External Access via HAProxy
|
||||
|
||||
Titania provides TLS termination and reverse proxy for all services.
|
||||
|
||||
- **Base domain**: `ouranos.helu.ca`
|
||||
- **HTTPS**: port 443 (standard)
|
||||
- **HTTP**: port 80 (redirects to HTTPS)
|
||||
- **Certificate**: Let's Encrypt wildcard via certbot DNS-01
|
||||
|
||||
### Route Table
|
||||
|
||||
| Subdomain | Backend | Service |
|
||||
|-----------|---------|---------|
|
||||
| `ouranos.helu.ca` (root) | puck.incus:22281 | Angelia (Django) |
|
||||
| `alertmanager.ouranos.helu.ca` | prospero.incus:443 (SSL) | AlertManager |
|
||||
| `angelia.ouranos.helu.ca` | puck.incus:22281 | Angelia (Django) |
|
||||
| `anythingllm.ouranos.helu.ca` | rosalind.incus:22084 | AnythingLLM |
|
||||
| `arke.ouranos.helu.ca` | sycorax.incus:25540 | Arke LLM Proxy |
|
||||
| `athena.ouranos.helu.ca` | puck.incus:22481 | Athena (Django) |
|
||||
| `gitea.ouranos.helu.ca` | rosalind.incus:22082 | Gitea |
|
||||
| `grafana.ouranos.helu.ca` | prospero.incus:443 (SSL) | Grafana |
|
||||
| `hass.ouranos.helu.ca` | oberon.incus:8123 | Home Assistant |
|
||||
| `id.ouranos.helu.ca` | titania.incus:22081 | Casdoor SSO |
|
||||
| `icarlos.ouranos.helu.ca` | puck.incus:22681 | Icarlos (Django) |
|
||||
| `jupyterlab.ouranos.helu.ca` | puck.incus:22071 | JupyterLab (OAuth2-Proxy) |
|
||||
| `kairos.ouranos.helu.ca` | puck.incus:22581 | Kairos (Django) |
|
||||
| `lobechat.ouranos.helu.ca` | rosalind.incus:22081 | LobeChat |
|
||||
| `loki.ouranos.helu.ca` | prospero.incus:443 (SSL) | Loki |
|
||||
| `mcp-switchboard.ouranos.helu.ca` | oberon.incus:22785 | MCP Switchboard |
|
||||
| `nextcloud.ouranos.helu.ca` | rosalind.incus:22083 | Nextcloud |
|
||||
| `openwebui.ouranos.helu.ca` | oberon.incus:22088 | Open WebUI |
|
||||
| `peitho.ouranos.helu.ca` | puck.incus:22981 | Peitho (Django) |
|
||||
| `pgadmin.ouranos.helu.ca` | prospero.incus:443 (SSL) | PgAdmin 4 |
|
||||
| `prometheus.ouranos.helu.ca` | prospero.incus:443 (SSL) | Prometheus |
|
||||
| `freecad-mcp.ouranos.helu.ca` | caliban.incus:22032 | FreeCAD Robust MCP Server |
|
||||
| `rommie.ouranos.helu.ca` | caliban.incus:22031 | Rommie MCP Server (Agent S GUI automation) |
|
||||
| `searxng.ouranos.helu.ca` | oberon.incus:22073 | SearXNG (OAuth2-Proxy) |
|
||||
| `smtp4dev.ouranos.helu.ca` | oberon.incus:22085 | smtp4dev |
|
||||
| `spelunker.ouranos.helu.ca` | puck.incus:22881 | Spelunker (Django) |
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure Management
|
||||
|
||||
### Quick Start
|
||||
|
||||
```bash
|
||||
# Provision containers
|
||||
cd terraform
|
||||
terraform init
|
||||
terraform plan
|
||||
terraform apply
|
||||
|
||||
# Start all containers
|
||||
cd ../ansible
|
||||
source ~/env/agathos/bin/activate
|
||||
ansible-playbook sandbox_up.yml
|
||||
|
||||
# Deploy all services
|
||||
ansible-playbook site.yml
|
||||
|
||||
# Stop all containers
|
||||
ansible-playbook sandbox_down.yml
|
||||
```
|
||||
|
||||
### Terraform Workflow
|
||||
|
||||
1. **Define** — Containers, networks, and resources in `*.tf` files
|
||||
2. **Plan** — Review changes with `terraform plan`
|
||||
3. **Apply** — Provision with `terraform apply`
|
||||
4. **Verify** — Check outputs and container status
|
||||
|
||||
### Ansible Workflow
|
||||
|
||||
1. **Bootstrap** — Update packages, install essentials (`apt_update.yml`)
|
||||
2. **Agents** — Deploy Alloy (log/metrics) and Node Exporter on all hosts
|
||||
3. **Services** — Configure databases, Docker, applications, observability
|
||||
4. **Verify** — Check service health and connectivity
|
||||
|
||||
### Vault Management
|
||||
|
||||
```bash
|
||||
# Edit secrets
|
||||
ansible-vault edit inventory/group_vars/all/vault.yml
|
||||
|
||||
# View secrets
|
||||
ansible-vault view inventory/group_vars/all/vault.yml
|
||||
|
||||
# Encrypt a new file
|
||||
ansible-vault encrypt new_secrets.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## S3 Storage Provisioning
|
||||
|
||||
Terraform provisions Incus S3 buckets for services requiring object storage:
|
||||
|
||||
| Service | Host | Purpose |
|
||||
|---------|------|---------|
|
||||
| **Casdoor** | Titania | User avatars and SSO resource storage |
|
||||
| **LobeChat** | Rosalind | File uploads and attachments |
|
||||
|
||||
> S3 credentials (access key, secret key, endpoint) are stored as sensitive Terraform outputs and managed in Ansible Vault with the `vault_*_s3_*` prefix.
|
||||
|
||||
---
|
||||
|
||||
## Ansible Automation
|
||||
|
||||
### Full Deployment (`site.yml`)
|
||||
|
||||
Playbooks run in dependency order:
|
||||
|
||||
| Playbook | Hosts | Purpose |
|
||||
|----------|-------|---------|
|
||||
| `apt_update.yml` | All | Update packages and install essentials |
|
||||
| `alloy/deploy.yml` | All | Grafana Alloy log/metrics collection |
|
||||
| `prometheus/node_deploy.yml` | All | Node Exporter metrics |
|
||||
| `docker/deploy.yml` | Oberon, Ariel, Miranda, Puck, Rosalind, Sycorax, Caliban, Titania | Docker engine |
|
||||
| `smtp4dev/deploy.yml` | Oberon | SMTP test server |
|
||||
| `pplg/deploy.yml` | Prospero | Full observability stack + HAProxy + OAuth2-Proxy |
|
||||
| `postgresql/deploy.yml` | Portia | PostgreSQL with all databases |
|
||||
| `postgresql_ssl/deploy.yml` | Titania | Dedicated PostgreSQL for Casdoor |
|
||||
| `neo4j/deploy.yml` | Ariel | Neo4j graph database |
|
||||
| `searxng/deploy.yml` | Oberon | SearXNG privacy search |
|
||||
| `haproxy/deploy.yml` | Titania | HAProxy TLS termination and routing |
|
||||
| `casdoor/deploy.yml` | Titania | Casdoor SSO |
|
||||
| `mcpo/deploy.yml` | Miranda | MCPO MCP proxy |
|
||||
| `openwebui/deploy.yml` | Oberon | Open WebUI LLM interface |
|
||||
| `hass/deploy.yml` | Oberon | Home Assistant |
|
||||
| `gitea/deploy.yml` | Rosalind | Gitea self-hosted Git |
|
||||
| `nextcloud/deploy.yml` | Rosalind | Nextcloud collaboration |
|
||||
|
||||
### Individual Service Deployments
|
||||
|
||||
Services with standalone deploy playbooks (not in `site.yml`):
|
||||
|
||||
| Playbook | Host | Service |
|
||||
|----------|------|---------|
|
||||
| `anythingllm/deploy.yml` | Rosalind | AnythingLLM document AI |
|
||||
| `arke/deploy.yml` | Sycorax | Arke LLM proxy |
|
||||
| `argos/deploy.yml` | Miranda | Argos MCP web search server |
|
||||
| `caliban/deploy.yml` | Caliban | Agent S MCP Server |
|
||||
| `certbot/deploy.yml` | Titania | Let's Encrypt certificate renewal |
|
||||
| `gitea_mcp/deploy.yml` | Miranda | Gitea MCP Server |
|
||||
| `gitea_runner/deploy.yml` | Puck | Gitea CI/CD runner |
|
||||
| `grafana_mcp/deploy.yml` | Miranda | Grafana MCP Server |
|
||||
| `jupyterlab/deploy.yml` | Puck | JupyterLab + OAuth2-Proxy |
|
||||
| `kernos/deploy.yml` | Caliban | Kernos MCP shell server |
|
||||
| `lobechat/deploy.yml` | Rosalind | LobeChat AI chat |
|
||||
| `neo4j_mcp/deploy.yml` | Miranda | Neo4j MCP Server |
|
||||
| `rabbitmq/deploy.yml` | Oberon | RabbitMQ message queue |
|
||||
|
||||
### Lifecycle Playbooks
|
||||
|
||||
| Playbook | Purpose |
|
||||
|----------|---------|
|
||||
| `sandbox_up.yml` | Start all Uranian host containers |
|
||||
| `sandbox_down.yml` | Gracefully stop all containers |
|
||||
| `apt_update.yml` | Update packages on all hosts |
|
||||
| `site.yml` | Full deployment orchestration |
|
||||
|
||||
---
|
||||
|
||||
## Data Flow Architecture
|
||||
|
||||
### Observability Pipeline
|
||||
|
||||
```
|
||||
All Hosts Prospero Alerts
|
||||
Alloy + Node Exporter → Prometheus + Loki + Grafana → AlertManager + Pushover
|
||||
collect metrics & logs storage & visualisation notifications
|
||||
```
|
||||
|
||||
### Integration Points
|
||||
|
||||
| Consumer | Provider | Connection |
|
||||
|----------|----------|-----------|
|
||||
| All LLM apps | Arke (Sycorax) | `http://sycorax.incus:25540` |
|
||||
| Open WebUI, Arke, Gitea, Nextcloud, LobeChat | PostgreSQL (Portia) | `portia.incus:5432` |
|
||||
| Neo4j MCP | Neo4j (Ariel) | `ariel.incus:7687` (Bolt) |
|
||||
| MCP Switchboard | Docker API (Miranda) | `tcp://miranda.incus:2375` |
|
||||
| MCP Switchboard | RabbitMQ (Oberon) | `oberon.incus:5672` |
|
||||
| Kairos, Spelunker | RabbitMQ (Oberon) | `oberon.incus:5672` |
|
||||
| SMTP (all apps) | smtp4dev (Oberon) | `oberon.incus:22025` |
|
||||
| All hosts | Loki (Prospero) | `http://prospero.incus:3100` |
|
||||
| All hosts | Prometheus (Prospero) | `http://prospero.incus:9090` |
|
||||
|
||||
---
|
||||
|
||||
## Important Notes
|
||||
|
||||
⚠️ **Alloy Host Variables Required** — Every host with `alloy` in its `services` list must define `alloy_log_level` in `inventory/host_vars/<host>.incus.yml`. The playbook will fail with an undefined variable error if this is missing.
|
||||
|
||||
⚠️ **Alloy Syslog Listeners Required for Docker Services** — Any Docker Compose service using the syslog logging driver must have a corresponding `loki.source.syslog` listener in the host's Alloy config template (`ansible/alloy/<hostname>/config.alloy.j2`). Missing listeners cause Docker containers to fail on start.
|
||||
|
||||
⚠️ **Local Terraform State** — This project uses local Terraform state (no remote backend). Do not run `terraform apply` from multiple machines simultaneously.
|
||||
|
||||
⚠️ **Nested Docker** — Docker runs inside Incus containers (nested), which requires `security.nesting = true` and an `lxc.apparmor.profile=unconfined` AppArmor override on all Docker-enabled hosts.
|
||||
|
||||
⚠️ **Deployment Order** — Prospero (observability) must be fully deployed before other hosts, as Alloy on every host pushes logs and metrics to `prospero.incus`. Run `pplg/deploy.yml` before `site.yml` on a fresh environment.
|
||||
42
docs/regenerate_docs.sh
Executable file
42
docs/regenerate_docs.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
# Regenerate Sphinx API reference for every Mnemosyne app, then build HTML.
# Drives both local development and the CI pipeline.
set -euo pipefail

# Run from the directory holding this script so the relative paths below and
# the `make` targets resolve no matter where the script is invoked from.
cd "$(dirname "$0")"

APPS=(themis library llm_manager mcp_server)
SOURCE_REF=source/reference/apps
PACKAGE_ROOT=../mnemosyne

make clean

mkdir -p "$SOURCE_REF"

# Per-app subdir so each app gets its own modules.rst (sphinx-apidoc
# overwrites the file otherwise, leaving only the last app in the index).
for app in "${APPS[@]}"; do
    # --force overwrites existing stubs but never deletes them, so pages for
    # removed or renamed modules would otherwise linger and break the build.
    # The :? guard aborts instead of expanding to "/$app" if SOURCE_REF is empty.
    rm -rf "${SOURCE_REF:?}/$app"

    # Positional arguments after the package path are exclude patterns:
    # migrations and tests are left out of the API reference.
    sphinx-apidoc \
        --force \
        --separate \
        --module-first \
        --output-dir "$SOURCE_REF/$app" \
        "$PACKAGE_ROOT/$app" \
        "$PACKAGE_ROOT/$app/migrations" \
        "$PACKAGE_ROOT/$app/tests"
done

# Write the top-level index.rst whose toctree links every app's modules.rst.
{
    echo "Applications"
    echo "============"
    echo
    echo ".. toctree::"
    echo "   :maxdepth: 2"
    echo
    for app in "${APPS[@]}"; do
        echo "   $app/modules"
    done
} > "$SOURCE_REF/index.rst"

make html
|
||||
0
docs/source/_static/.gitkeep
Normal file
0
docs/source/_static/.gitkeep
Normal file
97
docs/source/conf.py
Normal file
97
docs/source/conf.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import os
|
||||
import sys
|
||||
import tomllib
|
||||
|
||||
# The Django package lives at <repo>/mnemosyne/<inner mnemosyne>/. Adding the
# outer mnemosyne/ directory to sys.path lets autodoc resolve every app
# (themis, library, llm_manager, mcp_server) and the project settings module.
# Both paths are anchored on this file rather than on the working directory,
# so they resolve identically wherever the build is launched from.
_repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
sys.path.insert(0, os.path.join(_repo_root, 'mnemosyne'))

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mnemosyne.settings')

# Load real .env if present (local dev). In CI there is none and that's fine —
# settings.py provides a default for every env var it reads, so the import
# succeeds either way.
_env_file = os.path.join(_repo_root, 'mnemosyne', '.env')
if os.path.exists(_env_file):
    # Explicit encoding: the default is locale-dependent and can mis-decode
    # non-ASCII values on some build hosts.
    with open(_env_file, encoding='utf-8') as _f:
        for _line in _f:
            _line = _line.strip()
            # Skip blanks, comments, and anything that is not KEY=VALUE.
            if not _line or _line.startswith('#') or '=' not in _line:
                continue
            _key, _val = _line.split('=', 1)
            _val = _val.strip()
            # Drop one pair of matching surrounding quotes so KEY="value"
            # yields value, not the literal quoted string.
            if len(_val) >= 2 and _val[0] == _val[-1] and _val[0] in ('"', "'"):
                _val = _val[1:-1]
            # setdefault: variables already exported in the environment win.
            os.environ.setdefault(_key.strip(), _val)

import django  # noqa: E402

django.setup()

# Sphinx autodoc calls repr() on every class attribute it documents. Django's
# QuerySet.__repr__ executes a SELECT against the database — which doc builds
# have no business doing. Intercept object_description so QuerySet instances
# render as a static string instead. Mnemosyne's themis app has at least one
# DRF viewset with a class-level queryset attribute, so this matters.
from django.db.models.query import QuerySet  # noqa: E402
import sphinx.util.inspect as _sphinx_inspect  # noqa: E402

_orig_object_description = _sphinx_inspect.object_description


def _safe_object_description(obj, *args, **kwargs):
    """Describe *obj* for autodoc without evaluating Django querysets.

    QuerySet instances are rendered as a static ``<QuerySet [ModelName]>``
    string; every other object is passed, with any extra arguments, to the
    original ``sphinx.util.inspect.object_description``.
    """
    if isinstance(obj, QuerySet):
        # A QuerySet can be constructed without a model; fall back to a
        # placeholder instead of raising AttributeError on None.
        _model_name = getattr(obj.model, '__name__', 'unknown')
        return f'<QuerySet [{_model_name}]>'
    return _orig_object_description(obj, *args, **kwargs)


_sphinx_inspect.object_description = _safe_object_description

# ── Sphinx configuration ──────────────────────────────────────────────────
project = 'Mnemosyne'
copyright = '2026, Mnemosyne Team'
author = 'Mnemosyne Team'

# Single source of truth for the version: the [project] table in pyproject.toml.
with open(os.path.join(_repo_root, 'pyproject.toml'), 'rb') as _f:
    release = tomllib.load(_f)['project']['version']

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'sphinx.ext.intersphinx',
    'sphinx_autodoc_typehints',
    'sphinxcontrib.httpdomain',
    'sphinxcontrib.mermaid',
    'myst_parser',
]

source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'}

myst_enable_extensions = ['colon_fence', 'deflist', 'tasklist', 'attrs_inline']
myst_heading_anchors = 4

autodoc_default_options = {
    'members': True,
    'member-order': 'bysource',
    'special-members': '__init__',
    'undoc-members': True,
    'exclude-members': '__weakref__',
}
autodoc_inherit_docstrings = False
napoleon_use_ivar = True

intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'django': (
        'https://docs.djangoproject.com/en/stable/',
        'https://docs.djangoproject.com/en/stable/_objects/',
    ),
}

html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_theme_options = {
    'navigation_depth': 4,
    'collapse_navigation': False,
    'sticky_navigation': True,
    'includehidden': True,
    'titles_only': False,
}
|
||||
17
docs/source/index.rst
Normal file
17
docs/source/index.rst
Normal file
@@ -0,0 +1,17 @@
|
||||
Mnemosyne Documentation
|
||||
=======================
|
||||
|
||||
Content-type-aware, multimodal personal knowledge management system.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: API Reference
|
||||
|
||||
reference/apps/index
|
||||
|
||||
Indices
|
||||
-------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
10
docs/source/reference/apps/index.rst
Normal file
10
docs/source/reference/apps/index.rst
Normal file
@@ -0,0 +1,10 @@
|
||||
Applications
|
||||
============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
themis/modules
|
||||
library/modules
|
||||
llm_manager/modules
|
||||
mcp_server/modules
|
||||
7
docs/source/reference/apps/library/library.admin.rst
Normal file
7
docs/source/reference/apps/library/library.admin.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.admin module
|
||||
====================
|
||||
|
||||
.. automodule:: library.admin
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
18
docs/source/reference/apps/library/library.api.rst
Normal file
18
docs/source/reference/apps/library/library.api.rst
Normal file
@@ -0,0 +1,18 @@
|
||||
library.api package
|
||||
===================
|
||||
|
||||
.. automodule:: library.api
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.api.serializers
|
||||
library.api.urls
|
||||
library.api.views
|
||||
library.api.workspaces
|
||||
@@ -0,0 +1,7 @@
|
||||
library.api.serializers module
|
||||
==============================
|
||||
|
||||
.. automodule:: library.api.serializers
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.api.urls.rst
Normal file
7
docs/source/reference/apps/library/library.api.urls.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.api.urls module
|
||||
=======================
|
||||
|
||||
.. automodule:: library.api.urls
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.api.views.rst
Normal file
7
docs/source/reference/apps/library/library.api.views.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.api.views module
|
||||
========================
|
||||
|
||||
.. automodule:: library.api.views
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.api.workspaces module
|
||||
=============================
|
||||
|
||||
.. automodule:: library.api.workspaces
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.apps.rst
Normal file
7
docs/source/reference/apps/library/library.apps.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.apps module
|
||||
===================
|
||||
|
||||
.. automodule:: library.apps
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.content\_types module
|
||||
=============================
|
||||
|
||||
.. automodule:: library.content_types
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.forms.rst
Normal file
7
docs/source/reference/apps/library/library.forms.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.forms module
|
||||
====================
|
||||
|
||||
.. automodule:: library.forms
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.embed\_collection module
|
||||
====================================================
|
||||
|
||||
.. automodule:: library.management.commands.embed_collection
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.embed\_item module
|
||||
==============================================
|
||||
|
||||
.. automodule:: library.management.commands.embed_item
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.embedding\_status module
|
||||
====================================================
|
||||
|
||||
.. automodule:: library.management.commands.embedding_status
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.load\_library\_types module
|
||||
=======================================================
|
||||
|
||||
.. automodule:: library.management.commands.load_library_types
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,21 @@
|
||||
library.management.commands package
|
||||
===================================
|
||||
|
||||
.. automodule:: library.management.commands
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.management.commands.embed_collection
|
||||
library.management.commands.embed_item
|
||||
library.management.commands.embedding_status
|
||||
library.management.commands.load_library_types
|
||||
library.management.commands.search
|
||||
library.management.commands.search_stats
|
||||
library.management.commands.setup_neo4j_indexes
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.search module
|
||||
=========================================
|
||||
|
||||
.. automodule:: library.management.commands.search
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.search\_stats module
|
||||
================================================
|
||||
|
||||
.. automodule:: library.management.commands.search_stats
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.management.commands.setup\_neo4j\_indexes module
|
||||
========================================================
|
||||
|
||||
.. automodule:: library.management.commands.setup_neo4j_indexes
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
15
docs/source/reference/apps/library/library.management.rst
Normal file
15
docs/source/reference/apps/library/library.management.rst
Normal file
@@ -0,0 +1,15 @@
|
||||
library.management package
|
||||
==========================
|
||||
|
||||
.. automodule:: library.management
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.management.commands
|
||||
7
docs/source/reference/apps/library/library.metrics.rst
Normal file
7
docs/source/reference/apps/library/library.metrics.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.metrics module
|
||||
======================
|
||||
|
||||
.. automodule:: library.metrics
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.models.rst
Normal file
7
docs/source/reference/apps/library/library.models.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.models module
|
||||
=====================
|
||||
|
||||
.. automodule:: library.models
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
34
docs/source/reference/apps/library/library.rst
Normal file
34
docs/source/reference/apps/library/library.rst
Normal file
@@ -0,0 +1,34 @@
|
||||
library package
|
||||
===============
|
||||
|
||||
.. automodule:: library
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.api
|
||||
library.management
|
||||
library.services
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.admin
|
||||
library.apps
|
||||
library.content_types
|
||||
library.forms
|
||||
library.metrics
|
||||
library.models
|
||||
library.tasks
|
||||
library.urls
|
||||
library.utils
|
||||
library.views
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.chunker module
|
||||
===============================
|
||||
|
||||
.. automodule:: library.services.chunker
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.concepts module
|
||||
================================
|
||||
|
||||
.. automodule:: library.services.concepts
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.daedalus\_s3 module
|
||||
====================================
|
||||
|
||||
.. automodule:: library.services.daedalus_s3
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.embedding\_client module
|
||||
=========================================
|
||||
|
||||
.. automodule:: library.services.embedding_client
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.fusion module
|
||||
==============================
|
||||
|
||||
.. automodule:: library.services.fusion
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.parsers module
|
||||
===============================
|
||||
|
||||
.. automodule:: library.services.parsers
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.pipeline module
|
||||
================================
|
||||
|
||||
.. automodule:: library.services.pipeline
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.reranker module
|
||||
================================
|
||||
|
||||
.. automodule:: library.services.reranker
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
25
docs/source/reference/apps/library/library.services.rst
Normal file
25
docs/source/reference/apps/library/library.services.rst
Normal file
@@ -0,0 +1,25 @@
|
||||
library.services package
|
||||
========================
|
||||
|
||||
.. automodule:: library.services
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library.services.chunker
|
||||
library.services.concepts
|
||||
library.services.daedalus_s3
|
||||
library.services.embedding_client
|
||||
library.services.fusion
|
||||
library.services.parsers
|
||||
library.services.pipeline
|
||||
library.services.reranker
|
||||
library.services.search
|
||||
library.services.text_utils
|
||||
library.services.vision
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.search module
|
||||
==============================
|
||||
|
||||
.. automodule:: library.services.search
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.text\_utils module
|
||||
===================================
|
||||
|
||||
.. automodule:: library.services.text_utils
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
library.services.vision module
|
||||
==============================
|
||||
|
||||
.. automodule:: library.services.vision
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.tasks.rst
Normal file
7
docs/source/reference/apps/library/library.tasks.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.tasks module
|
||||
====================
|
||||
|
||||
.. automodule:: library.tasks
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.urls.rst
Normal file
7
docs/source/reference/apps/library/library.urls.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.urls module
|
||||
===================
|
||||
|
||||
.. automodule:: library.urls
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.utils.rst
Normal file
7
docs/source/reference/apps/library/library.utils.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.utils module
|
||||
====================
|
||||
|
||||
.. automodule:: library.utils
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/library.views.rst
Normal file
7
docs/source/reference/apps/library/library.views.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library.views module
|
||||
====================
|
||||
|
||||
.. automodule:: library.views
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/library/modules.rst
Normal file
7
docs/source/reference/apps/library/modules.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
library
|
||||
=======
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
library
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.admin module
|
||||
=========================
|
||||
|
||||
.. automodule:: llm_manager.admin
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
17
docs/source/reference/apps/llm_manager/llm_manager.api.rst
Normal file
17
docs/source/reference/apps/llm_manager/llm_manager.api.rst
Normal file
@@ -0,0 +1,17 @@
|
||||
llm\_manager.api package
|
||||
========================
|
||||
|
||||
.. automodule:: llm_manager.api
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager.api.serializers
|
||||
llm_manager.api.urls
|
||||
llm_manager.api.views
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.api.serializers module
|
||||
===================================
|
||||
|
||||
.. automodule:: llm_manager.api.serializers
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.api.urls module
|
||||
============================
|
||||
|
||||
.. automodule:: llm_manager.api.urls
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.api.views module
|
||||
=============================
|
||||
|
||||
.. automodule:: llm_manager.api.views
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.apps module
|
||||
========================
|
||||
|
||||
.. automodule:: llm_manager.apps
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.encryption module
|
||||
==============================
|
||||
|
||||
.. automodule:: llm_manager.encryption
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.forms module
|
||||
=========================
|
||||
|
||||
.. automodule:: llm_manager.forms
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.management.commands.load\_default\_llm\_models module
|
||||
==================================================================
|
||||
|
||||
.. automodule:: llm_manager.management.commands.load_default_llm_models
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,15 @@
|
||||
llm\_manager.management.commands package
|
||||
========================================
|
||||
|
||||
.. automodule:: llm_manager.management.commands
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager.management.commands.load_default_llm_models
|
||||
@@ -0,0 +1,15 @@
|
||||
llm\_manager.management package
|
||||
===============================
|
||||
|
||||
.. automodule:: llm_manager.management
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager.management.commands
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.models module
|
||||
==========================
|
||||
|
||||
.. automodule:: llm_manager.models
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
32
docs/source/reference/apps/llm_manager/llm_manager.rst
Normal file
32
docs/source/reference/apps/llm_manager/llm_manager.rst
Normal file
@@ -0,0 +1,32 @@
|
||||
llm\_manager package
|
||||
====================
|
||||
|
||||
.. automodule:: llm_manager
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager.api
|
||||
llm_manager.management
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager.admin
|
||||
llm_manager.apps
|
||||
llm_manager.encryption
|
||||
llm_manager.forms
|
||||
llm_manager.models
|
||||
llm_manager.services
|
||||
llm_manager.tasks
|
||||
llm_manager.urls
|
||||
llm_manager.views
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.services module
|
||||
============================
|
||||
|
||||
.. automodule:: llm_manager.services
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.tasks module
|
||||
=========================
|
||||
|
||||
.. automodule:: llm_manager.tasks
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.urls module
|
||||
========================
|
||||
|
||||
.. automodule:: llm_manager.urls
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
llm\_manager.views module
|
||||
=========================
|
||||
|
||||
.. automodule:: llm_manager.views
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
7
docs/source/reference/apps/llm_manager/modules.rst
Normal file
7
docs/source/reference/apps/llm_manager/modules.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
llm_manager
|
||||
===========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
llm_manager
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.admin module
|
||||
========================
|
||||
|
||||
.. automodule:: mcp_server.admin
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
17
docs/source/reference/apps/mcp_server/mcp_server.api.rst
Normal file
17
docs/source/reference/apps/mcp_server/mcp_server.api.rst
Normal file
@@ -0,0 +1,17 @@
|
||||
mcp\_server.api package
|
||||
=======================
|
||||
|
||||
.. automodule:: mcp_server.api
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
mcp_server.api.serializers
|
||||
mcp_server.api.teams
|
||||
mcp_server.api.urls
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.api.serializers module
|
||||
==================================
|
||||
|
||||
.. automodule:: mcp_server.api.serializers
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.api.teams module
|
||||
============================
|
||||
|
||||
.. automodule:: mcp_server.api.teams
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.api.urls module
|
||||
===========================
|
||||
|
||||
.. automodule:: mcp_server.api.urls
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.apps module
|
||||
=======================
|
||||
|
||||
.. automodule:: mcp_server.apps
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.auth module
|
||||
=======================
|
||||
|
||||
.. automodule:: mcp_server.auth
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.context module
|
||||
==========================
|
||||
|
||||
.. automodule:: mcp_server.context
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.forms module
|
||||
========================
|
||||
|
||||
.. automodule:: mcp_server.forms
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.management.commands.backfill\_library\_memberships module
|
||||
=====================================================================
|
||||
|
||||
.. automodule:: mcp_server.management.commands.backfill_library_memberships
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.management.commands.create\_mcp\_token module
|
||||
=========================================================
|
||||
|
||||
.. automodule:: mcp_server.management.commands.create_mcp_token
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,17 @@
|
||||
mcp\_server.management.commands package
|
||||
=======================================
|
||||
|
||||
.. automodule:: mcp_server.management.commands
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
mcp_server.management.commands.backfill_library_memberships
|
||||
mcp_server.management.commands.create_mcp_token
|
||||
mcp_server.management.commands.seed_signing_key
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.management.commands.seed\_signing\_key module
|
||||
=========================================================
|
||||
|
||||
.. automodule:: mcp_server.management.commands.seed_signing_key
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,15 @@
|
||||
mcp\_server.management package
|
||||
==============================
|
||||
|
||||
.. automodule:: mcp_server.management
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
mcp_server.management.commands
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.metrics module
|
||||
==========================
|
||||
|
||||
.. automodule:: mcp_server.metrics
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
@@ -0,0 +1,7 @@
|
||||
mcp\_server.models module
|
||||
=========================
|
||||
|
||||
.. automodule:: mcp_server.models
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
35
docs/source/reference/apps/mcp_server/mcp_server.rst
Normal file
35
docs/source/reference/apps/mcp_server/mcp_server.rst
Normal file
@@ -0,0 +1,35 @@
|
||||
mcp\_server package
|
||||
===================
|
||||
|
||||
.. automodule:: mcp_server
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
mcp_server.api
|
||||
mcp_server.management
|
||||
mcp_server.tools
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
mcp_server.admin
|
||||
mcp_server.apps
|
||||
mcp_server.auth
|
||||
mcp_server.context
|
||||
mcp_server.forms
|
||||
mcp_server.metrics
|
||||
mcp_server.models
|
||||
mcp_server.server
|
||||
mcp_server.teams
|
||||
mcp_server.urls
|
||||
mcp_server.views
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user