feat: add init sidecar for migrations and setup on compose up

Introduces a one-shot `init` service in docker-compose that runs Postgres migrations, Neo4j index setup, and library-type seeding on every `up`. Long-running services (`app`, `mcp`, `worker`) now depend on its successful completion via `service_completed_successfully`, blocking the stack on configuration errors (missing embedding model, dimension mismatch, unreachable DB) rather than serving silent zero-result searches. Also standardizes reranker test fixtures to use the `/v1` OpenAI-style base URL convention used across other service clients.
2026-05-10 08:01:58 -04:00
parent 9ceb01f829
commit a945b382e6
15 changed files with 821 additions and 65 deletions
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -24,10 +24,17 @@
 #
 # Run:
 #   docker compose up -d
-#   docker compose run --rm app migrate    # one-shot DB migrate
+#
-#   docker compose run --rm app setup      # Neo4j indexes + library types
+# The `init` sidecar (below) runs Postgres migrations, Neo4j index setup,
 # and library-type seeding on every `up`. Long-running services wait for
 # it via `depends_on: init: service_completed_successfully` — so a failure
 # there (missing embedding model, dimension mismatch, unreachable DB)
 # blocks the stack rather than letting it serve silent zero-result
 # searches. The standalone `migrate` / `setup` entrypoint commands remain
 # available for ad-hoc ops work.
 # =============================================================================
 services:
  # ── Static-file seeder: copies /app/staticfiles into the shared volume on
  # every `up`. Runs once and exits. Without this, the named volume is only
@@ -41,6 +48,41 @@ services:
      - mnemosyne-static:/shared-static
    restart: "no"
  # ── Init sidecar: one-shot Postgres migrate + Neo4j index setup + library
  # type seed. Runs on every `up` and exits. Long-running services below
  # depend on `service_completed_successfully`, so a failure here (no system
  # embedding model configured, dimension mismatch, unreachable DB) blocks
  # `app`/`mcp`/`worker` from starting — which is the whole point. All three
  # commands are idempotent: re-running is a no-op unless state actually
  # needs to change.
  #
  # This sidecar only needs Postgres, Neo4j, and logging env — no S3, no
  # Celery, no LLM encryption key. Keep it that way.
  init:
    image: git.helu.ca/r/mnemosyne:latest
    # Re-pull on every `up` so the one-shot runs against the current
    # `:latest` image rather than a stale local copy.
    pull_policy: always
    # Presumably dispatched by docker/entrypoint.sh's `init)` branch, which
    # runs migrate, setup_neo4j_indexes, and load_library_types in order.
    command: ["init"]
    environment:
      # Django core (settings import)
      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
      - SECRET_KEY=${SECRET_KEY}
      - DEBUG=${DEBUG}
      - TIME_ZONE=${TIME_ZONE}
      - LANGUAGE_CODE=${LANGUAGE_CODE}
      # Postgres (migrate)
      - APP_DB_NAME=${APP_DB_NAME}
      - APP_DB_USER=${APP_DB_USER}
      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      # Neo4j (setup_neo4j_indexes + load_library_types)
      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
      # Logging
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    # One-shot: the container must exit and stay exited so dependents can
    # observe `service_completed_successfully`. Quoted because a bare `no`
    # is parsed as a boolean by YAML 1.1 loaders.
    restart: "no"
  # ── App: Django REST API + admin ──────────────────────────────────────────
  # Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
  # Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
@@ -103,6 +145,8 @@ services:
    depends_on:
      static-init:
        condition: service_completed_successfully
      init:
        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
@@ -112,6 +156,7 @@ services:
      retries: 3
      start_period: 30s
  # ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
  # Read-only LLM-facing surface. Intentionally excluded:
  #   CELERY_BROKER_URL         — MCP must not enqueue tasks
@@ -171,6 +216,9 @@ services:
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
    depends_on:
      init:
        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
@@ -180,6 +228,7 @@ services:
      retries: 3
      start_period: 30s
  # ── Celery worker: embedding + ingest + batch queues ───────────────────────
  # Consumer side of the queue. Needs the full S3 block (reads Daedalus's
  # bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -55,6 +55,19 @@ case "$1" in
    python manage.py load_library_types
    ;;
  init)
    # Bundled one-shot init run by the `init` sidecar on every
    # `docker compose up`. Idempotent: re-runs are no-ops unless migrations
    # or indexes need to change. A non-zero exit here blocks `app`, `mcp`,
    # and `worker` from starting, which is the point — we'd rather fail
    # loudly than serve silent zero-result searches.
    set -e
    python manage.py migrate --noinput
    python manage.py setup_neo4j_indexes
    python manage.py load_library_types
    ;;
  shell)
    # Drop into the management shell for ad-hoc work.
    exec python manage.py shell
--- a/docs/PHASE_3_SEARCH_AND_RERANKING.md
+++ b/docs/PHASE_3_SEARCH_AND_RERANKING.md
@@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank
 }
 ```
 > **`LLMApi.base_url` convention.** Every Mnemosyne service client
 > (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`)
 > treats `base_url` as the **OpenAI-style `/v1` root** and appends a
 > path-only segment: `/embeddings`, `/rerank`, `/chat/completions`.
 > So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1`
 > serves both the embedding and the reranker endpoints — no per-purpose
 > duplication needed.
 >
 > Get this wrong (e.g. set `base_url=http://pan.helu.ca:8400` with no
 > `/v1`, or have a client prepend `/v1` locally) and you get a
 > double-prefixed URL like `…/v1/v1/rerank` that 404s silently —
 > `SearchService._rerank` catches the exception, the UI shows
 > "Re-rank: Skipped", and the search falls back to raw RRF order.
 > Check `results.reranker_skip_reason` on the search page for the
 > specific error.
 ## Deliverables
 ### 1. Search Service (`library/services/search.py`)
--- a/docs/mnemosyne.html
+++ b/docs/mnemosyne.html
@@ -294,31 +294,37 @@ graph LR
            </div>
            <div class="alert alert-warning border-start border-4 border-warning">
-                <h4><i class="bi bi-lightning"></i> Neo4j Vector Indexes</h4>
+                <h4><i class="bi bi-lightning"></i> Neo4j Indexes (managed by <code>setup_neo4j_indexes</code>)</h4>
-                <pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
+                <p>Created by the <code>init</code> sidecar on every <code>docker compose up</code>. Vector dimensions come from the system embedding model's <code>vector_dimensions</code> field — the command fails if no model is configured. Current production model: <strong>Pan Synesis · qwen3-vl-embedding-2b · 2048d</strong>.</p>
-CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
+                <pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (dimensions read from system embedding model)
 CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
 ON (c.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}
 // Concept embeddings for semantic concept search
-CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
+CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
 ON (con.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}
 // Image multimodal embeddings
-CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
+CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
 ON (ie.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}
-// Full-text index for keyword/BM25-style search
+// Full-text indexes (BM25-style keyword search)
-CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]</code></pre>
+CREATE FULLTEXT INDEX chunk_text_fulltext    FOR (c:Chunk)   ON EACH [c.text_preview]
 CREATE FULLTEXT INDEX concept_name_fulltext  FOR (c:Concept) ON EACH [c.name]
 CREATE FULLTEXT INDEX item_title_fulltext    FOR (i:Item)    ON EACH [i.title]
 CREATE FULLTEXT INDEX library_name_fulltext  FOR (l:Library) ON EACH [l.name]</code></pre>
                <p class="mb-0 mt-3"><strong>Changing the embedding model or dimensions is a re-embedding event.</strong> Drop + recreate the vector indexes (<code>setup_neo4j_indexes --drop</code>) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.</p>
            </div>
        </section>
        <!-- SECTION: CONTENT TYPES -->
@@ -521,10 +527,11 @@ flowchart TD
                        <div class="card-body">
                            <p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
                            <pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
-  'chunk_embedding', 30,
+  'chunk_embedding_index', 30,
  $query_vector
 ) YIELD node, score
 WHERE score > $threshold</code></pre>
                        </div>
                    </div>
                </div>
@@ -548,9 +555,10 @@ RETURN c2, i2</code></pre>
                        <div class="card-body">
                            <p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
                            <pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
-  'chunk_fulltext',
+  'chunk_text_fulltext',
  $query_text
 ) YIELD node, score</code></pre>
                        </div>
                    </div>
                </div>
--- a/mnemosyne/library/api/serializers.py
+++ b/mnemosyne/library/api/serializers.py
@@ -150,6 +150,16 @@ class SearchResponseSerializer(serializers.Serializer):
    reranker_used = serializers.BooleanField()
    reranker_model = serializers.CharField(allow_null=True)
    search_types_used = serializers.ListField(child=serializers.CharField())
    # Populated when ``rerank=True`` was requested but the re-ranking
    # step did not run — e.g. no system reranker model configured
    # (``no_system_model``), the Synesis call raised
    # (``api_error: ...``), or fusion produced zero candidates
    # (``no_candidates``). ``null`` means either success or that
    # re-ranking was not requested. ``required=False`` keeps old
    # clients happy.
    reranker_skip_reason = serializers.CharField(
        allow_null=True, required=False, default=None
    )
 # --- Workspace lifecycle (Daedalus integration) ---
--- a/mnemosyne/library/apps.py
+++ b/mnemosyne/library/apps.py
@@ -1,7 +1,202 @@
 """
 Django AppConfig for the ``library`` app.
 Registers a startup probe that runs once per Python process and yells if
 Mnemosyne is misconfigured in a way that would cause silent zero-result
 searches — missing embedding model, missing Neo4j index, or a dimension
 mismatch between the model and an existing index. Loud ERROR lines are the
 only defence against "search works but returns nothing", which is
 indistinguishable from "search works and matched nothing" unless you read
 the stderr of a different container.
 The probe is deliberately best-effort: it cannot crash the process even if
 Neo4j is unreachable, because a transient DB blip on startup should not
 take down the whole app. The `init` sidecar is the hard gate; this is the
 second line of defence for long-running containers.
 """
 import logging
 import os
 import sys
 from django.apps import AppConfig
 logger = logging.getLogger(__name__)
 # Index names we expect setup_neo4j_indexes to have created. Kept in sync
 # with library/management/commands/setup_neo4j_indexes.py. A test asserts
 # they stay in sync.
 _EXPECTED_VECTOR_INDEXES = (
    "chunk_embedding_index",
    "concept_embedding_index",
    "image_embedding_index",
 )
 _EXPECTED_FULLTEXT_INDEXES = (
    "chunk_text_fulltext",
    "concept_name_fulltext",
    "item_title_fulltext",
    "library_name_fulltext",
 )
 def _should_skip_probe() -> bool:
    """
    Decide whether to skip the startup probe.
    Skip when:
    - Running a management command other than the long-running servers
      (migrate, makemigrations, setup_neo4j_indexes, load_library_types,
      collectstatic, test, shell). The probe would just spam stderr for
      ops work that doesn't care about Neo4j index state.
    - Neo4j bolt URL is unset (build-time ``collectstatic`` stubs, CI
      unit tests without real infra).
    - Running under pytest (any argv contains ``pytest`` or a test runner
      env var is set).
    """
    if "pytest" in sys.argv[0] or "PYTEST_CURRENT_TEST" in os.environ:
        return True
    if os.environ.get("DJANGO_SKIP_STARTUP_PROBE") == "1":
        return True
    # Typical Django command invocations where the probe is noise.
    skip_commands = {
        "migrate",
        "makemigrations",
        "setup_neo4j_indexes",
        "load_library_types",
        "collectstatic",
        "test",
        "shell",
        "check",
        "dbshell",
        "showmigrations",
        "squashmigrations",
        "createsuperuser",
        "help",
    }
    if len(sys.argv) >= 2 and sys.argv[1] in skip_commands:
        return True
    # No Neo4j endpoint configured — probably a build or local dev without
    # graph infrastructure. Don't pretend we can check.
    if not os.environ.get("NEOMODEL_NEO4J_BOLT_URL"):
        return True
    return False
 def _run_startup_probe():
    """
    Emit ERROR/WARNING logs if the stack is misconfigured for search.
    Two checks run in order:
    1. The system embedding model is read and its ``vector_dimensions``
       recorded (an ERROR is logged if the model or its dimensions are
       missing).
    2. ``SHOW INDEXES`` is queried and compared against
       ``_EXPECTED_VECTOR_INDEXES`` / ``_EXPECTED_FULLTEXT_INDEXES``; each
       missing index, and each vector index whose dimension differs from
       the model's, is logged at ERROR.
    Each check is individually guarded so a single unreachable dependency
    doesn't mask the other findings. The ``neomodel`` import itself is not
    guarded here; the caller is expected to catch that. Returns nothing;
    side effect is log output.
    """
    # Function-scope import — presumably so that importing this module does
    # not require neomodel to be importable (TODO confirm).
    from neomodel import db
    # --- 1. System embedding model --------------------------------------
    # ``embedding_dim`` stays None unless a model with dimensions is found;
    # None disables the per-index dimension comparison further down.
    embedding_dim = None
    embedding_model_label = "<unknown>"
    try:
        from llm_manager.models import LLMModel
        model = LLMModel.get_system_embedding_model()
        if not model:
            logger.error(
                "No system embedding model configured. Search will return "
                "zero results until one is set in the LLM admin."
            )
        elif not model.vector_dimensions:
            logger.error(
                "System embedding model '%s: %s' has no vector_dimensions "
                "set. Neo4j vector indexes cannot be validated and search "
                "quality will be unpredictable.",
                model.api.name,
                model.name,
            )
            embedding_model_label = f"{model.api.name}: {model.name}"
        else:
            embedding_dim = model.vector_dimensions
            embedding_model_label = f"{model.api.name}: {model.name}"
            logger.info(
                "System embedding model: %s (%dd)",
                embedding_model_label,
                embedding_dim,
            )
    except Exception as exc:
        # Best-effort: a failed model lookup downgrades to a WARNING and
        # the index-presence checks below still run.
        logger.warning(
            "Startup probe could not read system embedding model: %s", exc
        )
    # --- 2. Neo4j indexes present & correctly dimensioned ---------------
    try:
        results, _ = db.cypher_query(
            "SHOW INDEXES YIELD name, type, options RETURN name, type, options"
        )
    except Exception as exc:
        logger.warning(
            "Startup probe could not list Neo4j indexes: %s. Search "
            "degradation will only surface at query time.",
            exc,
        )
        # Without the index listing none of the remaining checks can run.
        return
    # Map index name -> (type, options) for the lookups below.
    present = {}
    for row in results:
        name, idx_type, options = row[0], row[1], row[2]
        present[name] = (idx_type, options)
    # Missing vector indexes
    for name in _EXPECTED_VECTOR_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j vector index '%s' is missing. Run "
                "'docker compose run --rm init' (or 'python manage.py "
                "setup_neo4j_indexes') to rebuild.",
                name,
            )
            continue
        # Dimension check against the embedding model.
        if embedding_dim is None:
            continue
        idx_type, options = present[name]
        # ``options`` or its ``indexConfig`` entry may be missing/None.
        config = (options or {}).get("indexConfig") or {}
        raw_dim = config.get("vector.dimensions")
        try:
            existing_dim = int(raw_dim) if raw_dim is not None else None
        except (TypeError, ValueError):
            # An unparseable dimension is treated as unknown: no mismatch
            # is reported for this index.
            existing_dim = None
        if existing_dim is not None and existing_dim != embedding_dim:
            logger.error(
                "Neo4j index '%s' has %d dimensions but system embedding "
                "model %s reports %d. Re-run 'setup_neo4j_indexes --drop' "
                "and re-embed all content — search will return empty or "
                "wrong results until this is fixed.",
                name,
                existing_dim,
                embedding_model_label,
                embedding_dim,
            )
    # Missing fulltext indexes
    for name in _EXPECTED_FULLTEXT_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j full-text index '%s' is missing. Full-text search "
                "will silently return no matches. Run 'setup_neo4j_indexes'.",
                name,
            )
 class LibraryConfig(AppConfig):
    """
    AppConfig for the ``library`` app; runs the best-effort startup probe
    from ``ready()``.
    """
    default_auto_field = "django.db.models.BigAutoField"
    name = "library"
    verbose_name = "Library"
    def ready(self):
        """
        Run the startup probe unless ``_should_skip_probe()`` opts out.
        Both the skip decision and the probe sit inside the guard, so
        neither can raise out of ``ready()``; a failure is logged at
        WARNING with a traceback instead.
        """
        try:
            if _should_skip_probe():
                return
            _run_startup_probe()
        except Exception as exc:
            # Never let the probe itself take down the process.
            logger.warning("Startup probe crashed: %s", exc, exc_info=True)
--- a/mnemosyne/library/management/commands/setup_neo4j_indexes.py
+++ b/mnemosyne/library/management/commands/setup_neo4j_indexes.py
@@ -2,21 +2,28 @@
 Management command to create Neo4j indexes for Mnemosyne content graph.
 Creates:
- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding
+- Vector indexes for Chunk, Concept, and ImageEmbedding. Dimensions are read
- Full-text indexes for text search on Chunk.text_preview and Concept.name
+  from the system embedding model — NOT a settings default. If no model is
- Constraint indexes enforced by neomodel (unique properties)
+  configured and no ``--dimensions`` override is supplied, the command
  fails: a wrong dimension is worse than a clear error.
 - Full-text indexes for text search on Chunk.text_preview, Concept.name,
  Item.title, Library.name.
 - Constraint indexes enforced by neomodel (unique properties).
 When run via the `init` sidecar, a non-zero exit here blocks ``app``/``mcp``/
 ``worker`` from starting. That is deliberate: silent zero-result searches
 are the failure mode we are trying to prevent.
 """
 import logging
 import re
 import sys
-from django.core.management.base import BaseCommand
+from django.core.management.base import BaseCommand, CommandError
 from neomodel import db
 logger = logging.getLogger(__name__)
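 # Legacy default vector dimensions. NOTE(review): ``_get_vector_dimensions``
 # no longer falls back to this value — it returns ``None`` when no system
 # embedding model is configured — so confirm whether anything still
 # references this constant.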
 DEFAULT_VECTOR_DIMENSIONS = 4096
 # Full-text index definitions: (index_name, label, properties)
 FULLTEXT_INDEXES = [
    ("chunk_text_fulltext", "Chunk", ["text_preview"]),
@@ -26,57 +33,122 @@ FULLTEXT_INDEXES = [
 ]
-def _get_vector_dimensions():
+def _get_vector_dimensions(override: int = 0):
    """
-    Get vector dimensions from the system embedding model.
+    Resolve the dimension count to build vector indexes at.
-    Falls back to DEFAULT_VECTOR_DIMENSIONS if no model is configured
+    Precedence (top wins):
-    or the model has no vector_dimensions set.
+      1. ``--dimensions`` CLI override (positive int)
      2. System embedding model's ``vector_dimensions`` field
-    :returns: Tuple of (dimensions, source_description).
+    If neither is available, returns ``(None, reason)`` and the caller must
    abort. There is deliberately no hardcoded fallback — an index built at
    the wrong dimension silently breaks search forever.
    :param override: Value from the ``--dimensions`` CLI flag (0 if unset).
    :returns: Tuple of ``(dimensions, source_description)``. ``dimensions``
              is ``None`` when the value cannot be resolved.
    """
    if override > 0:
        return override, f"CLI override (--dimensions={override})"
    try:
        from llm_manager.models import LLMModel
        model = LLMModel.get_system_embedding_model()
-        if model and model.vector_dimensions:
+    except Exception as exc:  # pragma: no cover - DB unreachable path
-            return model.vector_dimensions, f"{model.api.name}: {model.name}"
+        return None, f"LLMModel lookup failed: {exc}"
    except Exception:
        pass
-    return DEFAULT_VECTOR_DIMENSIONS, "default (no system embedding model)"
+    if not model:
        return None, "no system embedding model configured"
    if not model.vector_dimensions:
        return (
            None,
            f"system embedding model '{model.api.name}: {model.name}' has "
            "no vector_dimensions set",
        )
    return model.vector_dimensions, f"{model.api.name}: {model.name}"
 def _existing_vector_index_dimensions(name: str):
    """
    Look up the dimension count an existing vector index was built with.
    Queries ``SHOW INDEXES`` for the given name and reads
    ``options["indexConfig"]["vector.dimensions"]``.
    :param name: Name of the index to inspect.
    :returns: The dimension count as an ``int``, or ``None`` when the query
              fails, no such index exists, the index is not of type
              ``VECTOR``, or no number can be extracted from the value.
    """
    query = (
        "SHOW INDEXES YIELD name, type, options "
        "WHERE name = $name RETURN type, options"
    )
    try:
        rows, _meta = db.cypher_query(query, {"name": name})
    except Exception:
        return None
    if not rows:
        return None
    index_type, index_options = rows[0]
    if (index_type or "").upper() != "VECTOR":
        return None
    # The driver hands ``options`` back as a dict; either level may be
    # missing or None.
    index_config = (index_options or {}).get("indexConfig") or {}
    raw_value = index_config.get("vector.dimensions")
    if raw_value is None:
        return None
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        # Not directly convertible: fall back to the first run of digits
        # in the value's string form, if there is one.
        digits = re.search(r"\d+", str(raw_value))
        if digits is None:
            return None
        return int(digits.group(0))
 class Command(BaseCommand):
    help = (
-        "Create Neo4j vector, full-text, and constraint indexes "
+        "Create Neo4j vector, full-text, and constraint indexes for the "
-        "for the Mnemosyne content graph. Vector dimensions are read "
+        "Mnemosyne content graph. Vector dimensions are read from the "
-        "from the system embedding model."
+        "system embedding model; the command fails if the model is not "
        "configured (pass --dimensions only for explicit overrides)."
    )
    def add_arguments(self, parser):
        parser.add_argument(
            "--drop",
            action="store_true",
-            help="Drop existing indexes before recreating them",
+            help="Drop existing managed indexes before recreating them.",
        )
        parser.add_argument(
            "--dimensions",
            type=int,
            default=0,
-            help="Override vector dimensions (default: read from system embedding model)",
+            help=(
                "Override vector dimensions (normally read from the system "
                "embedding model). Use with care — an incorrect value "
                "silently breaks search."
            ),
        )
    def handle(self, *args, **options):
        drop = options["drop"]
        override_dims = options["dimensions"]
-        # Resolve vector dimensions
+        dimensions, source = _get_vector_dimensions(override_dims)
-        if override_dims > 0:
+
-            dimensions = override_dims
+        if dimensions is None:
-            source = f"CLI override ({override_dims})"
+            # Fail loudly. Returning success while skipping vector-index
-        else:
+            # creation is exactly how `app`/`mcp`/`worker` end up running
-            dimensions, source = _get_vector_dimensions()
+            # against an un-indexed Neo4j and serving empty search results.
            raise CommandError(
                "Cannot create vector indexes: "
                f"{source}. Configure a system embedding model with "
                "vector_dimensions set, or pass --dimensions N explicitly."
            )
        self.stdout.write(
            self.style.HTTP_INFO(
@@ -84,7 +156,7 @@ class Command(BaseCommand):
            )
        )
-        # Vector index definitions (dynamic dimensions)
+        # Vector index definitions (dimensions resolved above)
        vector_indexes = [
            ("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
            ("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
@@ -98,11 +170,27 @@ class Command(BaseCommand):
            self._drop_indexes(existing_indexes, vector_indexes)
            existing_indexes = self._get_existing_indexes()
        dim_mismatch = False
        # Create vector indexes
        for name, label, prop, dims, similarity in vector_indexes:
            if name in existing_indexes:
                existing_dim = _existing_vector_index_dimensions(name)
                if existing_dim is not None and existing_dim != dims:
                    dim_mismatch = True
                    self.stderr.write(
                        self.style.ERROR(
                            f"Vector index '{name}' exists at {existing_dim} "
                            f"dimensions but the system model requires {dims}. "
                            "Re-run with --drop and re-embed all content."
                        )
                    )
                    continue
                self.stdout.write(
-                    self.style.NOTICE(f"Vector index '{name}' already exists, skipping")
+                    self.style.NOTICE(
                        f"Vector index '{name}' already exists "
                        f"({existing_dim or 'unknown'}d), skipping"
                    )
                )
                continue
            try:
@@ -164,6 +252,20 @@ class Command(BaseCommand):
                self.style.ERROR(f"Failed to install neomodel labels: {e}")
            )
        if dim_mismatch:
            # Exit non-zero so the `init` sidecar's compose dependency marks
            # the stack startup as failed. Admin must re-run with --drop and
            # re-embed content.
            self.stderr.write(
                self.style.ERROR(
                    "\nOne or more vector indexes exist at the wrong "
                    "dimension. Search will return empty or garbage results "
                    "until you run: setup_neo4j_indexes --drop "
                    "and re-embed all content."
                )
            )
            sys.exit(2)
        self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))
    def _get_existing_indexes(self):
--- a/mnemosyne/library/services/reranker.py
+++ b/mnemosyne/library/services/reranker.py
@@ -79,7 +79,13 @@ class RerankerClient:
                query, candidates, instruction, top_n, query_image
            )
-            url = f"{self.base_url}/v1/rerank"
+            # Convention shared with every other service client in Mnemosyne
            # (embedding_client, vision, concepts): ``base_url`` is the
            # OpenAI-style ``/v1`` root (e.g. ``http://pan.helu.ca:8400/v1``),
            # and each client appends a path-only segment. Prepending
            # ``/v1`` here would build ``…/v1/v1/rerank`` and 404 against
            # Synesis.
            url = f"{self.base_url}/rerank"
            headers = {"Content-Type": "application/json"}
            if self.api.api_key:
                headers["Authorization"] = f"Bearer {self.api.api_key}"
--- a/mnemosyne/library/services/search.py
+++ b/mnemosyne/library/services/search.py
@@ -99,7 +99,17 @@ class SearchRequest:
@dataclass
 class SearchResponse:
-    """Results from a search query."""
+    """Results from a search query.
    ``reranker_skip_reason`` is set when the caller requested
    ``rerank=True`` but the re-ranking step did not actually run — for
    example because no system reranker model is configured, the Synesis
    HTTP call raised, or the fused candidate list was empty. It is
    ``None`` both when re-ranking succeeded (``reranker_used=True``) and
    when the caller asked for ``rerank=False`` — callers / templates
    should distinguish those two "no-reason" cases by looking at the
    original request's ``rerank`` flag.
    """
    query: str
    candidates: list[SearchCandidate]
@@ -109,6 +119,7 @@ class SearchResponse:
    reranker_used: bool
    reranker_model: Optional[str]
    search_types_used: list[str]
    reranker_skip_reason: Optional[str] = None
 class SearchService:
@@ -186,13 +197,19 @@ class SearchService:
        # --- Re-rank ---
        reranker_used = False
        reranker_model_name = None
        reranker_skip_reason: Optional[str] = None
-        if request.rerank and fused:
+        if request.rerank:
-            reranked, model_name = self._rerank(request, fused)
+            if not fused:
-            if reranked is not None:
+                reranker_skip_reason = "no_candidates"
-                fused = reranked
+            else:
-                reranker_used = True
+                reranked, model_name, skip_reason = self._rerank(request, fused)
-                reranker_model_name = model_name
+                if reranked is not None:
                    fused = reranked
                    reranker_used = True
                    reranker_model_name = model_name
                else:
                    reranker_skip_reason = skip_reason
        # Trim to limit
        fused = fused[: request.limit]
@@ -225,6 +242,7 @@ class SearchService:
            reranker_used=reranker_used,
            reranker_model=reranker_model_name,
            search_types_used=search_types_used,
            reranker_skip_reason=reranker_skip_reason,
        )
    # ------------------------------------------------------------------
@@ -720,13 +738,28 @@ class SearchService:
    def _rerank(
        self, request: SearchRequest, candidates: list[SearchCandidate]
-    ) -> tuple[Optional[list[SearchCandidate]], Optional[str]]:
+    ) -> tuple[Optional[list[SearchCandidate]], Optional[str], Optional[str]]:
        """
        Re-rank candidates via Synesis.
        :param request: SearchRequest.
        :param candidates: Fused candidates to re-rank.
-        :returns: Tuple of (reranked_candidates, model_name) or (None, None).
+        :returns: Tuple of ``(reranked_candidates, model_name, skip_reason)``.
            On success the first element is the reranked list, the second
            is the model name, and the third is ``None``.
            On skip the first two are ``None`` and the third is a short
            machine-readable reason:
            * ``"no_system_model"`` — no ``LLMModel.is_system_reranker_model``
              configured.
            * ``"api_error: <truncated msg>"`` — ``RerankerClient.rerank``
              raised (HTTP error, network error, malformed response).
            The reason is intended for display on the search page so the
            user can see *why* re-ranking didn't happen, without having to
            grep server logs.
        """
        from llm_manager.models import LLMModel
@@ -735,7 +768,7 @@ class SearchService:
        reranker_model = LLMModel.get_system_reranker_model()
        if not reranker_model:
            logger.debug("No system reranker model — skipping re-ranking")
-            return None, None
+            return None, None, "no_system_model"
        # Get content-type reranker instruction
        instruction = self._get_reranker_instruction(request, candidates)
@@ -755,12 +788,17 @@ class SearchService:
                instruction=instruction,
                query_image=request.query_image,
            )
-            return reranked, reranker_model.name
+            return reranked, reranker_model.name, None
        except Exception as exc:
            logger.warning(
                "Re-ranking failed, returning fusion results: %s", exc
            )
-            return None, None
+            # Truncate the exception message to keep the UI tooltip /
            # JSON payload bounded; full detail is in the WARNING log.
            msg = str(exc)
            if len(msg) > 200:
                msg = msg[:197] + "..."
            return None, None, f"api_error: {msg}"
    # ------------------------------------------------------------------
    # Helpers
--- a/mnemosyne/library/templates/library/library_detail.html
+++ b/mnemosyne/library/templates/library/library_detail.html
@@ -146,9 +146,23 @@
                    {% if results_reranked.reranker_used %}
                        <span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
                    {% else %}
-                        <span class="badge badge-warning">unavailable</span>
+                        {% comment %}
                           The A/B page always requests rerank=True, so a
                           negative here is always a skip with a reason —
                           surface the reason in a tooltip so the user
                           knows *why* the B side fell back to fusion
                           order.
                           (Block comment tag used because {# #} comments
                           must fit on a single line.)
                         {% endcomment %}
                        <span class="badge badge-warning"
                              title="{{ results_reranked.reranker_skip_reason|default:'unknown' }}">
                            unavailable
                        </span>
                    {% endif %}
                </div>
                {% if not results_reranked.reranker_used and results_reranked.reranker_skip_reason %}
                <div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
                     title="{{ results_reranked.reranker_skip_reason }}">
                    {{ results_reranked.reranker_skip_reason }}
                </div>
                {% endif %}
            </div>
        </div>
--- a/mnemosyne/library/templates/library/search.html
+++ b/mnemosyne/library/templates/library/search.html
@@ -81,12 +81,30 @@
        <div class="stat">
            <div class="stat-title">Re-ranked</div>
            <div class="stat-value text-lg">
                {% comment %}
                   Three-state indicator:
                   - Yes: re-ranker ran successfully
                   - Skipped (+ tooltip): re-rank was requested but the
                     pipeline bailed — badge colour is a warning so the
                     user knows something didn't work
                   - Off: user unchecked the re-rank box
                   (Block comment tag used because {# #} comments must
                   fit on a single line.)
                {% endcomment %}
                {% if results.reranker_used %}
                    <span class="badge badge-success">Yes</span>
                {% elif results.reranker_skip_reason %}
                    <span class="badge badge-warning"
                          title="{{ results.reranker_skip_reason }}">
                        Skipped
                    </span>
                {% else %}
-                    <span class="badge badge-ghost">No</span>
+                    <span class="badge badge-ghost">Off</span>
                {% endif %}
            </div>
            {% if results.reranker_skip_reason %}
            <div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
                 title="{{ results.reranker_skip_reason }}">
                {{ results.reranker_skip_reason }}
            </div>
            {% endif %}
        </div>
        <div class="stat">
            <div class="stat-title">Search Types</div>
--- a/mnemosyne/library/tests/test_reranker.py
+++ b/mnemosyne/library/tests/test_reranker.py
@@ -29,10 +29,15 @@ def _make_candidate(chunk_uid: str, text_preview: str = "Some text", **kwargs):
 def _mock_reranker_model():
-    """Create a mock LLMModel for reranking."""
+    """Create a mock LLMModel for reranking.
    ``base_url`` follows the project-wide convention: it is the
    OpenAI-style ``/v1`` root. Every service client (embedding, vision,
    concepts, reranker) appends a path-only segment to it.
    """
    model = MagicMock()
    model.name = "qwen3-vl-reranker-2b"
-    model.api.base_url = "http://pan.helu.ca:8400"
+    model.api.base_url = "http://pan.helu.ca:8400/v1"
    model.api.api_key = ""
    model.api.timeout_seconds = 30
    model.input_cost_per_1k = Decimal("0")
@@ -49,7 +54,7 @@ class RerankerClientInitTest(TestCase):
        client = RerankerClient(model)
        self.assertEqual(client.model_name, "qwen3-vl-reranker-2b")
-        self.assertEqual(client.base_url, "http://pan.helu.ca:8400")
+        self.assertEqual(client.base_url, "http://pan.helu.ca:8400/v1")
 class RerankerClientRerankTest(TestCase):
@@ -198,6 +203,33 @@ class RerankerClientRerankTest(TestCase):
        with self.assertRaises(Exception):
            client.rerank(query="test", candidates=candidates)
    @patch("library.services.reranker.requests.post")
    def test_request_url_is_base_plus_rerank(self, mock_post):
        """The POST target is ``{base_url}/rerank``; ``base_url`` carries ``/v1``.

        Regression guard: the client used to prepend ``/v1/`` itself, so a
        ``LLMApi.base_url`` written in the same style as the embedding /
        chat / vision clients yielded ``…/v1/v1/rerank`` and a 404 from
        Synesis.
        """
        ok_response = MagicMock()
        ok_response.status_code = 200
        ok_response.json.return_value = {
            "results": [{"index": 0, "score": 0.5}],
        }
        mock_post.return_value = ok_response

        client = RerankerClient(_mock_reranker_model())
        client.rerank(query="test", candidates=[_make_candidate("a")])

        # The URL may have been passed positionally or as ``url=``.
        recorded_call = mock_post.call_args
        if recorded_call.args:
            called_url = recorded_call.args[0]
        else:
            called_url = recorded_call.kwargs.get("url")
        self.assertEqual(called_url, "http://pan.helu.ca:8400/v1/rerank")
    @patch("library.services.reranker.requests.post")
    def test_no_instruction_omits_field(self, mock_post):
        """Empty instruction is not sent in payload."""
--- a/mnemosyne/library/tests/test_search.py
+++ b/mnemosyne/library/tests/test_search.py
@@ -52,7 +52,7 @@ class SearchServiceSearchTest(TestCase):
        ]
        mock_fulltext.return_value = []
        mock_graph.return_value = []
-        mock_rerank.return_value = (None, None)
+        mock_rerank.return_value = (None, None, "no_system_model")
        mock_image.return_value = []
        request = SearchRequest(
@@ -110,7 +110,9 @@ class SearchServiceSearchTest(TestCase):
            chunk_s3_key="s3/key", chunk_index=0, score=0.95,
            source="fulltext",
        )
-        mock_rerank.return_value = ([reranked_candidate], "qwen3-vl-reranker-2b")
+        mock_rerank.return_value = (
            [reranked_candidate], "qwen3-vl-reranker-2b", None,
        )
        request = SearchRequest(
            query="test",
@@ -125,6 +127,8 @@ class SearchServiceSearchTest(TestCase):
        self.assertTrue(response.reranker_used)
        self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
        self.assertAlmostEqual(response.candidates[0].score, 0.95)
        # Successful rerank → no skip reason surfaced to UI / API.
        self.assertIsNone(response.reranker_skip_reason)
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
@@ -152,6 +156,103 @@ class SearchServiceSearchTest(TestCase):
        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)
        # ``rerank=False`` means "not requested", not "skipped" — so no
        # reason is reported. The search template shows "Off" rather than
        # "Skipped" in this case because the skip reason is absent.
        self.assertIsNone(response.reranker_skip_reason)
    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_no_system_model(
        self, mock_embed, mock_fulltext, mock_rerank
    ):
        """A requested rerank with no system model reports ``no_system_model``."""
        # One full-text hit so the pipeline has something to hand to the
        # (patched) reranker.
        lone_hit = SearchCandidate(
            chunk_uid="c1",
            item_uid="i1",
            item_title="Test",
            library_type="technical",
            text_preview="preview",
            chunk_s3_key="s3/key",
            chunk_index=0,
            score=0.5,
            source="fulltext",
        )
        mock_embed.return_value = None
        mock_fulltext.return_value = [lone_hit]
        mock_rerank.return_value = (None, None, "no_system_model")

        search_request = SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=True,
            include_images=False,
        )
        result = SearchService().search(search_request)

        self.assertFalse(result.reranker_used)
        self.assertIsNone(result.reranker_model)
        self.assertEqual(result.reranker_skip_reason, "no_system_model")
    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_api_error(
        self, mock_embed, mock_fulltext, mock_rerank
    ):
        """An ``api_error: ...`` skip reason from the reranker reaches the response."""
        lone_hit = SearchCandidate(
            chunk_uid="c1",
            item_uid="i1",
            item_title="Test",
            library_type="technical",
            text_preview="preview",
            chunk_s3_key="s3/key",
            chunk_index=0,
            score=0.5,
            source="fulltext",
        )
        # Stand-in for what ``_rerank`` reports when the rerank call raises.
        failure_reason = (
            "api_error: 404 Client Error: Not Found for url: "
            "http://pan.helu.ca:8400/v1/v1/rerank"
        )
        mock_embed.return_value = None
        mock_fulltext.return_value = [lone_hit]
        mock_rerank.return_value = (None, None, failure_reason)

        search_request = SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=True,
            include_images=False,
        )
        result = SearchService().search(search_request)

        self.assertFalse(result.reranker_used)
        self.assertIsNotNone(result.reranker_skip_reason)
        has_prefix = result.reranker_skip_reason.startswith("api_error:")
        self.assertTrue(
            has_prefix,
            f"expected api_error: prefix, got {result.reranker_skip_reason!r}",
        )
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_no_candidates(
        self, mock_embed, mock_fulltext
    ):
        """A requested rerank over an empty result set reports ``no_candidates``."""
        # Full-text search returns nothing and no query embedding is produced.
        mock_fulltext.return_value = []
        mock_embed.return_value = None

        search_request = SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=True,
            include_images=False,
        )
        result = SearchService().search(search_request)

        self.assertFalse(result.reranker_used)
        self.assertEqual(result.reranker_skip_reason, "no_candidates")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
--- a/mnemosyne/library/tests/test_search_api.py
+++ b/mnemosyne/library/tests/test_search_api.py
@@ -208,6 +208,41 @@ class SearchAPIResponseTest(TestCase):
        self.assertEqual(call_args.search_types, ["fulltext"])
        self.assertFalse(call_args.rerank)
    @patch("library.api.views.SearchService")
    def test_reranker_skip_reason_surfaced_in_json(self, MockService):
        """The JSON search API includes ``reranker_skip_reason`` in its payload."""
        skip_reason = (
            "api_error: 404 Client Error: Not Found for url: "
            "http://pan.helu.ca:8400/v1/v1/rerank"
        )
        # Canned service result: an empty, un-reranked response carrying
        # the skip reason.
        MockService.return_value.search.return_value = SearchResponse(
            query="neural networks",
            candidates=[],
            images=[],
            total_candidates=0,
            search_time_ms=10.0,
            reranker_used=False,
            reranker_model=None,
            search_types_used=[],
            reranker_skip_reason=skip_reason,
        )

        response = self.client.post(
            "/library/api/search/",
            {"query": "neural networks"},
            format="json",
        )

        self.assertEqual(response.status_code, 200)
        payload = response.json()
        self.assertFalse(payload["reranker_used"])
        self.assertIn("reranker_skip_reason", payload)
        self.assertTrue(
            payload["reranker_skip_reason"].startswith("api_error:"),
            f"got {payload['reranker_skip_reason']!r}",
        )
 class ConceptAPITest(TestCase):
    """Tests for concept API endpoints."""
--- a/mnemosyne/library/tests/test_search_views_admin_scope.py
+++ b/mnemosyne/library/tests/test_search_views_admin_scope.py
@@ -117,11 +117,15 @@ class SearchPageAllowedLibrariesTests(TestCase):
        )
        self.client.force_login(self.user)
-    def _patched_search(self):
+    def _patched_search(self, reranker_skip_reason=None):
        """Return a (request_capture, patch_context) pair.
        The patch captures the ``SearchRequest`` that ``SearchService.search``
        is called with so assertions can run after the view returns.
        :param reranker_skip_reason: Value to set on the stub response's
            ``reranker_skip_reason`` attribute, for tests that want to
            exercise the "Skipped" badge rendering path.
        """
        capture: dict = {}
@@ -137,6 +141,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
                reranker_used=False,
                reranker_model=None,
                search_types_used=[],
                reranker_skip_reason=reranker_skip_reason,
            )
        return capture, patch(
@@ -178,3 +183,117 @@ class SearchPageAllowedLibrariesTests(TestCase):
            )
        self.assertIsNone(capture["request"].allowed_libraries)
 # ---------------------------------------------------------------------------
 # search_page rerank-status rendering
 # ---------------------------------------------------------------------------
 class SearchPageRerankBadgeTests(TestCase):
    """Verify the three-state Re-ranked indicator on the search page.

    The badge must distinguish:

    * Success (``reranker_used=True``) — green "Yes"
    * Skipped (``rerank=True`` requested but ``reranker_skip_reason`` set)
      — warning "Skipped" with the reason shown
    * Off (user unchecked the re-rank box) — ghost "Off"

    This guards the regression that surfaced when Synesis returned 404
    on a mis-constructed rerank URL: the UI said "No" and gave no hint
    the re-ranker had actually failed.
    """

    # The "Skipped" badge text sits on its own line inside a multi-line
    # ``<span>``, so whitespace separates it from the surrounding ``>`` and
    # ``<``. A plain ``">Skipped"`` substring check can never match that
    # markup; match with tolerated whitespace instead.
    SKIPPED_BADGE_RE = r">\s*Skipped\s*<"

    def setUp(self):
        """Create a user and log the test client in as that user."""
        self.user = User.objects.create_user(
            username="admin", email="a@example.com", password="pw"
        )
        self.client.force_login(self.user)

    def _run(self, rerank_value, reranker_used, reranker_skip_reason):
        """POST a search with ``SearchService.search`` stubbed out.

        :param rerank_value: Value for the ``rerank`` form field, or
            ``None`` to omit the field from the POST body.
        :param reranker_used: ``reranker_used`` on the stub response.
        :param reranker_skip_reason: ``reranker_skip_reason`` on the stub
            response.
        :returns: ``(response, capture)`` where ``capture["request"]`` is
            the request object the view passed to ``SearchService.search``.
        """
        capture: dict = {}

        def fake_search(self, request):
            # Record the request so tests can inspect its ``rerank`` flag.
            capture["request"] = request
            return SimpleNamespace(
                query=request.query,
                candidates=[],
                images=[],
                total_candidates=0,
                search_time_ms=0.0,
                reranker_used=reranker_used,
                reranker_model=None,
                search_types_used=[],
                reranker_skip_reason=reranker_skip_reason,
            )

        post_data = {"query": "postgresql"}
        if rerank_value is not None:
            post_data["rerank"] = rerank_value
        with patch("library.views._all_library_uids", return_value=[]), \
             patch("library.services.search.SearchService.search", fake_search):
            response = self.client.post(reverse("library:search"), post_data)
        self.assertEqual(response.status_code, 200)
        return response, capture

    def test_badge_shows_yes_when_rerank_succeeded(self):
        """A successful rerank renders the green "Yes" badge only."""
        response, _ = self._run(
            rerank_value="on",
            reranker_used=True,
            reranker_skip_reason=None,
        )
        body = response.content.decode()
        self.assertIn("badge-success", body)
        self.assertIn(">Yes<", body)
        self.assertNotRegex(body, self.SKIPPED_BADGE_RE)
        self.assertNotIn(">Off<", body)

    def test_badge_shows_skipped_with_reason_on_api_error(self):
        """An ``api_error`` skip renders "Skipped" and shows the reason."""
        reason = (
            "api_error: 404 Client Error: Not Found for url: "
            "http://pan.helu.ca:8400/v1/v1/rerank"
        )
        response, capture = self._run(
            rerank_value="on",
            reranker_used=False,
            reranker_skip_reason=reason,
        )
        # Sanity: the view actually requested re-ranking.
        self.assertTrue(capture["request"].rerank)
        body = response.content.decode()
        self.assertRegex(body, self.SKIPPED_BADGE_RE)
        # Reason shown in-page so the user can debug without grepping logs.
        # Django's auto-escaping only rewrites <, >, &, and quote
        # characters, none of which occur in this reason, so these
        # substrings appear verbatim in the rendered HTML.
        self.assertIn("api_error:", body)
        self.assertIn("404", body)
        # Must not claim success.
        self.assertNotIn(">Yes<", body)

    def test_badge_shows_skipped_on_no_system_model(self):
        """A ``no_system_model`` skip renders "Skipped" and shows the reason."""
        response, _ = self._run(
            rerank_value="on",
            reranker_used=False,
            reranker_skip_reason="no_system_model",
        )
        body = response.content.decode()
        self.assertRegex(body, self.SKIPPED_BADGE_RE)
        self.assertIn("no_system_model", body)

    def test_badge_shows_off_when_rerank_unchecked(self):
        """Omitting the rerank field renders the ghost "Off" badge only."""
        # HTML checkbox form: unchecked checkboxes are simply omitted
        # from the POST body, so we pass rerank_value=None (not "off").
        response, capture = self._run(
            rerank_value=None,
            reranker_used=False,
            reranker_skip_reason=None,
        )
        self.assertFalse(capture["request"].rerank)
        body = response.content.decode()
        self.assertIn(">Off<", body)
        self.assertNotRegex(body, self.SKIPPED_BADGE_RE)
        self.assertNotIn(">Yes<", body)