feat: add init sidecar for migrations and setup on compose up

Introduces a one-shot `init` service in docker-compose that runs Postgres migrations, Neo4j index setup, and library-type seeding on every `up`. Long-running services (`app`, `mcp`, `worker`) now depend on its successful completion via `service_completed_successfully`, blocking the stack on configuration errors (missing embedding model, dimension mismatch, unreachable DB) rather than serving silent zero-result searches. Also standardizes reranker test fixtures to use the `/v1` OpenAI-style base URL convention used across other service clients.
2026-05-10 08:01:58 -04:00
parent 9ceb01f829
commit a945b382e6
15 changed files with 821 additions and 65 deletions
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -24,10 +24,17 @@
 #
 # Run:
 #   docker compose up -d
-#   docker compose run --rm app migrate    # one-shot DB migrate
-#   docker compose run --rm app setup      # Neo4j indexes + library types
+#
+# The `init` sidecar (below) runs Postgres migrations, Neo4j index setup,
+# and library-type seeding on every `up`. Long-running services wait for
+# it via `depends_on: init: service_completed_successfully` — so a failure
+# there (missing embedding model, dimension mismatch, unreachable DB)
+# blocks the stack rather than letting it serve silent zero-result
+# searches. The standalone `migrate` / `setup` entrypoint commands remain
+# available for ad-hoc ops work.
 # =============================================================================

+
 services:
  # ── Static-file seeder: copies /app/staticfiles into the shared volume on
  # every `up`. Runs once and exits. Without this, the named volume is only
@@ -41,6 +48,41 @@ services:
      - mnemosyne-static:/shared-static
    restart: "no"

+  # ── Init sidecar: one-shot Postgres migrate + Neo4j index setup + library
+  # type seed. Runs on every `up` and exits. Long-running services below
+  # depend on `service_completed_successfully`, so a failure here (no system
+  # embedding model configured, dimension mismatch, unreachable DB) blocks
+  # `app`/`mcp`/`worker` from starting — which is the whole point. All three
+  # commands are idempotent: re-running is a no-op unless state actually
+  # needs to change.
+  #
+  # This sidecar only needs Postgres, Neo4j, and logging env — no S3, no
+  # Celery, no LLM encryption key. Keep it that way.
+  init:
+    image: git.helu.ca/r/mnemosyne:latest
+    pull_policy: always
+    command: ["init"]
+    environment:
+      # Django core (settings import)
+      - DJANGO_SETTINGS_MODULE=mnemosyne.settings
+      - SECRET_KEY=${SECRET_KEY}
+      - DEBUG=${DEBUG}
+      - TIME_ZONE=${TIME_ZONE}
+      - LANGUAGE_CODE=${LANGUAGE_CODE}
+      # Postgres (migrate)
+      - APP_DB_NAME=${APP_DB_NAME}
+      - APP_DB_USER=${APP_DB_USER}
+      - APP_DB_PASSWORD=${APP_DB_PASSWORD}
+      - DB_HOST=${DB_HOST}
+      - DB_PORT=${DB_PORT}
+      # Neo4j (setup_neo4j_indexes + load_library_types)
+      - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
+      # Logging
+      - LOGGING_LEVEL=${LOGGING_LEVEL}
+      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
+    restart: "no"
+
+
  # ── App: Django REST API + admin ──────────────────────────────────────────
  # Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
  # Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
@@ -103,6 +145,8 @@ services:
    depends_on:
      static-init:
        condition: service_completed_successfully
+      init:
+        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
@@ -112,6 +156,7 @@ services:
      retries: 3
      start_period: 30s

+
  # ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
  # Read-only LLM-facing surface. Intentionally excluded:
  #   CELERY_BROKER_URL         — MCP must not enqueue tasks
@@ -171,6 +216,9 @@ services:
      - LOGGING_LEVEL=${LOGGING_LEVEL}
      - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
    restart: unless-stopped
+    depends_on:
+      init:
+        condition: service_completed_successfully
    volumes:
      - mnemosyne-media:/app/media
    healthcheck:
@@ -180,6 +228,7 @@ services:
      retries: 3
      start_period: 30s

+
  # ── Celery worker: embedding + ingest + batch queues ───────────────────────
  # Consumer side of the queue. Needs the full S3 block (reads Daedalus's
  # bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -55,6 +55,19 @@ case "$1" in
    python manage.py load_library_types
    ;;

+  init)
+    # Bundled one-shot init run by the `init` sidecar on every
+    # `docker compose up`. Idempotent: re-runs are no-ops unless migrations
+    # or indexes need to change. A non-zero exit here blocks `app`, `mcp`,
+    # and `worker` from starting, which is the point — we'd rather fail
+    # loudly than serve silent zero-result searches.
+    set -e
+    python manage.py migrate --noinput
+    python manage.py setup_neo4j_indexes
+    python manage.py load_library_types
+    ;;
+
+
  shell)
    # Drop into the management shell for ad-hoc work.
    exec python manage.py shell
--- a/docs/PHASE_3_SEARCH_AND_RERANKING.md
+++ b/docs/PHASE_3_SEARCH_AND_RERANKING.md
@@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank
 }
 ```

+> **`LLMApi.base_url` convention.** Every Mnemosyne service client
+> (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`)
+> treats `base_url` as the **OpenAI-style `/v1` root** and appends a
+> path-only segment: `/embeddings`, `/rerank`, `/chat/completions`.
+> So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1`
+> serves both the embedding and the reranker endpoints — no per-purpose
+> duplication needed.
+>
+> Get this wrong (e.g. set `base_url=http://pan.helu.ca:8400` with no
+> `/v1`, or have a client prepend `/v1` locally) and you get a
+> double-prefixed URL like `…/v1/v1/rerank` that 404s silently —
+> `SearchService._rerank` catches the exception, the UI shows
+> "Re-rank: Skipped", and the search falls back to raw RRF order.
+> Check `results.reranker_skip_reason` on the search page for the
+> specific error.
+
 ## Deliverables

 ### 1. Search Service (`library/services/search.py`)
--- a/docs/mnemosyne.html
+++ b/docs/mnemosyne.html
@@ -294,31 +294,37 @@ graph LR
            </div>

            <div class="alert alert-warning border-start border-4 border-warning">
-                <h4><i class="bi bi-lightning"></i> Neo4j Vector Indexes</h4>
-                <pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
-CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
+                <h4><i class="bi bi-lightning"></i> Neo4j Indexes (managed by <code>setup_neo4j_indexes</code>)</h4>
+                <p>Created by the <code>init</code> sidecar on every <code>docker compose up</code>. Vector dimensions come from the system embedding model's <code>vector_dimensions</code> field — the command fails if no model is configured. Current production model: <strong>Pan Synesis · qwen3-vl-embedding-2b · 2048d</strong>.</p>
+                <pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (dimensions read from system embedding model)
+CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
 ON (c.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}

 // Concept embeddings for semantic concept search
-CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
+CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
 ON (con.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}

 // Image multimodal embeddings
-CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
+CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
 ON (ie.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
  `vector.similarity_function`: 'cosine'
 }}

-// Full-text index for keyword/BM25-style search
-CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]</code></pre>
+// Full-text indexes (BM25-style keyword search)
+CREATE FULLTEXT INDEX chunk_text_fulltext    FOR (c:Chunk)   ON EACH [c.text_preview]
+CREATE FULLTEXT INDEX concept_name_fulltext  FOR (c:Concept) ON EACH [c.name]
+CREATE FULLTEXT INDEX item_title_fulltext    FOR (i:Item)    ON EACH [i.title]
+CREATE FULLTEXT INDEX library_name_fulltext  FOR (l:Library) ON EACH [l.name]</code></pre>
+                <p class="mb-0 mt-3"><strong>Changing the embedding model or dimensions is a re-embedding event.</strong> Drop + recreate the vector indexes (<code>setup_neo4j_indexes --drop</code>) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.</p>
            </div>
+
        </section>

        <!-- SECTION: CONTENT TYPES -->
@@ -521,10 +527,11 @@ flowchart TD
                        <div class="card-body">
                            <p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
                            <pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
-  'chunk_embedding', 30,
+  'chunk_embedding_index', 30,
  $query_vector
 ) YIELD node, score
 WHERE score > $threshold</code></pre>
+
                        </div>
                    </div>
                </div>
@@ -548,9 +555,10 @@ RETURN c2, i2</code></pre>
                        <div class="card-body">
                            <p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
                            <pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
-  'chunk_fulltext',
+  'chunk_text_fulltext',
  $query_text
 ) YIELD node, score</code></pre>
+
                        </div>
                    </div>
                </div>
--- a/mnemosyne/library/api/serializers.py
+++ b/mnemosyne/library/api/serializers.py
@@ -150,6 +150,16 @@ class SearchResponseSerializer(serializers.Serializer):
    reranker_used = serializers.BooleanField()
    reranker_model = serializers.CharField(allow_null=True)
    search_types_used = serializers.ListField(child=serializers.CharField())
+    # Populated when ``rerank=True`` was requested but the re-ranking
+    # step did not run — e.g. no system reranker model configured
+    # (``no_system_model``), the Synesis call raised
+    # (``api_error: ...``), or fusion produced zero candidates
+    # (``no_candidates``). ``null`` means either success or that
+    # re-ranking was not requested. ``required=False`` keeps old
+    # clients happy.
+    reranker_skip_reason = serializers.CharField(
+        allow_null=True, required=False, default=None
+    )


 # --- Workspace lifecycle (Daedalus integration) ---
--- a/mnemosyne/library/apps.py
+++ b/mnemosyne/library/apps.py
@@ -1,7 +1,202 @@
+"""
+Django AppConfig for the ``library`` app.
+
+Registers a startup probe that runs once per Python process and yells if
+Mnemosyne is misconfigured in a way that would cause silent zero-result
+searches — missing embedding model, missing Neo4j index, or a dimension
+mismatch between the model and an existing index. Loud ERROR lines are the
+only defence against "search works but returns nothing", which is
+indistinguishable from "search works and matched nothing" unless you read
+the stderr of a different container.
+
+The probe is deliberately best-effort: it cannot crash the process even if
+Neo4j is unreachable, because a transient DB blip on startup should not
+take down the whole app. The `init` sidecar is the hard gate; this is the
+second line of defence for long-running containers.
+"""
+
+import logging
+import os
+import sys
+
 from django.apps import AppConfig

+logger = logging.getLogger(__name__)
+
+
+# Index names we expect setup_neo4j_indexes to have created. Kept in sync
+# with library/management/commands/setup_neo4j_indexes.py. A test asserts
+# they stay in sync.
+_EXPECTED_VECTOR_INDEXES = (
+    "chunk_embedding_index",
+    "concept_embedding_index",
+    "image_embedding_index",
+)
+_EXPECTED_FULLTEXT_INDEXES = (
+    "chunk_text_fulltext",
+    "concept_name_fulltext",
+    "item_title_fulltext",
+    "library_name_fulltext",
+)
+
+
+def _should_skip_probe() -> bool:
+    """
+    Decide whether to skip the startup probe.
+
+    Skip when:
+    - Running a management command other than the long-running servers
+      (migrate, makemigrations, setup_neo4j_indexes, load_library_types,
+      collectstatic, test, shell). The probe would just spam stderr for
+      ops work that doesn't care about Neo4j index state.
+    - Neo4j bolt URL is unset (build-time ``collectstatic`` stubs, CI
+      unit tests without real infra).
+    - Running under pytest (any argv contains ``pytest`` or a test runner
+      env var is set).
+    """
+    if "pytest" in sys.argv[0] or "PYTEST_CURRENT_TEST" in os.environ:
+        return True
+    if os.environ.get("DJANGO_SKIP_STARTUP_PROBE") == "1":
+        return True
+    # Typical Django command invocations where the probe is noise.
+    skip_commands = {
+        "migrate",
+        "makemigrations",
+        "setup_neo4j_indexes",
+        "load_library_types",
+        "collectstatic",
+        "test",
+        "shell",
+        "check",
+        "dbshell",
+        "showmigrations",
+        "squashmigrations",
+        "createsuperuser",
+        "help",
+    }
+    if len(sys.argv) >= 2 and sys.argv[1] in skip_commands:
+        return True
+    # No Neo4j endpoint configured — probably a build or local dev without
+    # graph infrastructure. Don't pretend we can check.
+    if not os.environ.get("NEOMODEL_NEO4J_BOLT_URL"):
+        return True
+    return False
+
+
+def _run_startup_probe():
+    """
+    Emit ERROR/WARNING logs if the stack is misconfigured for search.
+
+    Each check is individually guarded so a single unreachable dependency
+    doesn't mask the other findings. Returns nothing; side effect is log
+    output.
+    """
+    from neomodel import db
+
+    # --- 1. System embedding model --------------------------------------
+    embedding_dim = None
+    embedding_model_label = "<unknown>"
+    try:
+        from llm_manager.models import LLMModel
+
+        model = LLMModel.get_system_embedding_model()
+        if not model:
+            logger.error(
+                "No system embedding model configured. Search will return "
+                "zero results until one is set in the LLM admin."
+            )
+        elif not model.vector_dimensions:
+            logger.error(
+                "System embedding model '%s: %s' has no vector_dimensions "
+                "set. Neo4j vector indexes cannot be validated and search "
+                "quality will be unpredictable.",
+                model.api.name,
+                model.name,
+            )
+            embedding_model_label = f"{model.api.name}: {model.name}"
+        else:
+            embedding_dim = model.vector_dimensions
+            embedding_model_label = f"{model.api.name}: {model.name}"
+            logger.info(
+                "System embedding model: %s (%dd)",
+                embedding_model_label,
+                embedding_dim,
+            )
+    except Exception as exc:
+        logger.warning(
+            "Startup probe could not read system embedding model: %s", exc
+        )
+
+    # --- 2. Neo4j indexes present & correctly dimensioned ---------------
+    try:
+        results, _ = db.cypher_query(
+            "SHOW INDEXES YIELD name, type, options RETURN name, type, options"
+        )
+    except Exception as exc:
+        logger.warning(
+            "Startup probe could not list Neo4j indexes: %s. Search "
+            "degradation will only surface at query time.",
+            exc,
+        )
+        return
+
+    present = {}
+    for row in results:
+        name, idx_type, options = row[0], row[1], row[2]
+        present[name] = (idx_type, options)
+
+    # Missing vector indexes
+    for name in _EXPECTED_VECTOR_INDEXES:
+        if name not in present:
+            logger.error(
+                "Neo4j vector index '%s' is missing. Run "
+                "'docker compose run --rm init' (or 'python manage.py "
+                "setup_neo4j_indexes') to rebuild.",
+                name,
+            )
+            continue
+        # Dimension check against the embedding model.
+        if embedding_dim is None:
+            continue
+        idx_type, options = present[name]
+        config = (options or {}).get("indexConfig") or {}
+        raw_dim = config.get("vector.dimensions")
+        try:
+            existing_dim = int(raw_dim) if raw_dim is not None else None
+        except (TypeError, ValueError):
+            existing_dim = None
+        if existing_dim is not None and existing_dim != embedding_dim:
+            logger.error(
+                "Neo4j index '%s' has %d dimensions but system embedding "
+                "model %s reports %d. Re-run 'setup_neo4j_indexes --drop' "
+                "and re-embed all content — search will return empty or "
+                "wrong results until this is fixed.",
+                name,
+                existing_dim,
+                embedding_model_label,
+                embedding_dim,
+            )
+
+    # Missing fulltext indexes
+    for name in _EXPECTED_FULLTEXT_INDEXES:
+        if name not in present:
+            logger.error(
+                "Neo4j full-text index '%s' is missing. Full-text search "
+                "will silently return no matches. Run 'setup_neo4j_indexes'.",
+                name,
+            )
+

 class LibraryConfig(AppConfig):
    default_auto_field = "django.db.models.BigAutoField"
    name = "library"
    verbose_name = "Library"
+
+    def ready(self):
+        if _should_skip_probe():
+            return
+        try:
+            _run_startup_probe()
+        except Exception as exc:
+            # Never let the probe itself take down the process.
+            logger.warning("Startup probe crashed: %s", exc, exc_info=True)
--- a/mnemosyne/library/management/commands/setup_neo4j_indexes.py
+++ b/mnemosyne/library/management/commands/setup_neo4j_indexes.py
@@ -2,21 +2,28 @@
 Management command to create Neo4j indexes for Mnemosyne content graph.

 Creates:
- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding
- Full-text indexes for text search on Chunk.text_preview and Concept.name
- Constraint indexes enforced by neomodel (unique properties)
+- Vector indexes for Chunk, Concept, and ImageEmbedding. Dimensions are read
+  from the system embedding model — NOT a settings default. If no model is
+  configured and no ``--dimensions`` override is supplied, the command
+  fails: a wrong dimension is worse than a clear error.
+- Full-text indexes for text search on Chunk.text_preview, Concept.name,
+  Item.title, Library.name.
+- Constraint indexes enforced by neomodel (unique properties).
+
+When run via the `init` sidecar, a non-zero exit here blocks ``app``/``mcp``/
+``worker`` from starting. That is deliberate: silent zero-result searches
+are the failure mode we are trying to prevent.
 """

 import logging
+import re
+import sys

-from django.core.management.base import BaseCommand
+from django.core.management.base import BaseCommand, CommandError
 from neomodel import db

 logger = logging.getLogger(__name__)

-# Default vector dimensions (used when no system embedding model is configured)
-DEFAULT_VECTOR_DIMENSIONS = 4096
-
 # Full-text index definitions: (index_name, label, properties)
 FULLTEXT_INDEXES = [
    ("chunk_text_fulltext", "Chunk", ["text_preview"]),
@@ -26,57 +33,122 @@ FULLTEXT_INDEXES = [
 ]


-def _get_vector_dimensions():
+def _get_vector_dimensions(override: int = 0):
    """
-    Get vector dimensions from the system embedding model.
+    Resolve the dimension count to build vector indexes at.

-    Falls back to DEFAULT_VECTOR_DIMENSIONS if no model is configured
-    or the model has no vector_dimensions set.
+    Precedence (top wins):
+      1. ``--dimensions`` CLI override (positive int)
+      2. System embedding model's ``vector_dimensions`` field

-    :returns: Tuple of (dimensions, source_description).
+    If neither is available, returns ``(None, reason)`` and the caller must
+    abort. There is deliberately no hardcoded fallback — an index built at
+    the wrong dimension silently breaks search forever.
+
+    :param override: Value from the ``--dimensions`` CLI flag (0 if unset).
+    :returns: Tuple of ``(dimensions, source_description)``. ``dimensions``
+              is ``None`` when the value cannot be resolved.
    """
+    if override > 0:
+        return override, f"CLI override (--dimensions={override})"
+
    try:
        from llm_manager.models import LLMModel

        model = LLMModel.get_system_embedding_model()
-        if model and model.vector_dimensions:
-            return model.vector_dimensions, f"{model.api.name}: {model.name}"
-    except Exception:
-        pass
+    except Exception as exc:  # pragma: no cover - DB unreachable path
+        return None, f"LLMModel lookup failed: {exc}"

-    return DEFAULT_VECTOR_DIMENSIONS, "default (no system embedding model)"
+    if not model:
+        return None, "no system embedding model configured"
+    if not model.vector_dimensions:
+        return (
+            None,
+            f"system embedding model '{model.api.name}: {model.name}' has "
+            "no vector_dimensions set",
+        )
+
+    return model.vector_dimensions, f"{model.api.name}: {model.name}"
+
+
+def _existing_vector_index_dimensions(name: str):
+    """
+    Return the configured dimension count of an existing vector index.
+
+    Neo4j's ``SHOW INDEXES`` returns ``options`` as a map whose
+    ``indexConfig`` entry holds ``vector.dimensions``. Returns ``None`` if
+    the index doesn't exist, isn't a vector index, or the dimension cannot
+    be parsed (e.g. Neo4j version differences).
+    """
+    try:
+        results, _ = db.cypher_query(
+            "SHOW INDEXES YIELD name, type, options "
+            "WHERE name = $name RETURN type, options",
+            {"name": name},
+        )
+    except Exception:
+        return None
+
+    if not results:
+        return None
+
+    idx_type, options = results[0]
+    if (idx_type or "").upper() != "VECTOR":
+        return None
+
+    # ``options`` comes back as a dict in the Python driver.
+    config = (options or {}).get("indexConfig") or {}
+    dims = config.get("vector.dimensions")
+    if dims is None:
+        return None
+    try:
+        return int(dims)
+    except (TypeError, ValueError):
+        # Neo4j sometimes returns the value as a string like "4096".
+        m = re.search(r"\d+", str(dims))
+        return int(m.group(0)) if m else None


 class Command(BaseCommand):
    help = (
-        "Create Neo4j vector, full-text, and constraint indexes "
-        "for the Mnemosyne content graph. Vector dimensions are read "
-        "from the system embedding model."
+        "Create Neo4j vector, full-text, and constraint indexes for the "
+        "Mnemosyne content graph. Vector dimensions are read from the "
+        "system embedding model; the command fails if the model is not "
+        "configured (pass --dimensions only for explicit overrides)."
    )

    def add_arguments(self, parser):
        parser.add_argument(
            "--drop",
            action="store_true",
-            help="Drop existing indexes before recreating them",
+            help="Drop existing managed indexes before recreating them.",
        )
        parser.add_argument(
            "--dimensions",
            type=int,
            default=0,
-            help="Override vector dimensions (default: read from system embedding model)",
+            help=(
+                "Override vector dimensions (normally read from the system "
+                "embedding model). Use with care — an incorrect value "
+                "silently breaks search."
+            ),
        )

    def handle(self, *args, **options):
        drop = options["drop"]
        override_dims = options["dimensions"]

-        # Resolve vector dimensions
-        if override_dims > 0:
-            dimensions = override_dims
-            source = f"CLI override ({override_dims})"
-        else:
-            dimensions, source = _get_vector_dimensions()
+        dimensions, source = _get_vector_dimensions(override_dims)
+
+        if dimensions is None:
+            # Fail loudly. Returning success while skipping vector-index
+            # creation is exactly how `app`/`mcp`/`worker` end up running
+            # against an un-indexed Neo4j and serving empty search results.
+            raise CommandError(
+                "Cannot create vector indexes: "
+                f"{source}. Configure a system embedding model with "
+                "vector_dimensions set, or pass --dimensions N explicitly."
+            )

        self.stdout.write(
            self.style.HTTP_INFO(
@@ -84,7 +156,7 @@ class Command(BaseCommand):
            )
        )

-        # Vector index definitions (dynamic dimensions)
+        # Vector index definitions (dimensions resolved above)
        vector_indexes = [
            ("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
            ("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
@@ -98,11 +170,27 @@ class Command(BaseCommand):
            self._drop_indexes(existing_indexes, vector_indexes)
            existing_indexes = self._get_existing_indexes()

+        dim_mismatch = False
+
        # Create vector indexes
        for name, label, prop, dims, similarity in vector_indexes:
            if name in existing_indexes:
+                existing_dim = _existing_vector_index_dimensions(name)
+                if existing_dim is not None and existing_dim != dims:
+                    dim_mismatch = True
+                    self.stderr.write(
+                        self.style.ERROR(
+                            f"Vector index '{name}' exists at {existing_dim} "
+                            f"dimensions but the system model requires {dims}. "
+                            "Re-run with --drop and re-embed all content."
+                        )
+                    )
+                    continue
                self.stdout.write(
-                    self.style.NOTICE(f"Vector index '{name}' already exists, skipping")
+                    self.style.NOTICE(
+                        f"Vector index '{name}' already exists "
+                        f"({existing_dim or 'unknown'}d), skipping"
+                    )
                )
                continue
            try:
@@ -164,6 +252,20 @@ class Command(BaseCommand):
                self.style.ERROR(f"Failed to install neomodel labels: {e}")
            )

+        if dim_mismatch:
+            # Exit non-zero so the `init` sidecar's compose dependency marks
+            # the stack startup as failed. Admin must re-run with --drop and
+            # re-embed content.
+            self.stderr.write(
+                self.style.ERROR(
+                    "\nOne or more vector indexes exist at the wrong "
+                    "dimension. Search will return empty or garbage results "
+                    "until you run: setup_neo4j_indexes --drop "
+                    "and re-embed all content."
+                )
+            )
+            sys.exit(2)
+
        self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))

    def _get_existing_indexes(self):
--- a/mnemosyne/library/services/reranker.py
+++ b/mnemosyne/library/services/reranker.py
@@ -79,7 +79,13 @@ class RerankerClient:
                query, candidates, instruction, top_n, query_image
            )

-            url = f"{self.base_url}/v1/rerank"
+            # Convention shared with every other service client in Mnemosyne
+            # (embedding_client, vision, concepts): ``base_url`` is the
+            # OpenAI-style ``/v1`` root (e.g. ``http://pan.helu.ca:8400/v1``),
+            # and each client appends a path-only segment. Prepending
+            # ``/v1`` here would build ``…/v1/v1/rerank`` and 404 against
+            # Synesis.
+            url = f"{self.base_url}/rerank"
            headers = {"Content-Type": "application/json"}
            if self.api.api_key:
                headers["Authorization"] = f"Bearer {self.api.api_key}"
--- a/mnemosyne/library/services/search.py
+++ b/mnemosyne/library/services/search.py
@@ -99,7 +99,17 @@ class SearchRequest:

@dataclass
 class SearchResponse:
-    """Results from a search query."""
+    """Results from a search query.
+
+    ``reranker_skip_reason`` is set when the caller requested
+    ``rerank=True`` but the re-ranking step did not actually run — for
+    example because no system reranker model is configured, the Synesis
+    HTTP call raised, or the fused candidate list was empty. It is
+    ``None`` both when re-ranking succeeded (``reranker_used=True``) and
+    when the caller asked for ``rerank=False`` — callers / templates
+    should distinguish those two "no-reason" cases by looking at the
+    original request's ``rerank`` flag.
+    """

    query: str
    candidates: list[SearchCandidate]
@@ -109,6 +119,7 @@ class SearchResponse:
    reranker_used: bool
    reranker_model: Optional[str]
    search_types_used: list[str]
+    reranker_skip_reason: Optional[str] = None


 class SearchService:
@@ -186,13 +197,19 @@ class SearchService:
        # --- Re-rank ---
        reranker_used = False
        reranker_model_name = None
+        reranker_skip_reason: Optional[str] = None

-        if request.rerank and fused:
-            reranked, model_name = self._rerank(request, fused)
-            if reranked is not None:
-                fused = reranked
-                reranker_used = True
-                reranker_model_name = model_name
+        if request.rerank:
+            if not fused:
+                reranker_skip_reason = "no_candidates"
+            else:
+                reranked, model_name, skip_reason = self._rerank(request, fused)
+                if reranked is not None:
+                    fused = reranked
+                    reranker_used = True
+                    reranker_model_name = model_name
+                else:
+                    reranker_skip_reason = skip_reason

        # Trim to limit
        fused = fused[: request.limit]
@@ -225,6 +242,7 @@ class SearchService:
            reranker_used=reranker_used,
            reranker_model=reranker_model_name,
            search_types_used=search_types_used,
+            reranker_skip_reason=reranker_skip_reason,
        )

    # ------------------------------------------------------------------
@@ -720,13 +738,28 @@ class SearchService:

    def _rerank(
        self, request: SearchRequest, candidates: list[SearchCandidate]
-    ) -> tuple[Optional[list[SearchCandidate]], Optional[str]]:
+    ) -> tuple[Optional[list[SearchCandidate]], Optional[str], Optional[str]]:
        """
        Re-rank candidates via Synesis.

        :param request: SearchRequest.
        :param candidates: Fused candidates to re-rank.
-        :returns: Tuple of (reranked_candidates, model_name) or (None, None).
+        :returns: Tuple of ``(reranked_candidates, model_name, skip_reason)``.
+
+            On success the first element is the reranked list, the second
+            is the model name, and the third is ``None``.
+
+            On skip the first two are ``None`` and the third is a short
+            machine-readable reason:
+
+            * ``"no_system_model"`` — no ``LLMModel.is_system_reranker_model``
+              configured.
+            * ``"api_error: <truncated msg>"`` — ``RerankerClient.rerank``
+              raised (HTTP error, network error, malformed response).
+
+            The reason is intended for display on the search page so the
+            user can see *why* re-ranking didn't happen, without having to
+            grep server logs.
        """
        from llm_manager.models import LLMModel

@@ -735,7 +768,7 @@ class SearchService:
        reranker_model = LLMModel.get_system_reranker_model()
        if not reranker_model:
            logger.debug("No system reranker model — skipping re-ranking")
-            return None, None
+            return None, None, "no_system_model"

        # Get content-type reranker instruction
        instruction = self._get_reranker_instruction(request, candidates)
@@ -755,12 +788,17 @@ class SearchService:
                instruction=instruction,
                query_image=request.query_image,
            )
-            return reranked, reranker_model.name
+            return reranked, reranker_model.name, None
        except Exception as exc:
            logger.warning(
                "Re-ranking failed, returning fusion results: %s", exc
            )
-            return None, None
+            # Truncate the exception message to keep the UI tooltip /
+            # JSON payload bounded; full detail is in the WARNING log.
+            msg = str(exc)
+            if len(msg) > 200:
+                msg = msg[:197] + "..."
+            return None, None, f"api_error: {msg}"

    # ------------------------------------------------------------------
    # Helpers
--- a/mnemosyne/library/templates/library/library_detail.html
+++ b/mnemosyne/library/templates/library/library_detail.html
@@ -146,9 +146,23 @@
                    {% if results_reranked.reranker_used %}
                        <span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
                    {% else %}
-                        <span class="badge badge-warning">unavailable</span>
+                        {# The A/B page always requests rerank=True, so a
+                           negative here is always a skip with a reason —
+                           surface the reason in a tooltip so the user
+                           knows *why* the B side fell back to fusion
+                           order. #}
+                        <span class="badge badge-warning"
+                              title="{{ results_reranked.reranker_skip_reason|default:'unknown' }}">
+                            unavailable
+                        </span>
                    {% endif %}
                </div>
+                {% if not results_reranked.reranker_used and results_reranked.reranker_skip_reason %}
+                <div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
+                     title="{{ results_reranked.reranker_skip_reason }}">
+                    {{ results_reranked.reranker_skip_reason }}
+                </div>
+                {% endif %}
            </div>
        </div>

--- a/mnemosyne/library/templates/library/search.html
+++ b/mnemosyne/library/templates/library/search.html
@@ -81,12 +81,30 @@
        <div class="stat">
            <div class="stat-title">Re-ranked</div>
            <div class="stat-value text-lg">
+                {# Three-state indicator:
+                   - Yes: re-ranker ran successfully
+                   - Skipped (+ tooltip): re-rank was requested but the
+                     pipeline bailed — badge colour is a warning so the
+                     user knows something didn't work
+                   - Off: user unchecked the re-rank box
+                #}
                {% if results.reranker_used %}
                    <span class="badge badge-success">Yes</span>
+                {% elif results.reranker_skip_reason %}
+                    <span class="badge badge-warning"
+                          title="{{ results.reranker_skip_reason }}">
+                        Skipped
+                    </span>
                {% else %}
-                    <span class="badge badge-ghost">No</span>
+                    <span class="badge badge-ghost">Off</span>
                {% endif %}
            </div>
+            {% if results.reranker_skip_reason %}
+            <div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
+                 title="{{ results.reranker_skip_reason }}">
+                {{ results.reranker_skip_reason }}
+            </div>
+            {% endif %}
        </div>
        <div class="stat">
            <div class="stat-title">Search Types</div>
--- a/mnemosyne/library/tests/test_reranker.py
+++ b/mnemosyne/library/tests/test_reranker.py
@@ -29,10 +29,15 @@ def _make_candidate(chunk_uid: str, text_preview: str = "Some text", **kwargs):


 def _mock_reranker_model():
-    """Create a mock LLMModel for reranking."""
+    """Create a mock LLMModel for reranking.
+
+    ``base_url`` follows the project-wide convention: it is the
+    OpenAI-style ``/v1`` root. Every service client (embedding, vision,
+    concepts, reranker) appends a path-only segment to it.
+    """
    model = MagicMock()
    model.name = "qwen3-vl-reranker-2b"
-    model.api.base_url = "http://pan.helu.ca:8400"
+    model.api.base_url = "http://pan.helu.ca:8400/v1"
    model.api.api_key = ""
    model.api.timeout_seconds = 30
    model.input_cost_per_1k = Decimal("0")
@@ -49,7 +54,7 @@ class RerankerClientInitTest(TestCase):
        client = RerankerClient(model)

        self.assertEqual(client.model_name, "qwen3-vl-reranker-2b")
-        self.assertEqual(client.base_url, "http://pan.helu.ca:8400")
+        self.assertEqual(client.base_url, "http://pan.helu.ca:8400/v1")


 class RerankerClientRerankTest(TestCase):
@@ -198,6 +203,33 @@ class RerankerClientRerankTest(TestCase):
        with self.assertRaises(Exception):
            client.rerank(query="test", candidates=candidates)

+    @patch("library.services.reranker.requests.post")
+    def test_request_url_is_base_plus_rerank(self, mock_post):
+        """URL is ``{base_url}/rerank`` — base_url already contains ``/v1``.
+
+        Regression guard: earlier versions prepended ``/v1/`` inside the
+        client, which produced ``…/v1/v1/rerank`` and 404 against Synesis
+        when the ``LLMApi.base_url`` followed the same convention as the
+        embedding / chat / vision clients.
+        """
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "results": [{"index": 0, "score": 0.5}],
+        }
+        mock_post.return_value = mock_response
+
+        model = _mock_reranker_model()
+        client = RerankerClient(model)
+        candidates = [_make_candidate("a")]
+
+        client.rerank(query="test", candidates=candidates)
+
+        # First positional arg to requests.post is the URL.
+        called_url = mock_post.call_args.args[0] if mock_post.call_args.args \
+            else mock_post.call_args.kwargs.get("url")
+        self.assertEqual(called_url, "http://pan.helu.ca:8400/v1/rerank")
+
    @patch("library.services.reranker.requests.post")
    def test_no_instruction_omits_field(self, mock_post):
        """Empty instruction is not sent in payload."""
--- a/mnemosyne/library/tests/test_search.py
+++ b/mnemosyne/library/tests/test_search.py
@@ -52,7 +52,7 @@ class SearchServiceSearchTest(TestCase):
        ]
        mock_fulltext.return_value = []
        mock_graph.return_value = []
-        mock_rerank.return_value = (None, None)
+        mock_rerank.return_value = (None, None, "no_system_model")
        mock_image.return_value = []

        request = SearchRequest(
@@ -110,7 +110,9 @@ class SearchServiceSearchTest(TestCase):
            chunk_s3_key="s3/key", chunk_index=0, score=0.95,
            source="fulltext",
        )
-        mock_rerank.return_value = ([reranked_candidate], "qwen3-vl-reranker-2b")
+        mock_rerank.return_value = (
+            [reranked_candidate], "qwen3-vl-reranker-2b", None,
+        )

        request = SearchRequest(
            query="test",
@@ -125,6 +127,8 @@ class SearchServiceSearchTest(TestCase):
        self.assertTrue(response.reranker_used)
        self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
        self.assertAlmostEqual(response.candidates[0].score, 0.95)
+        # Successful rerank → no skip reason surfaced to UI / API.
+        self.assertIsNone(response.reranker_skip_reason)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
@@ -152,6 +156,103 @@ class SearchServiceSearchTest(TestCase):

        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)
+        # ``rerank=False`` means "not requested", not "skipped" — so no
+        # reason is reported. Template distinguishes this from the
+        # skip case by looking at the original request's rerank flag.
+        self.assertIsNone(response.reranker_skip_reason)
+
+    @patch("library.services.search.SearchService._rerank")
+    @patch("library.services.search.SearchService._fulltext_search")
+    @patch("library.services.search.SearchService._embed_query")
+    def test_search_reports_skip_reason_no_system_model(
+        self, mock_embed, mock_fulltext, mock_rerank
+    ):
+        """Rerank requested but no system model → ``no_system_model`` surfaced."""
+        mock_embed.return_value = None
+        mock_fulltext.return_value = [
+            SearchCandidate(
+                chunk_uid="c1", item_uid="i1", item_title="Test",
+                library_type="technical", text_preview="preview",
+                chunk_s3_key="s3/key", chunk_index=0, score=0.5,
+                source="fulltext",
+            )
+        ]
+        mock_rerank.return_value = (None, None, "no_system_model")
+
+        request = SearchRequest(
+            query="test",
+            search_types=["fulltext"],
+            rerank=True,
+            include_images=False,
+        )
+
+        service = SearchService()
+        response = service.search(request)
+
+        self.assertFalse(response.reranker_used)
+        self.assertIsNone(response.reranker_model)
+        self.assertEqual(response.reranker_skip_reason, "no_system_model")
+
+    @patch("library.services.search.SearchService._rerank")
+    @patch("library.services.search.SearchService._fulltext_search")
+    @patch("library.services.search.SearchService._embed_query")
+    def test_search_reports_skip_reason_api_error(
+        self, mock_embed, mock_fulltext, mock_rerank
+    ):
+        """Rerank API raising → ``api_error: ...`` surfaced in response."""
+        mock_embed.return_value = None
+        mock_fulltext.return_value = [
+            SearchCandidate(
+                chunk_uid="c1", item_uid="i1", item_title="Test",
+                library_type="technical", text_preview="preview",
+                chunk_s3_key="s3/key", chunk_index=0, score=0.5,
+                source="fulltext",
+            )
+        ]
+        mock_rerank.return_value = (
+            None, None,
+            "api_error: 404 Client Error: Not Found for url: "
+            "http://pan.helu.ca:8400/v1/v1/rerank",
+        )
+
+        request = SearchRequest(
+            query="test",
+            search_types=["fulltext"],
+            rerank=True,
+            include_images=False,
+        )
+
+        service = SearchService()
+        response = service.search(request)
+
+        self.assertFalse(response.reranker_used)
+        self.assertIsNotNone(response.reranker_skip_reason)
+        self.assertTrue(
+            response.reranker_skip_reason.startswith("api_error:"),
+            f"expected api_error: prefix, got {response.reranker_skip_reason!r}",
+        )
+
+    @patch("library.services.search.SearchService._fulltext_search")
+    @patch("library.services.search.SearchService._embed_query")
+    def test_search_reports_skip_reason_no_candidates(
+        self, mock_embed, mock_fulltext
+    ):
+        """Rerank requested but fusion produced nothing → ``no_candidates``."""
+        mock_embed.return_value = None
+        mock_fulltext.return_value = []
+
+        request = SearchRequest(
+            query="test",
+            search_types=["fulltext"],
+            rerank=True,
+            include_images=False,
+        )
+
+        service = SearchService()
+        response = service.search(request)
+
+        self.assertFalse(response.reranker_used)
+        self.assertEqual(response.reranker_skip_reason, "no_candidates")

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
--- a/mnemosyne/library/tests/test_search_api.py
+++ b/mnemosyne/library/tests/test_search_api.py
@@ -208,6 +208,41 @@ class SearchAPIResponseTest(TestCase):
        self.assertEqual(call_args.search_types, ["fulltext"])
        self.assertFalse(call_args.rerank)

+    @patch("library.api.views.SearchService")
+    def test_reranker_skip_reason_surfaced_in_json(self, MockService):
+        """``reranker_skip_reason`` propagates through the JSON API."""
+        mock_response = SearchResponse(
+            query="neural networks",
+            candidates=[],
+            images=[],
+            total_candidates=0,
+            search_time_ms=10.0,
+            reranker_used=False,
+            reranker_model=None,
+            search_types_used=[],
+            reranker_skip_reason=(
+                "api_error: 404 Client Error: Not Found for url: "
+                "http://pan.helu.ca:8400/v1/v1/rerank"
+            ),
+        )
+        mock_instance = MockService.return_value
+        mock_instance.search.return_value = mock_response
+
+        response = self.client.post(
+            "/library/api/search/",
+            {"query": "neural networks"},
+            format="json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        data = response.json()
+        self.assertFalse(data["reranker_used"])
+        self.assertIn("reranker_skip_reason", data)
+        self.assertTrue(
+            data["reranker_skip_reason"].startswith("api_error:"),
+            f"got {data['reranker_skip_reason']!r}",
+        )
+

 class ConceptAPITest(TestCase):
    """Tests for concept API endpoints."""
--- a/mnemosyne/library/tests/test_search_views_admin_scope.py
+++ b/mnemosyne/library/tests/test_search_views_admin_scope.py
@@ -117,11 +117,15 @@ class SearchPageAllowedLibrariesTests(TestCase):
        )
        self.client.force_login(self.user)

-    def _patched_search(self):
+    def _patched_search(self, reranker_skip_reason=None):
        """Return a (request_capture, patch_context) pair.

        The patch captures the ``SearchRequest`` that ``SearchService.search``
        is called with so assertions can run after the view returns.
+
+        :param reranker_skip_reason: Value to set on the stub response's
+            ``reranker_skip_reason`` attribute, for tests that want to
+            exercise the "Skipped" badge rendering path.
        """
        capture: dict = {}

@@ -137,6 +141,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
                reranker_used=False,
                reranker_model=None,
                search_types_used=[],
+                reranker_skip_reason=reranker_skip_reason,
            )

        return capture, patch(
@@ -178,3 +183,117 @@ class SearchPageAllowedLibrariesTests(TestCase):
            )

        self.assertIsNone(capture["request"].allowed_libraries)
+
+
+# ---------------------------------------------------------------------------
+# search_page rerank-status rendering
+# ---------------------------------------------------------------------------
+
+
+class SearchPageRerankBadgeTests(TestCase):
+    """Verify the three-state Re-ranked indicator on the search page.
+
+    The badge must distinguish:
+
+    * Success (``reranker_used=True``) — green "Yes"
+    * Skipped (``rerank=True`` requested but ``reranker_skip_reason`` set)
+      — warning "Skipped" with the reason shown
+    * Off (user unchecked the re-rank box) — ghost "Off"
+
+    This guards the regression that surfaced when Synesis returned 404
+    on a mis-constructed rerank URL: the UI said "No" and gave no hint
+    the re-ranker had actually failed.
+    """
+
+    def setUp(self):
+        self.user = User.objects.create_user(
+            username="admin", email="a@example.com", password="pw"
+        )
+        self.client.force_login(self.user)
+
+    def _run(self, rerank_value, reranker_used, reranker_skip_reason):
+        capture: dict = {}
+
+        def fake_search(self, request):
+            capture["request"] = request
+            return SimpleNamespace(
+                query=request.query,
+                candidates=[],
+                images=[],
+                total_candidates=0,
+                search_time_ms=0.0,
+                reranker_used=reranker_used,
+                reranker_model=None,
+                search_types_used=[],
+                reranker_skip_reason=reranker_skip_reason,
+            )
+
+        post_data = {"query": "postgresql"}
+        if rerank_value is not None:
+            post_data["rerank"] = rerank_value
+
+        with patch("library.views._all_library_uids", return_value=[]), \
+             patch("library.services.search.SearchService.search", fake_search):
+            response = self.client.post(reverse("library:search"), post_data)
+
+        self.assertEqual(response.status_code, 200)
+        return response, capture
+
+    def test_badge_shows_yes_when_rerank_succeeded(self):
+        response, _ = self._run(
+            rerank_value="on",
+            reranker_used=True,
+            reranker_skip_reason=None,
+        )
+        body = response.content.decode()
+        self.assertIn("badge-success", body)
+        self.assertIn(">Yes<", body)
+        self.assertNotIn(">Skipped<", body)
+        self.assertNotIn(">Off<", body)
+
+    def test_badge_shows_skipped_with_reason_on_api_error(self):
+        reason = (
+            "api_error: 404 Client Error: Not Found for url: "
+            "http://pan.helu.ca:8400/v1/v1/rerank"
+        )
+        response, capture = self._run(
+            rerank_value="on",
+            reranker_used=False,
+            reranker_skip_reason=reason,
+        )
+        # Sanity: the view actually requested re-ranking.
+        self.assertTrue(capture["request"].rerank)
+
+        body = response.content.decode()
+        self.assertIn(">Skipped", body)
+        # Reason shown in-page so the user can debug without grepping logs.
+        # Django auto-escapes the colon-space and URL, which is fine.
+        self.assertIn("api_error:", body)
+        self.assertIn("404", body)
+        # Must not claim success.
+        self.assertNotIn(">Yes<", body)
+
+    def test_badge_shows_skipped_on_no_system_model(self):
+        response, _ = self._run(
+            rerank_value="on",
+            reranker_used=False,
+            reranker_skip_reason="no_system_model",
+        )
+        body = response.content.decode()
+        self.assertIn(">Skipped", body)
+        self.assertIn("no_system_model", body)
+
+    def test_badge_shows_off_when_rerank_unchecked(self):
+        # HTML checkbox form: unchecked checkboxes are simply omitted
+        # from the POST body, so we pass rerank_value=None (not "off").
+        response, capture = self._run(
+            rerank_value=None,
+            reranker_used=False,
+            reranker_skip_reason=None,
+        )
+        self.assertFalse(capture["request"].rerank)
+
+        body = response.content.decode()
+        self.assertIn(">Off<", body)
+        self.assertNotIn(">Skipped", body)
+        self.assertNotIn(">Yes<", body)