From a945b382e61882bc284ffb7fa1c7f8ae4b3ce5d6 Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Sun, 10 May 2026 08:01:58 -0400 Subject: [PATCH] feat: add init sidecar for migrations and setup on compose up Introduces a one-shot `init` service in docker-compose that runs Postgres migrations, Neo4j index setup, and library-type seeding on every `up`. Long-running services (`app`, `mcp`, `worker`) now depend on its successful completion via `service_completed_successfully`, blocking the stack on configuration errors (missing embedding model, dimension mismatch, unreachable DB) rather than serving silent zero-result searches. Also standardizes reranker test fixtures to use the `/v1` OpenAI-style base URL convention used across other service clients. --- docker-compose.yaml | 53 ++++- docker/entrypoint.sh | 13 ++ docs/PHASE_3_SEARCH_AND_RERANKING.md | 16 ++ docs/mnemosyne.html | 32 +-- mnemosyne/library/api/serializers.py | 10 + mnemosyne/library/apps.py | 195 ++++++++++++++++++ .../commands/setup_neo4j_indexes.py | 162 ++++++++++++--- mnemosyne/library/services/reranker.py | 8 +- mnemosyne/library/services/search.py | 62 ++++-- .../templates/library/library_detail.html | 16 +- .../library/templates/library/search.html | 20 +- mnemosyne/library/tests/test_reranker.py | 38 +++- mnemosyne/library/tests/test_search.py | 105 +++++++++- mnemosyne/library/tests/test_search_api.py | 35 ++++ .../tests/test_search_views_admin_scope.py | 121 ++++++++++- 15 files changed, 821 insertions(+), 65 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 66eabef..39cc0f0 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -24,10 +24,17 @@ # # Run: # docker compose up -d -# docker compose run --rm app migrate # one-shot DB migrate -# docker compose run --rm app setup # Neo4j indexes + library types +# +# The `init` sidecar (below) runs Postgres migrations, Neo4j index setup, +# and library-type seeding on every `up`. Long-running services wait for +# it via `depends_on: init: service_completed_successfully` — so a failure +# there (missing embedding model, dimension mismatch, unreachable DB) +# blocks the stack rather than letting it serve silent zero-result +# searches. The standalone `migrate` / `setup` entrypoint commands remain +# available for ad-hoc ops work. # ============================================================================= + services: # ── Static-file seeder: copies /app/staticfiles into the shared volume on # every `up`. Runs once and exits. Without this, the named volume is only @@ -41,6 +48,41 @@ services: - mnemosyne-static:/shared-static restart: "no" + # ── Init sidecar: one-shot Postgres migrate + Neo4j index setup + library + # type seed. Runs on every `up` and exits. Long-running services below + # depend on `service_completed_successfully`, so a failure here (no system + # embedding model configured, dimension mismatch, unreachable DB) blocks + # `app`/`mcp`/`worker` from starting — which is the whole point. All three + # commands are idempotent: re-running is a no-op unless state actually + # needs to change. + # + # This sidecar only needs Postgres, Neo4j, and logging env — no S3, no + # Celery, no LLM encryption key. Keep it that way. + init: + image: git.helu.ca/r/mnemosyne:latest + pull_policy: always + command: ["init"] + environment: + # Django core (settings import) + - DJANGO_SETTINGS_MODULE=mnemosyne.settings + - SECRET_KEY=${SECRET_KEY} + - DEBUG=${DEBUG} + - TIME_ZONE=${TIME_ZONE} + - LANGUAGE_CODE=${LANGUAGE_CODE} + # Postgres (migrate) + - APP_DB_NAME=${APP_DB_NAME} + - APP_DB_USER=${APP_DB_USER} + - APP_DB_PASSWORD=${APP_DB_PASSWORD} + - DB_HOST=${DB_HOST} + - DB_PORT=${DB_PORT} + # Neo4j (setup_neo4j_indexes + load_library_types) + - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL} + # Logging + - LOGGING_LEVEL=${LOGGING_LEVEL} + - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} + restart: "no" + + # ── App: Django REST API + admin ────────────────────────────────────────── # Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues # Celery tasks (hence CELERY_BROKER_URL is required here too — Django is @@ -103,6 +145,8 @@ services: depends_on: static-init: condition: service_completed_successfully + init: + condition: service_completed_successfully volumes: - mnemosyne-media:/app/media healthcheck: @@ -112,6 +156,7 @@ services: retries: 3 start_period: 30s + # ── MCP server: FastMCP Streamable HTTP at /mcp/ ─────────────────────────── # Read-only LLM-facing surface. Intentionally excluded: # CELERY_BROKER_URL — MCP must not enqueue tasks @@ -171,6 +216,9 @@ services: - LOGGING_LEVEL=${LOGGING_LEVEL} - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL} restart: unless-stopped + depends_on: + init: + condition: service_completed_successfully volumes: - mnemosyne-media:/app/media healthcheck: @@ -180,6 +228,7 @@ services: retries: 3 start_period: 30s + # ── Celery worker: embedding + ingest + batch queues ─────────────────────── # Consumer side of the queue. Needs the full S3 block (reads Daedalus's # bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index eca079a..2af9b0d 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -55,6 +55,19 @@ case "$1" in python manage.py load_library_types ;; + init) + # Bundled one-shot init run by the `init` sidecar on every + # `docker compose up`. Idempotent: re-runs are no-ops unless migrations + # or indexes need to change. A non-zero exit here blocks `app`, `mcp`, + # and `worker` from starting, which is the point — we'd rather fail + # loudly than serve silent zero-result searches. + set -e + python manage.py migrate --noinput + python manage.py setup_neo4j_indexes + python manage.py load_library_types + ;; + + shell) # Drop into the management shell for ad-hoc work. exec python manage.py shell diff --git a/docs/PHASE_3_SEARCH_AND_RERANKING.md b/docs/PHASE_3_SEARCH_AND_RERANKING.md index e51608d..71c1a22 100644 --- a/docs/PHASE_3_SEARCH_AND_RERANKING.md +++ b/docs/PHASE_3_SEARCH_AND_RERANKING.md @@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank } ``` +> **`LLMApi.base_url` convention.** Every Mnemosyne service client +> (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`) +> treats `base_url` as the **OpenAI-style `/v1` root** and appends a +> path-only segment: `/embeddings`, `/rerank`, `/chat/completions`. +> So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1` +> serves both the embedding and the reranker endpoints — no per-purpose +> duplication needed. +> +> Get this wrong (e.g. set `base_url=http://pan.helu.ca:8400` with no +> `/v1`, or have a client prepend `/v1` locally) and you get a +> double-prefixed URL like `…/v1/v1/rerank` that 404s silently — +> `SearchService._rerank` catches the exception, the UI shows +> "Re-rank: Skipped", and the search falls back to raw RRF order. +> Check `results.reranker_skip_reason` on the search page for the +> specific error. + ## Deliverables ### 1. Search Service (`library/services/search.py`) diff --git a/docs/mnemosyne.html b/docs/mnemosyne.html index e4c990b..34c7aee 100644 --- a/docs/mnemosyne.html +++ b/docs/mnemosyne.html @@ -294,31 +294,37 @@ graph LR
-

Neo4j Vector Indexes

-
// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
-CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
+                

Neo4j Indexes (managed by setup_neo4j_indexes)

+

Created by the init sidecar on every docker compose up. Vector dimensions come from the system embedding model's vector_dimensions field — the command fails if no model is configured. Current production model: Pan Synesis · qwen3-vl-embedding-2b · 2048d.

+
// Chunk text+image embeddings (dimensions read from system embedding model)
+CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
 ON (c.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
   `vector.similarity_function`: 'cosine'
 }}
 
 // Concept embeddings for semantic concept search
-CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
+CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
 ON (con.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
   `vector.similarity_function`: 'cosine'
 }}
 
 // Image multimodal embeddings
-CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
+CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
 ON (ie.embedding) OPTIONS {indexConfig: {
-  `vector.dimensions`: 4096,
+  `vector.dimensions`: 2048,
   `vector.similarity_function`: 'cosine'
 }}
 
-// Full-text index for keyword/BM25-style search
-CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]
+// Full-text indexes (BM25-style keyword search) +CREATE FULLTEXT INDEX chunk_text_fulltext FOR (c:Chunk) ON EACH [c.text_preview] +CREATE FULLTEXT INDEX concept_name_fulltext FOR (c:Concept) ON EACH [c.name] +CREATE FULLTEXT INDEX item_title_fulltext FOR (i:Item) ON EACH [i.title] +CREATE FULLTEXT INDEX library_name_fulltext FOR (l:Library) ON EACH [l.name]
+

Changing the embedding model or dimensions is a re-embedding event. Drop + recreate the vector indexes (setup_neo4j_indexes --drop) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.

+ @@ -521,10 +527,11 @@ flowchart TD

Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.

CALL db.index.vector.queryNodes(
-  'chunk_embedding', 30,
+  'chunk_embedding_index', 30,
   $query_vector
 ) YIELD node, score
 WHERE score > $threshold
+
@@ -548,9 +555,10 @@ RETURN c2, i2

Neo4j native full-text index for keyword matching (BM25-equivalent).

CALL db.index.fulltext.queryNodes(
-  'chunk_fulltext',
+  'chunk_text_fulltext',
   $query_text
 ) YIELD node, score
+
diff --git a/mnemosyne/library/api/serializers.py b/mnemosyne/library/api/serializers.py index ee5ca32..934724e 100644 --- a/mnemosyne/library/api/serializers.py +++ b/mnemosyne/library/api/serializers.py @@ -150,6 +150,16 @@ class SearchResponseSerializer(serializers.Serializer): reranker_used = serializers.BooleanField() reranker_model = serializers.CharField(allow_null=True) search_types_used = serializers.ListField(child=serializers.CharField()) + # Populated when ``rerank=True`` was requested but the re-ranking + # step did not run — e.g. no system reranker model configured + # (``no_system_model``), the Synesis call raised + # (``api_error: ...``), or fusion produced zero candidates + # (``no_candidates``). ``null`` means either success or that + # re-ranking was not requested. ``required=False`` keeps old + # clients happy. + reranker_skip_reason = serializers.CharField( + allow_null=True, required=False, default=None + ) # --- Workspace lifecycle (Daedalus integration) --- diff --git a/mnemosyne/library/apps.py b/mnemosyne/library/apps.py index 7f6df81..cc69117 100644 --- a/mnemosyne/library/apps.py +++ b/mnemosyne/library/apps.py @@ -1,7 +1,202 @@ +""" +Django AppConfig for the ``library`` app. + +Registers a startup probe that runs once per Python process and yells if +Mnemosyne is misconfigured in a way that would cause silent zero-result +searches — missing embedding model, missing Neo4j index, or a dimension +mismatch between the model and an existing index. Loud ERROR lines are the +only defence against "search works but returns nothing", which is +indistinguishable from "search works and matched nothing" unless you read +the stderr of a different container. + +The probe is deliberately best-effort: it cannot crash the process even if +Neo4j is unreachable, because a transient DB blip on startup should not +take down the whole app. The `init` sidecar is the hard gate; this is the +second line of defence for long-running containers. +""" + +import logging +import os +import sys + from django.apps import AppConfig +logger = logging.getLogger(__name__) + + +# Index names we expect setup_neo4j_indexes to have created. Kept in sync +# with library/management/commands/setup_neo4j_indexes.py. A test asserts +# they stay in sync. +_EXPECTED_VECTOR_INDEXES = ( + "chunk_embedding_index", + "concept_embedding_index", + "image_embedding_index", +) +_EXPECTED_FULLTEXT_INDEXES = ( + "chunk_text_fulltext", + "concept_name_fulltext", + "item_title_fulltext", + "library_name_fulltext", +) + + +def _should_skip_probe() -> bool: + """ + Decide whether to skip the startup probe. + + Skip when: + - Running a management command other than the long-running servers + (migrate, makemigrations, setup_neo4j_indexes, load_library_types, + collectstatic, test, shell). The probe would just spam stderr for + ops work that doesn't care about Neo4j index state. + - Neo4j bolt URL is unset (build-time ``collectstatic`` stubs, CI + unit tests without real infra). + - Running under pytest (any argv contains ``pytest`` or a test runner + env var is set). + """ + if "pytest" in sys.argv[0] or "PYTEST_CURRENT_TEST" in os.environ: + return True + if os.environ.get("DJANGO_SKIP_STARTUP_PROBE") == "1": + return True + # Typical Django command invocations where the probe is noise. + skip_commands = { + "migrate", + "makemigrations", + "setup_neo4j_indexes", + "load_library_types", + "collectstatic", + "test", + "shell", + "check", + "dbshell", + "showmigrations", + "squashmigrations", + "createsuperuser", + "help", + } + if len(sys.argv) >= 2 and sys.argv[1] in skip_commands: + return True + # No Neo4j endpoint configured — probably a build or local dev without + # graph infrastructure. Don't pretend we can check. + if not os.environ.get("NEOMODEL_NEO4J_BOLT_URL"): + return True + return False + + +def _run_startup_probe(): + """ + Emit ERROR/WARNING logs if the stack is misconfigured for search. + + Each check is individually guarded so a single unreachable dependency + doesn't mask the other findings. Returns nothing; side effect is log + output. + """ + from neomodel import db + + # --- 1. System embedding model -------------------------------------- + embedding_dim = None + embedding_model_label = "" + try: + from llm_manager.models import LLMModel + + model = LLMModel.get_system_embedding_model() + if not model: + logger.error( + "No system embedding model configured. Search will return " + "zero results until one is set in the LLM admin." + ) + elif not model.vector_dimensions: + logger.error( + "System embedding model '%s: %s' has no vector_dimensions " + "set. Neo4j vector indexes cannot be validated and search " + "quality will be unpredictable.", + model.api.name, + model.name, + ) + embedding_model_label = f"{model.api.name}: {model.name}" + else: + embedding_dim = model.vector_dimensions + embedding_model_label = f"{model.api.name}: {model.name}" + logger.info( + "System embedding model: %s (%dd)", + embedding_model_label, + embedding_dim, + ) + except Exception as exc: + logger.warning( + "Startup probe could not read system embedding model: %s", exc + ) + + # --- 2. Neo4j indexes present & correctly dimensioned --------------- + try: + results, _ = db.cypher_query( + "SHOW INDEXES YIELD name, type, options RETURN name, type, options" + ) + except Exception as exc: + logger.warning( + "Startup probe could not list Neo4j indexes: %s. Search " + "degradation will only surface at query time.", + exc, + ) + return + + present = {} + for row in results: + name, idx_type, options = row[0], row[1], row[2] + present[name] = (idx_type, options) + + # Missing vector indexes + for name in _EXPECTED_VECTOR_INDEXES: + if name not in present: + logger.error( + "Neo4j vector index '%s' is missing. Run " + "'docker compose run --rm init' (or 'python manage.py " + "setup_neo4j_indexes') to rebuild.", + name, + ) + continue + # Dimension check against the embedding model. + if embedding_dim is None: + continue + idx_type, options = present[name] + config = (options or {}).get("indexConfig") or {} + raw_dim = config.get("vector.dimensions") + try: + existing_dim = int(raw_dim) if raw_dim is not None else None + except (TypeError, ValueError): + existing_dim = None + if existing_dim is not None and existing_dim != embedding_dim: + logger.error( + "Neo4j index '%s' has %d dimensions but system embedding " + "model %s reports %d. Re-run 'setup_neo4j_indexes --drop' " + "and re-embed all content — search will return empty or " + "wrong results until this is fixed.", + name, + existing_dim, + embedding_model_label, + embedding_dim, + ) + + # Missing fulltext indexes + for name in _EXPECTED_FULLTEXT_INDEXES: + if name not in present: + logger.error( + "Neo4j full-text index '%s' is missing. Full-text search " + "will silently return no matches. Run 'setup_neo4j_indexes'.", + name, + ) + class LibraryConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "library" verbose_name = "Library" + + def ready(self): + if _should_skip_probe(): + return + try: + _run_startup_probe() + except Exception as exc: + # Never let the probe itself take down the process. + logger.warning("Startup probe crashed: %s", exc, exc_info=True) diff --git a/mnemosyne/library/management/commands/setup_neo4j_indexes.py b/mnemosyne/library/management/commands/setup_neo4j_indexes.py index 9a85a1d..7b16aa7 100644 --- a/mnemosyne/library/management/commands/setup_neo4j_indexes.py +++ b/mnemosyne/library/management/commands/setup_neo4j_indexes.py @@ -2,21 +2,28 @@ Management command to create Neo4j indexes for Mnemosyne content graph. Creates: -- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding -- Full-text indexes for text search on Chunk.text_preview and Concept.name -- Constraint indexes enforced by neomodel (unique properties) +- Vector indexes for Chunk, Concept, and ImageEmbedding. Dimensions are read + from the system embedding model — NOT a settings default. If no model is + configured and no ``--dimensions`` override is supplied, the command + fails: a wrong dimension is worse than a clear error. +- Full-text indexes for text search on Chunk.text_preview, Concept.name, + Item.title, Library.name. +- Constraint indexes enforced by neomodel (unique properties). + +When run via the `init` sidecar, a non-zero exit here blocks ``app``/``mcp``/ +``worker`` from starting. That is deliberate: silent zero-result searches +are the failure mode we are trying to prevent. """ import logging +import re +import sys -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from neomodel import db logger = logging.getLogger(__name__) -# Default vector dimensions (used when no system embedding model is configured) -DEFAULT_VECTOR_DIMENSIONS = 4096 - # Full-text index definitions: (index_name, label, properties) FULLTEXT_INDEXES = [ ("chunk_text_fulltext", "Chunk", ["text_preview"]), @@ -26,57 +33,122 @@ FULLTEXT_INDEXES = [ ] -def _get_vector_dimensions(): +def _get_vector_dimensions(override: int = 0): """ - Get vector dimensions from the system embedding model. + Resolve the dimension count to build vector indexes at. - Falls back to DEFAULT_VECTOR_DIMENSIONS if no model is configured - or the model has no vector_dimensions set. + Precedence (top wins): + 1. ``--dimensions`` CLI override (positive int) + 2. System embedding model's ``vector_dimensions`` field - :returns: Tuple of (dimensions, source_description). + If neither is available, returns ``(None, reason)`` and the caller must + abort. There is deliberately no hardcoded fallback — an index built at + the wrong dimension silently breaks search forever. + + :param override: Value from the ``--dimensions`` CLI flag (0 if unset). + :returns: Tuple of ``(dimensions, source_description)``. ``dimensions`` + is ``None`` when the value cannot be resolved. """ + if override > 0: + return override, f"CLI override (--dimensions={override})" + try: from llm_manager.models import LLMModel model = LLMModel.get_system_embedding_model() - if model and model.vector_dimensions: - return model.vector_dimensions, f"{model.api.name}: {model.name}" - except Exception: - pass + except Exception as exc: # pragma: no cover - DB unreachable path + return None, f"LLMModel lookup failed: {exc}" - return DEFAULT_VECTOR_DIMENSIONS, "default (no system embedding model)" + if not model: + return None, "no system embedding model configured" + if not model.vector_dimensions: + return ( + None, + f"system embedding model '{model.api.name}: {model.name}' has " + "no vector_dimensions set", + ) + + return model.vector_dimensions, f"{model.api.name}: {model.name}" + + +def _existing_vector_index_dimensions(name: str): + """ + Return the configured dimension count of an existing vector index. + + Neo4j's ``SHOW INDEXES`` returns ``options`` as a map whose + ``indexConfig`` entry holds ``vector.dimensions``. Returns ``None`` if + the index doesn't exist, isn't a vector index, or the dimension cannot + be parsed (e.g. Neo4j version differences). + """ + try: + results, _ = db.cypher_query( + "SHOW INDEXES YIELD name, type, options " + "WHERE name = $name RETURN type, options", + {"name": name}, + ) + except Exception: + return None + + if not results: + return None + + idx_type, options = results[0] + if (idx_type or "").upper() != "VECTOR": + return None + + # ``options`` comes back as a dict in the Python driver. + config = (options or {}).get("indexConfig") or {} + dims = config.get("vector.dimensions") + if dims is None: + return None + try: + return int(dims) + except (TypeError, ValueError): + # Neo4j sometimes returns the value as a string like "4096". + m = re.search(r"\d+", str(dims)) + return int(m.group(0)) if m else None class Command(BaseCommand): help = ( - "Create Neo4j vector, full-text, and constraint indexes " - "for the Mnemosyne content graph. Vector dimensions are read " - "from the system embedding model." + "Create Neo4j vector, full-text, and constraint indexes for the " + "Mnemosyne content graph. Vector dimensions are read from the " + "system embedding model; the command fails if the model is not " + "configured (pass --dimensions only for explicit overrides)." ) def add_arguments(self, parser): parser.add_argument( "--drop", action="store_true", - help="Drop existing indexes before recreating them", + help="Drop existing managed indexes before recreating them.", ) parser.add_argument( "--dimensions", type=int, default=0, - help="Override vector dimensions (default: read from system embedding model)", + help=( + "Override vector dimensions (normally read from the system " + "embedding model). Use with care — an incorrect value " + "silently breaks search." + ), ) def handle(self, *args, **options): drop = options["drop"] override_dims = options["dimensions"] - # Resolve vector dimensions - if override_dims > 0: - dimensions = override_dims - source = f"CLI override ({override_dims})" - else: - dimensions, source = _get_vector_dimensions() + dimensions, source = _get_vector_dimensions(override_dims) + + if dimensions is None: + # Fail loudly. Returning success while skipping vector-index + # creation is exactly how `app`/`mcp`/`worker` end up running + # against an un-indexed Neo4j and serving empty search results. + raise CommandError( + "Cannot create vector indexes: " + f"{source}. Configure a system embedding model with " + "vector_dimensions set, or pass --dimensions N explicitly." + ) self.stdout.write( self.style.HTTP_INFO( @@ -84,7 +156,7 @@ class Command(BaseCommand): ) ) - # Vector index definitions (dynamic dimensions) + # Vector index definitions (dimensions resolved above) vector_indexes = [ ("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"), ("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"), @@ -98,11 +170,27 @@ class Command(BaseCommand): self._drop_indexes(existing_indexes, vector_indexes) existing_indexes = self._get_existing_indexes() + dim_mismatch = False + # Create vector indexes for name, label, prop, dims, similarity in vector_indexes: if name in existing_indexes: + existing_dim = _existing_vector_index_dimensions(name) + if existing_dim is not None and existing_dim != dims: + dim_mismatch = True + self.stderr.write( + self.style.ERROR( + f"Vector index '{name}' exists at {existing_dim} " + f"dimensions but the system model requires {dims}. " + "Re-run with --drop and re-embed all content." + ) + ) + continue self.stdout.write( - self.style.NOTICE(f"Vector index '{name}' already exists, skipping") + self.style.NOTICE( + f"Vector index '{name}' already exists " + f"({existing_dim or 'unknown'}d), skipping" + ) ) continue try: @@ -164,6 +252,20 @@ class Command(BaseCommand): self.style.ERROR(f"Failed to install neomodel labels: {e}") ) + if dim_mismatch: + # Exit non-zero so the `init` sidecar's compose dependency marks + # the stack startup as failed. Admin must re-run with --drop and + # re-embed content. + self.stderr.write( + self.style.ERROR( + "\nOne or more vector indexes exist at the wrong " + "dimension. Search will return empty or garbage results " + "until you run: setup_neo4j_indexes --drop " + "and re-embed all content." + ) + ) + sys.exit(2) + self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete.")) def _get_existing_indexes(self): diff --git a/mnemosyne/library/services/reranker.py b/mnemosyne/library/services/reranker.py index 3dc0bcc..e4674fe 100644 --- a/mnemosyne/library/services/reranker.py +++ b/mnemosyne/library/services/reranker.py @@ -79,7 +79,13 @@ class RerankerClient: query, candidates, instruction, top_n, query_image ) - url = f"{self.base_url}/v1/rerank" + # Convention shared with every other service client in Mnemosyne + # (embedding_client, vision, concepts): ``base_url`` is the + # OpenAI-style ``/v1`` root (e.g. ``http://pan.helu.ca:8400/v1``), + # and each client appends a path-only segment. Prepending + # ``/v1`` here would build ``…/v1/v1/rerank`` and 404 against + # Synesis. + url = f"{self.base_url}/rerank" headers = {"Content-Type": "application/json"} if self.api.api_key: headers["Authorization"] = f"Bearer {self.api.api_key}" diff --git a/mnemosyne/library/services/search.py b/mnemosyne/library/services/search.py index 234ce39..690300a 100644 --- a/mnemosyne/library/services/search.py +++ b/mnemosyne/library/services/search.py @@ -99,7 +99,17 @@ class SearchRequest: @dataclass class SearchResponse: - """Results from a search query.""" + """Results from a search query. + + ``reranker_skip_reason`` is set when the caller requested + ``rerank=True`` but the re-ranking step did not actually run — for + example because no system reranker model is configured, the Synesis + HTTP call raised, or the fused candidate list was empty. It is + ``None`` both when re-ranking succeeded (``reranker_used=True``) and + when the caller asked for ``rerank=False`` — callers / templates + should distinguish those two "no-reason" cases by looking at the + original request's ``rerank`` flag. + """ query: str candidates: list[SearchCandidate] @@ -109,6 +119,7 @@ class SearchResponse: reranker_used: bool reranker_model: Optional[str] search_types_used: list[str] + reranker_skip_reason: Optional[str] = None class SearchService: @@ -186,13 +197,19 @@ class SearchService: # --- Re-rank --- reranker_used = False reranker_model_name = None + reranker_skip_reason: Optional[str] = None - if request.rerank and fused: - reranked, model_name = self._rerank(request, fused) - if reranked is not None: - fused = reranked - reranker_used = True - reranker_model_name = model_name + if request.rerank: + if not fused: + reranker_skip_reason = "no_candidates" + else: + reranked, model_name, skip_reason = self._rerank(request, fused) + if reranked is not None: + fused = reranked + reranker_used = True + reranker_model_name = model_name + else: + reranker_skip_reason = skip_reason # Trim to limit fused = fused[: request.limit] @@ -225,6 +242,7 @@ class SearchService: reranker_used=reranker_used, reranker_model=reranker_model_name, search_types_used=search_types_used, + reranker_skip_reason=reranker_skip_reason, ) # ------------------------------------------------------------------ @@ -720,13 +738,28 @@ class SearchService: def _rerank( self, request: SearchRequest, candidates: list[SearchCandidate] - ) -> tuple[Optional[list[SearchCandidate]], Optional[str]]: + ) -> tuple[Optional[list[SearchCandidate]], Optional[str], Optional[str]]: """ Re-rank candidates via Synesis. :param request: SearchRequest. :param candidates: Fused candidates to re-rank. - :returns: Tuple of (reranked_candidates, model_name) or (None, None). + :returns: Tuple of ``(reranked_candidates, model_name, skip_reason)``. + + On success the first element is the reranked list, the second + is the model name, and the third is ``None``. + + On skip the first two are ``None`` and the third is a short + machine-readable reason: + + * ``"no_system_model"`` — no ``LLMModel.is_system_reranker_model`` + configured. + * ``"api_error: "`` — ``RerankerClient.rerank`` + raised (HTTP error, network error, malformed response). + + The reason is intended for display on the search page so the + user can see *why* re-ranking didn't happen, without having to + grep server logs. """ from llm_manager.models import LLMModel @@ -735,7 +768,7 @@ class SearchService: reranker_model = LLMModel.get_system_reranker_model() if not reranker_model: logger.debug("No system reranker model — skipping re-ranking") - return None, None + return None, None, "no_system_model" # Get content-type reranker instruction instruction = self._get_reranker_instruction(request, candidates) @@ -755,12 +788,17 @@ class SearchService: instruction=instruction, query_image=request.query_image, ) - return reranked, reranker_model.name + return reranked, reranker_model.name, None except Exception as exc: logger.warning( "Re-ranking failed, returning fusion results: %s", exc ) - return None, None + # Truncate the exception message to keep the UI tooltip / + # JSON payload bounded; full detail is in the WARNING log. + msg = str(exc) + if len(msg) > 200: + msg = msg[:197] + "..." + return None, None, f"api_error: {msg}" # ------------------------------------------------------------------ # Helpers diff --git a/mnemosyne/library/templates/library/library_detail.html b/mnemosyne/library/templates/library/library_detail.html index b2ea2f5..470dc7c 100644 --- a/mnemosyne/library/templates/library/library_detail.html +++ b/mnemosyne/library/templates/library/library_detail.html @@ -146,9 +146,23 @@ {% if results_reranked.reranker_used %} {{ results_reranked.reranker_model|default:"on" }} {% else %} - unavailable + {# The A/B page always requests rerank=True, so a + negative here is always a skip with a reason — + surface the reason in a tooltip so the user + knows *why* the B side fell back to fusion + order. #} + + unavailable + {% endif %} + {% if not results_reranked.reranker_used and results_reranked.reranker_skip_reason %} +
+ {{ results_reranked.reranker_skip_reason }} +
+ {% endif %} diff --git a/mnemosyne/library/templates/library/search.html b/mnemosyne/library/templates/library/search.html index 1a7f47c..b7e6b62 100644 --- a/mnemosyne/library/templates/library/search.html +++ b/mnemosyne/library/templates/library/search.html @@ -81,12 +81,30 @@
Re-ranked
+ {# Three-state indicator: + - Yes: re-ranker ran successfully + - Skipped (+ tooltip): re-rank was requested but the + pipeline bailed — badge colour is a warning so the + user knows something didn't work + - Off: user unchecked the re-rank box + #} {% if results.reranker_used %} Yes + {% elif results.reranker_skip_reason %} + + Skipped + {% else %} - No + Off {% endif %}
+ {% if results.reranker_skip_reason %} +
+ {{ results.reranker_skip_reason }} +
+ {% endif %}
Search Types
diff --git a/mnemosyne/library/tests/test_reranker.py b/mnemosyne/library/tests/test_reranker.py index 813699f..7d0f5d8 100644 --- a/mnemosyne/library/tests/test_reranker.py +++ b/mnemosyne/library/tests/test_reranker.py @@ -29,10 +29,15 @@ def _make_candidate(chunk_uid: str, text_preview: str = "Some text", **kwargs): def _mock_reranker_model(): - """Create a mock LLMModel for reranking.""" + """Create a mock LLMModel for reranking. + + ``base_url`` follows the project-wide convention: it is the + OpenAI-style ``/v1`` root. Every service client (embedding, vision, + concepts, reranker) appends a path-only segment to it. + """ model = MagicMock() model.name = "qwen3-vl-reranker-2b" - model.api.base_url = "http://pan.helu.ca:8400" + model.api.base_url = "http://pan.helu.ca:8400/v1" model.api.api_key = "" model.api.timeout_seconds = 30 model.input_cost_per_1k = Decimal("0") @@ -49,7 +54,7 @@ class RerankerClientInitTest(TestCase): client = RerankerClient(model) self.assertEqual(client.model_name, "qwen3-vl-reranker-2b") - self.assertEqual(client.base_url, "http://pan.helu.ca:8400") + self.assertEqual(client.base_url, "http://pan.helu.ca:8400/v1") class RerankerClientRerankTest(TestCase): @@ -198,6 +203,33 @@ class RerankerClientRerankTest(TestCase): with self.assertRaises(Exception): client.rerank(query="test", candidates=candidates) + @patch("library.services.reranker.requests.post") + def test_request_url_is_base_plus_rerank(self, mock_post): + """URL is ``{base_url}/rerank`` — base_url already contains ``/v1``. + + Regression guard: earlier versions prepended ``/v1/`` inside the + client, which produced ``…/v1/v1/rerank`` and 404 against Synesis + when the ``LLMApi.base_url`` followed the same convention as the + embedding / chat / vision clients. + """ + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "results": [{"index": 0, "score": 0.5}], + } + mock_post.return_value = mock_response + + model = _mock_reranker_model() + client = RerankerClient(model) + candidates = [_make_candidate("a")] + + client.rerank(query="test", candidates=candidates) + + # First positional arg to requests.post is the URL. + called_url = mock_post.call_args.args[0] if mock_post.call_args.args \ + else mock_post.call_args.kwargs.get("url") + self.assertEqual(called_url, "http://pan.helu.ca:8400/v1/rerank") + @patch("library.services.reranker.requests.post") def test_no_instruction_omits_field(self, mock_post): """Empty instruction is not sent in payload.""" diff --git a/mnemosyne/library/tests/test_search.py b/mnemosyne/library/tests/test_search.py index 5b34ad4..8189563 100644 --- a/mnemosyne/library/tests/test_search.py +++ b/mnemosyne/library/tests/test_search.py @@ -52,7 +52,7 @@ class SearchServiceSearchTest(TestCase): ] mock_fulltext.return_value = [] mock_graph.return_value = [] - mock_rerank.return_value = (None, None) + mock_rerank.return_value = (None, None, "no_system_model") mock_image.return_value = [] request = SearchRequest( @@ -110,7 +110,9 @@ class SearchServiceSearchTest(TestCase): chunk_s3_key="s3/key", chunk_index=0, score=0.95, source="fulltext", ) - mock_rerank.return_value = ([reranked_candidate], "qwen3-vl-reranker-2b") + mock_rerank.return_value = ( + [reranked_candidate], "qwen3-vl-reranker-2b", None, + ) request = SearchRequest( query="test", @@ -125,6 +127,8 @@ class SearchServiceSearchTest(TestCase): self.assertTrue(response.reranker_used) self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b") self.assertAlmostEqual(response.candidates[0].score, 0.95) + # Successful rerank → no skip reason surfaced to UI / API. + self.assertIsNone(response.reranker_skip_reason) @patch("library.services.search.SearchService._fulltext_search") @patch("library.services.search.SearchService._embed_query") @@ -152,6 +156,103 @@ class SearchServiceSearchTest(TestCase): self.assertFalse(response.reranker_used) self.assertIsNone(response.reranker_model) + # ``rerank=False`` means "not requested", not "skipped" — so no + # reason is reported. Template distinguishes this from the + # skip case by looking at the original request's rerank flag. + self.assertIsNone(response.reranker_skip_reason) + + @patch("library.services.search.SearchService._rerank") + @patch("library.services.search.SearchService._fulltext_search") + @patch("library.services.search.SearchService._embed_query") + def test_search_reports_skip_reason_no_system_model( + self, mock_embed, mock_fulltext, mock_rerank + ): + """Rerank requested but no system model → ``no_system_model`` surfaced.""" + mock_embed.return_value = None + mock_fulltext.return_value = [ + SearchCandidate( + chunk_uid="c1", item_uid="i1", item_title="Test", + library_type="technical", text_preview="preview", + chunk_s3_key="s3/key", chunk_index=0, score=0.5, + source="fulltext", + ) + ] + mock_rerank.return_value = (None, None, "no_system_model") + + request = SearchRequest( + query="test", + search_types=["fulltext"], + rerank=True, + include_images=False, + ) + + service = SearchService() + response = service.search(request) + + self.assertFalse(response.reranker_used) + self.assertIsNone(response.reranker_model) + self.assertEqual(response.reranker_skip_reason, "no_system_model") + + @patch("library.services.search.SearchService._rerank") + @patch("library.services.search.SearchService._fulltext_search") + @patch("library.services.search.SearchService._embed_query") + def test_search_reports_skip_reason_api_error( + self, mock_embed, mock_fulltext, mock_rerank + ): + """Rerank API raising → ``api_error: ...`` surfaced in response.""" + mock_embed.return_value = None + mock_fulltext.return_value = [ + SearchCandidate( + chunk_uid="c1", item_uid="i1", item_title="Test", + library_type="technical", text_preview="preview", + chunk_s3_key="s3/key", chunk_index=0, score=0.5, + source="fulltext", + ) + ] + mock_rerank.return_value = ( + None, None, + "api_error: 404 Client Error: Not Found for url: " + "http://pan.helu.ca:8400/v1/v1/rerank", + ) + + request = SearchRequest( + query="test", + search_types=["fulltext"], + rerank=True, + include_images=False, + ) + + service = SearchService() + response = service.search(request) + + self.assertFalse(response.reranker_used) + self.assertIsNotNone(response.reranker_skip_reason) + self.assertTrue( + response.reranker_skip_reason.startswith("api_error:"), + f"expected api_error: prefix, got {response.reranker_skip_reason!r}", + ) + + @patch("library.services.search.SearchService._fulltext_search") + @patch("library.services.search.SearchService._embed_query") + def test_search_reports_skip_reason_no_candidates( + self, mock_embed, mock_fulltext + ): + """Rerank requested but fusion produced nothing → ``no_candidates``.""" + mock_embed.return_value = None + mock_fulltext.return_value = [] + + request = SearchRequest( + query="test", + search_types=["fulltext"], + rerank=True, + include_images=False, + ) + + service = SearchService() + response = service.search(request) + + self.assertFalse(response.reranker_used) + self.assertEqual(response.reranker_skip_reason, "no_candidates") @patch("library.services.search.SearchService._fulltext_search") @patch("library.services.search.SearchService._embed_query") diff --git a/mnemosyne/library/tests/test_search_api.py b/mnemosyne/library/tests/test_search_api.py index bbf3d2e..23e1ab6 100644 --- a/mnemosyne/library/tests/test_search_api.py +++ b/mnemosyne/library/tests/test_search_api.py @@ -208,6 +208,41 @@ class SearchAPIResponseTest(TestCase): self.assertEqual(call_args.search_types, ["fulltext"]) self.assertFalse(call_args.rerank) + @patch("library.api.views.SearchService") + def test_reranker_skip_reason_surfaced_in_json(self, MockService): + """``reranker_skip_reason`` propagates through the JSON API.""" + mock_response = SearchResponse( + query="neural networks", + candidates=[], + images=[], + total_candidates=0, + search_time_ms=10.0, + reranker_used=False, + reranker_model=None, + search_types_used=[], + reranker_skip_reason=( + "api_error: 404 Client Error: Not Found for url: " + "http://pan.helu.ca:8400/v1/v1/rerank" + ), + ) + mock_instance = MockService.return_value + mock_instance.search.return_value = mock_response + + response = self.client.post( + "/library/api/search/", + {"query": "neural networks"}, + format="json", + ) + + self.assertEqual(response.status_code, 200) + data = response.json() + self.assertFalse(data["reranker_used"]) + self.assertIn("reranker_skip_reason", data) + self.assertTrue( + data["reranker_skip_reason"].startswith("api_error:"), + f"got {data['reranker_skip_reason']!r}", + ) + class ConceptAPITest(TestCase): """Tests for concept API endpoints.""" diff --git a/mnemosyne/library/tests/test_search_views_admin_scope.py b/mnemosyne/library/tests/test_search_views_admin_scope.py index e70903e..76914eb 100644 --- a/mnemosyne/library/tests/test_search_views_admin_scope.py +++ b/mnemosyne/library/tests/test_search_views_admin_scope.py @@ -117,11 +117,15 @@ class SearchPageAllowedLibrariesTests(TestCase): ) self.client.force_login(self.user) - def _patched_search(self): + def _patched_search(self, reranker_skip_reason=None): """Return a (request_capture, patch_context) pair. The patch captures the ``SearchRequest`` that ``SearchService.search`` is called with so assertions can run after the view returns. + + :param reranker_skip_reason: Value to set on the stub response's + ``reranker_skip_reason`` attribute, for tests that want to + exercise the "Skipped" badge rendering path. """ capture: dict = {} @@ -137,6 +141,7 @@ class SearchPageAllowedLibrariesTests(TestCase): reranker_used=False, reranker_model=None, search_types_used=[], + reranker_skip_reason=reranker_skip_reason, ) return capture, patch( @@ -178,3 +183,117 @@ class SearchPageAllowedLibrariesTests(TestCase): ) self.assertIsNone(capture["request"].allowed_libraries) + + +# --------------------------------------------------------------------------- +# search_page rerank-status rendering +# --------------------------------------------------------------------------- + + +class SearchPageRerankBadgeTests(TestCase): + """Verify the three-state Re-ranked indicator on the search page. + + The badge must distinguish: + + * Success (``reranker_used=True``) — green "Yes" + * Skipped (``rerank=True`` requested but ``reranker_skip_reason`` set) + — warning "Skipped" with the reason shown + * Off (user unchecked the re-rank box) — ghost "Off" + + This guards the regression that surfaced when Synesis returned 404 + on a mis-constructed rerank URL: the UI said "No" and gave no hint + the re-ranker had actually failed. + """ + + def setUp(self): + self.user = User.objects.create_user( + username="admin", email="a@example.com", password="pw" + ) + self.client.force_login(self.user) + + def _run(self, rerank_value, reranker_used, reranker_skip_reason): + capture: dict = {} + + def fake_search(self, request): + capture["request"] = request + return SimpleNamespace( + query=request.query, + candidates=[], + images=[], + total_candidates=0, + search_time_ms=0.0, + reranker_used=reranker_used, + reranker_model=None, + search_types_used=[], + reranker_skip_reason=reranker_skip_reason, + ) + + post_data = {"query": "postgresql"} + if rerank_value is not None: + post_data["rerank"] = rerank_value + + with patch("library.views._all_library_uids", return_value=[]), \ + patch("library.services.search.SearchService.search", fake_search): + response = self.client.post(reverse("library:search"), post_data) + + self.assertEqual(response.status_code, 200) + return response, capture + + def test_badge_shows_yes_when_rerank_succeeded(self): + response, _ = self._run( + rerank_value="on", + reranker_used=True, + reranker_skip_reason=None, + ) + body = response.content.decode() + self.assertIn("badge-success", body) + self.assertIn(">Yes<", body) + self.assertNotIn(">Skipped<", body) + self.assertNotIn(">Off<", body) + + def test_badge_shows_skipped_with_reason_on_api_error(self): + reason = ( + "api_error: 404 Client Error: Not Found for url: " + "http://pan.helu.ca:8400/v1/v1/rerank" + ) + response, capture = self._run( + rerank_value="on", + reranker_used=False, + reranker_skip_reason=reason, + ) + # Sanity: the view actually requested re-ranking. + self.assertTrue(capture["request"].rerank) + + body = response.content.decode() + self.assertIn(">Skipped", body) + # Reason shown in-page so the user can debug without grepping logs. + # Django auto-escapes the colon-space and URL, which is fine. + self.assertIn("api_error:", body) + self.assertIn("404", body) + # Must not claim success. + self.assertNotIn(">Yes<", body) + + def test_badge_shows_skipped_on_no_system_model(self): + response, _ = self._run( + rerank_value="on", + reranker_used=False, + reranker_skip_reason="no_system_model", + ) + body = response.content.decode() + self.assertIn(">Skipped", body) + self.assertIn("no_system_model", body) + + def test_badge_shows_off_when_rerank_unchecked(self): + # HTML checkbox form: unchecked checkboxes are simply omitted + # from the POST body, so we pass rerank_value=None (not "off"). + response, capture = self._run( + rerank_value=None, + reranker_used=False, + reranker_skip_reason=None, + ) + self.assertFalse(capture["request"].rerank) + + body = response.content.decode() + self.assertIn(">Off<", body) + self.assertNotIn(">Skipped", body) + self.assertNotIn(">Yes<", body)