feat: add init sidecar for migrations and setup on compose up
Introduces a one-shot `init` service in docker-compose that runs Postgres migrations, Neo4j index setup, and library-type seeding on every `up`. Long-running services (`app`, `mcp`, `worker`) now depend on its successful completion via `service_completed_successfully`, blocking the stack on configuration errors (missing embedding model, dimension mismatch, unreachable DB) rather than serving silent zero-result searches. Also standardizes reranker test fixtures to use the `/v1` OpenAI-style base URL convention used across other service clients.
This commit is contained in:
@@ -24,10 +24,17 @@
|
||||
#
|
||||
# Run:
|
||||
# docker compose up -d
|
||||
# docker compose run --rm app migrate # one-shot DB migrate
|
||||
# docker compose run --rm app setup # Neo4j indexes + library types
|
||||
#
|
||||
# The `init` sidecar (below) runs Postgres migrations, Neo4j index setup,
|
||||
# and library-type seeding on every `up`. Long-running services wait for
|
||||
# it via `depends_on: init: service_completed_successfully` — so a failure
|
||||
# there (missing embedding model, dimension mismatch, unreachable DB)
|
||||
# blocks the stack rather than letting it serve silent zero-result
|
||||
# searches. The standalone `migrate` / `setup` entrypoint commands remain
|
||||
# available for ad-hoc ops work.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
services:
|
||||
# ── Static-file seeder: copies /app/staticfiles into the shared volume on
|
||||
# every `up`. Runs once and exits. Without this, the named volume is only
|
||||
@@ -41,6 +48,41 @@ services:
|
||||
- mnemosyne-static:/shared-static
|
||||
restart: "no"
|
||||
|
||||
# ── Init sidecar: one-shot Postgres migrate + Neo4j index setup + library
|
||||
# type seed. Runs on every `up` and exits. Long-running services below
|
||||
# depend on `service_completed_successfully`, so a failure here (no system
|
||||
# embedding model configured, dimension mismatch, unreachable DB) blocks
|
||||
# `app`/`mcp`/`worker` from starting — which is the whole point. All three
|
||||
# commands are idempotent: re-running is a no-op unless state actually
|
||||
# needs to change.
|
||||
#
|
||||
# This sidecar only needs Postgres, Neo4j, and logging env — no S3, no
|
||||
# Celery, no LLM encryption key. Keep it that way.
|
||||
init:
|
||||
image: git.helu.ca/r/mnemosyne:latest
|
||||
pull_policy: always
|
||||
command: ["init"]
|
||||
environment:
|
||||
# Django core (settings import)
|
||||
- DJANGO_SETTINGS_MODULE=mnemosyne.settings
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- DEBUG=${DEBUG}
|
||||
- TIME_ZONE=${TIME_ZONE}
|
||||
- LANGUAGE_CODE=${LANGUAGE_CODE}
|
||||
# Postgres (migrate)
|
||||
- APP_DB_NAME=${APP_DB_NAME}
|
||||
- APP_DB_USER=${APP_DB_USER}
|
||||
- APP_DB_PASSWORD=${APP_DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
# Neo4j (setup_neo4j_indexes + load_library_types)
|
||||
- NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
|
||||
# Logging
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||
restart: "no"
|
||||
|
||||
|
||||
# ── App: Django REST API + admin ──────────────────────────────────────────
|
||||
# Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
|
||||
# Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
|
||||
@@ -103,6 +145,8 @@ services:
|
||||
depends_on:
|
||||
static-init:
|
||||
condition: service_completed_successfully
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- mnemosyne-media:/app/media
|
||||
healthcheck:
|
||||
@@ -112,6 +156,7 @@ services:
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
|
||||
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
|
||||
# Read-only LLM-facing surface. Intentionally excluded:
|
||||
# CELERY_BROKER_URL — MCP must not enqueue tasks
|
||||
@@ -171,6 +216,9 @@ services:
|
||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- mnemosyne-media:/app/media
|
||||
healthcheck:
|
||||
@@ -180,6 +228,7 @@ services:
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
|
||||
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
|
||||
# Consumer side of the queue. Needs the full S3 block (reads Daedalus's
|
||||
# bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
|
||||
|
||||
@@ -55,6 +55,19 @@ case "$1" in
|
||||
python manage.py load_library_types
|
||||
;;
|
||||
|
||||
init)
|
||||
# Bundled one-shot init run by the `init` sidecar on every
|
||||
# `docker compose up`. Idempotent: re-runs are no-ops unless migrations
|
||||
# or indexes need to change. A non-zero exit here blocks `app`, `mcp`,
|
||||
# and `worker` from starting, which is the point — we'd rather fail
|
||||
# loudly than serve silent zero-result searches.
|
||||
set -e
|
||||
python manage.py migrate --noinput
|
||||
python manage.py setup_neo4j_indexes
|
||||
python manage.py load_library_types
|
||||
;;
|
||||
|
||||
|
||||
shell)
|
||||
# Drop into the management shell for ad-hoc work.
|
||||
exec python manage.py shell
|
||||
|
||||
@@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank
|
||||
}
|
||||
```
|
||||
|
||||
> **`LLMApi.base_url` convention.** Every Mnemosyne service client
|
||||
> (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`)
|
||||
> treats `base_url` as the **OpenAI-style `/v1` root** and appends a
|
||||
> path-only segment: `/embeddings`, `/rerank`, `/chat/completions`.
|
||||
> So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1`
|
||||
> serves both the embedding and the reranker endpoints — no per-purpose
|
||||
> duplication needed.
|
||||
>
|
||||
> Get this wrong and the request 404s silently: a `base_url` with no
|
||||
> `/v1` (e.g. `http://pan.helu.ca:8400`) drops the prefix (`…/rerank`), and a
|
||||
> client that prepends `/v1` locally doubles it (`…/v1/v1/rerank`) —
|
||||
> `SearchService._rerank` catches the exception, the UI shows
|
||||
> "Re-rank: Skipped", and the search falls back to raw RRF order.
|
||||
> Check `results.reranker_skip_reason` on the search page for the
|
||||
> specific error.
|
||||
|
||||
## Deliverables
|
||||
|
||||
### 1. Search Service (`library/services/search.py`)
|
||||
|
||||
@@ -294,31 +294,37 @@ graph LR
|
||||
</div>
|
||||
|
||||
<div class="alert alert-warning border-start border-4 border-warning">
|
||||
<h4><i class="bi bi-lightning"></i> Neo4j Vector Indexes</h4>
|
||||
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
|
||||
CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
|
||||
<h4><i class="bi bi-lightning"></i> Neo4j Indexes (managed by <code>setup_neo4j_indexes</code>)</h4>
|
||||
<p>Created by the <code>init</code> sidecar on every <code>docker compose up</code>. Vector dimensions come from the system embedding model's <code>vector_dimensions</code> field — the command fails if no model is configured. Current production model: <strong>Pan Synesis · qwen3-vl-embedding-2b · 2048d</strong>.</p>
|
||||
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (dimensions read from system embedding model)
|
||||
CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
|
||||
ON (c.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Concept embeddings for semantic concept search
|
||||
CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
|
||||
CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
|
||||
ON (con.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Image multimodal embeddings
|
||||
CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
|
||||
CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
|
||||
ON (ie.embedding) OPTIONS {indexConfig: {
|
||||
`vector.dimensions`: 4096,
|
||||
`vector.dimensions`: 2048,
|
||||
`vector.similarity_function`: 'cosine'
|
||||
}}
|
||||
|
||||
// Full-text index for keyword/BM25-style search
|
||||
CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]</code></pre>
|
||||
// Full-text indexes (BM25-style keyword search)
|
||||
CREATE FULLTEXT INDEX chunk_text_fulltext FOR (c:Chunk) ON EACH [c.text_preview]
|
||||
CREATE FULLTEXT INDEX concept_name_fulltext FOR (c:Concept) ON EACH [c.name]
|
||||
CREATE FULLTEXT INDEX item_title_fulltext FOR (i:Item) ON EACH [i.title]
|
||||
CREATE FULLTEXT INDEX library_name_fulltext FOR (l:Library) ON EACH [l.name]</code></pre>
|
||||
<p class="mb-0 mt-3"><strong>Changing the embedding model or dimensions is a re-embedding event.</strong> Drop + recreate the vector indexes (<code>setup_neo4j_indexes --drop</code>) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.</p>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
<!-- SECTION: CONTENT TYPES -->
|
||||
@@ -521,10 +527,11 @@ flowchart TD
|
||||
<div class="card-body">
|
||||
<p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
|
||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
|
||||
'chunk_embedding', 30,
|
||||
'chunk_embedding_index', 30,
|
||||
$query_vector
|
||||
) YIELD node, score
|
||||
WHERE score > $threshold</code></pre>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -548,9 +555,10 @@ RETURN c2, i2</code></pre>
|
||||
<div class="card-body">
|
||||
<p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
|
||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
|
||||
'chunk_fulltext',
|
||||
'chunk_text_fulltext',
|
||||
$query_text
|
||||
) YIELD node, score</code></pre>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -150,6 +150,16 @@ class SearchResponseSerializer(serializers.Serializer):
|
||||
reranker_used = serializers.BooleanField()
|
||||
reranker_model = serializers.CharField(allow_null=True)
|
||||
search_types_used = serializers.ListField(child=serializers.CharField())
|
||||
# Populated when ``rerank=True`` was requested but the re-ranking
|
||||
# step did not run — e.g. no system reranker model configured
|
||||
# (``no_system_model``), the Synesis call raised
|
||||
# (``api_error: ...``), or fusion produced zero candidates
|
||||
# (``no_candidates``). ``null`` means either success or that
|
||||
# re-ranking was not requested. ``required=False`` keeps old
|
||||
# clients happy.
|
||||
reranker_skip_reason = serializers.CharField(
|
||||
allow_null=True, required=False, default=None
|
||||
)
|
||||
|
||||
|
||||
# --- Workspace lifecycle (Daedalus integration) ---
|
||||
|
||||
@@ -1,7 +1,202 @@
|
||||
"""
|
||||
Django AppConfig for the ``library`` app.
|
||||
|
||||
Registers a startup probe that runs once per Python process and yells if
|
||||
Mnemosyne is misconfigured in a way that would cause silent zero-result
|
||||
searches — missing embedding model, missing Neo4j index, or a dimension
|
||||
mismatch between the model and an existing index. Loud ERROR lines are the
|
||||
only defence against "search works but returns nothing", which is
|
||||
indistinguishable from "search works and matched nothing" unless you read
|
||||
the stderr of a different container.
|
||||
|
||||
The probe is deliberately best-effort: it cannot crash the process even if
|
||||
Neo4j is unreachable, because a transient DB blip on startup should not
|
||||
take down the whole app. The `init` sidecar is the hard gate; this is the
|
||||
second line of defence for long-running containers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Index names we expect setup_neo4j_indexes to have created. Kept in sync
|
||||
# with library/management/commands/setup_neo4j_indexes.py. A test asserts
|
||||
# they stay in sync.
|
||||
_EXPECTED_VECTOR_INDEXES = (
|
||||
"chunk_embedding_index",
|
||||
"concept_embedding_index",
|
||||
"image_embedding_index",
|
||||
)
|
||||
_EXPECTED_FULLTEXT_INDEXES = (
|
||||
"chunk_text_fulltext",
|
||||
"concept_name_fulltext",
|
||||
"item_title_fulltext",
|
||||
"library_name_fulltext",
|
||||
)
|
||||
|
||||
|
||||
def _should_skip_probe() -> bool:
|
||||
"""
|
||||
Decide whether to skip the startup probe.
|
||||
|
||||
Skip when:
|
||||
- Running a management command other than the long-running servers
|
||||
(migrate, makemigrations, setup_neo4j_indexes, load_library_types,
|
||||
collectstatic, test, shell). The probe would just spam stderr for
|
||||
ops work that doesn't care about Neo4j index state.
|
||||
- Neo4j bolt URL is unset (build-time ``collectstatic`` stubs, CI
|
||||
unit tests without real infra).
|
||||
- Running under pytest (any argv contains ``pytest`` or a test runner
|
||||
env var is set).
|
||||
"""
|
||||
if "pytest" in sys.argv[0] or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
return True
|
||||
if os.environ.get("DJANGO_SKIP_STARTUP_PROBE") == "1":
|
||||
return True
|
||||
# Typical Django command invocations where the probe is noise.
|
||||
skip_commands = {
|
||||
"migrate",
|
||||
"makemigrations",
|
||||
"setup_neo4j_indexes",
|
||||
"load_library_types",
|
||||
"collectstatic",
|
||||
"test",
|
||||
"shell",
|
||||
"check",
|
||||
"dbshell",
|
||||
"showmigrations",
|
||||
"squashmigrations",
|
||||
"createsuperuser",
|
||||
"help",
|
||||
}
|
||||
if len(sys.argv) >= 2 and sys.argv[1] in skip_commands:
|
||||
return True
|
||||
# No Neo4j endpoint configured — probably a build or local dev without
|
||||
# graph infrastructure. Don't pretend we can check.
|
||||
if not os.environ.get("NEOMODEL_NEO4J_BOLT_URL"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _run_startup_probe():
    """
    Emit ERROR/WARNING logs if the stack is misconfigured for search.

    Checks, in order:

    1. A system embedding model exists and has ``vector_dimensions`` set.
    2. Every name in ``_EXPECTED_VECTOR_INDEXES`` is listed by
       ``SHOW INDEXES`` and, when the model's dimension is known, the
       index's ``vector.dimensions`` matches it.
    3. Every name in ``_EXPECTED_FULLTEXT_INDEXES`` is listed by
       ``SHOW INDEXES``.

    The model lookup and the index listing are guarded separately so a
    failure in one doesn't mask the other's findings. If the index listing
    itself fails, a warning is logged and checks 2 and 3 are skipped.

    :returns: ``None``; the only side effect is log output.
    """
    from neomodel import db

    # --- 1. System embedding model --------------------------------------
    embedding_dim = None
    embedding_model_label = "<unknown>"
    try:
        from llm_manager.models import LLMModel

        model = LLMModel.get_system_embedding_model()
        if not model:
            logger.error(
                "No system embedding model configured. Search will return "
                "zero results until one is set in the LLM admin."
            )
        else:
            embedding_model_label = f"{model.api.name}: {model.name}"
            if not model.vector_dimensions:
                logger.error(
                    "System embedding model '%s: %s' has no vector_dimensions "
                    "set. Neo4j vector indexes cannot be validated and search "
                    "quality will be unpredictable.",
                    model.api.name,
                    model.name,
                )
            else:
                embedding_dim = model.vector_dimensions
                logger.info(
                    "System embedding model: %s (%dd)",
                    embedding_model_label,
                    embedding_dim,
                )
    except Exception as exc:
        logger.warning(
            "Startup probe could not read system embedding model: %s", exc
        )

    # --- 2. Neo4j indexes present & correctly dimensioned ---------------
    try:
        results, _ = db.cypher_query(
            "SHOW INDEXES YIELD name, type, options RETURN name, type, options"
        )
    except Exception as exc:
        logger.warning(
            "Startup probe could not list Neo4j indexes: %s. Search "
            "degradation will only surface at query time.",
            exc,
        )
        return

    # Index name -> options. The ``type`` column is not consulted below.
    present = {row[0]: row[2] for row in results}

    # Missing vector indexes
    for name in _EXPECTED_VECTOR_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j vector index '%s' is missing. Run "
                "'docker compose run --rm init' (or 'python manage.py "
                "setup_neo4j_indexes') to rebuild.",
                name,
            )
            continue
        # Dimension check against the embedding model.
        if embedding_dim is None:
            continue
        # Treat an unexpectedly shaped ``options`` value as "dimension
        # unknown" so one odd row cannot abort the remaining checks.
        options = present[name]
        config = options.get("indexConfig") if isinstance(options, dict) else None
        raw_dim = config.get("vector.dimensions") if isinstance(config, dict) else None
        try:
            existing_dim = int(raw_dim) if raw_dim is not None else None
        except (TypeError, ValueError):
            existing_dim = None
        if existing_dim is not None and existing_dim != embedding_dim:
            logger.error(
                "Neo4j index '%s' has %d dimensions but system embedding "
                "model %s reports %d. Re-run 'setup_neo4j_indexes --drop' "
                "and re-embed all content — search will return empty or "
                "wrong results until this is fixed.",
                name,
                existing_dim,
                embedding_model_label,
                embedding_dim,
            )

    # Missing fulltext indexes
    for name in _EXPECTED_FULLTEXT_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j full-text index '%s' is missing. Full-text search "
                "will silently return no matches. Run 'setup_neo4j_indexes'.",
                name,
            )
|
||||
|
||||
|
||||
class LibraryConfig(AppConfig):
    """
    AppConfig for the ``library`` app.

    ``ready()`` runs the best-effort startup probe unless
    ``_should_skip_probe()`` says to skip it.
    """

    default_auto_field = "django.db.models.BigAutoField"
    name = "library"
    verbose_name = "Library"

    def ready(self):
        """
        Run the startup probe, logging instead of raising on any failure.

        Both the skip decision and the probe itself sit inside the guard,
        so no exception from either can escape ``ready()``.
        """
        try:
            if _should_skip_probe():
                return
            _run_startup_probe()
        except Exception as exc:
            # Never let the probe itself take down the process.
            logger.warning("Startup probe crashed: %s", exc, exc_info=True)
|
||||
|
||||
@@ -2,21 +2,28 @@
|
||||
Management command to create Neo4j indexes for Mnemosyne content graph.
|
||||
|
||||
Creates:
|
||||
- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding
|
||||
- Full-text indexes for text search on Chunk.text_preview and Concept.name
|
||||
- Constraint indexes enforced by neomodel (unique properties)
|
||||
- Vector indexes for Chunk, Concept, and ImageEmbedding. Dimensions are read
|
||||
from the system embedding model — NOT a settings default. If no model is
|
||||
configured and no ``--dimensions`` override is supplied, the command
|
||||
fails: a wrong dimension is worse than a clear error.
|
||||
- Full-text indexes for text search on Chunk.text_preview, Concept.name,
|
||||
Item.title, Library.name.
|
||||
- Constraint indexes enforced by neomodel (unique properties).
|
||||
|
||||
When run via the `init` sidecar, a non-zero exit here blocks ``app``/``mcp``/
|
||||
``worker`` from starting. That is deliberate: silent zero-result searches
|
||||
are the failure mode we are trying to prevent.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from neomodel import db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default vector dimensions (used when no system embedding model is configured)
|
||||
DEFAULT_VECTOR_DIMENSIONS = 4096
|
||||
|
||||
# Full-text index definitions: (index_name, label, properties)
|
||||
FULLTEXT_INDEXES = [
|
||||
("chunk_text_fulltext", "Chunk", ["text_preview"]),
|
||||
@@ -26,57 +33,122 @@ FULLTEXT_INDEXES = [
|
||||
]
|
||||
|
||||
|
||||
def _get_vector_dimensions():
|
||||
def _get_vector_dimensions(override: int = 0):
|
||||
"""
|
||||
Get vector dimensions from the system embedding model.
|
||||
Resolve the dimension count to build vector indexes at.
|
||||
|
||||
Falls back to DEFAULT_VECTOR_DIMENSIONS if no model is configured
|
||||
or the model has no vector_dimensions set.
|
||||
Precedence (top wins):
|
||||
1. ``--dimensions`` CLI override (positive int)
|
||||
2. System embedding model's ``vector_dimensions`` field
|
||||
|
||||
:returns: Tuple of (dimensions, source_description).
|
||||
If neither is available, returns ``(None, reason)`` and the caller must
|
||||
abort. There is deliberately no hardcoded fallback — an index built at
|
||||
the wrong dimension silently breaks search forever.
|
||||
|
||||
:param override: Value from the ``--dimensions`` CLI flag (0 if unset).
|
||||
:returns: Tuple of ``(dimensions, source_description)``. ``dimensions``
|
||||
is ``None`` when the value cannot be resolved.
|
||||
"""
|
||||
if override > 0:
|
||||
return override, f"CLI override (--dimensions={override})"
|
||||
|
||||
try:
|
||||
from llm_manager.models import LLMModel
|
||||
|
||||
model = LLMModel.get_system_embedding_model()
|
||||
if model and model.vector_dimensions:
|
||||
return model.vector_dimensions, f"{model.api.name}: {model.name}"
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc: # pragma: no cover - DB unreachable path
|
||||
return None, f"LLMModel lookup failed: {exc}"
|
||||
|
||||
return DEFAULT_VECTOR_DIMENSIONS, "default (no system embedding model)"
|
||||
if not model:
|
||||
return None, "no system embedding model configured"
|
||||
if not model.vector_dimensions:
|
||||
return (
|
||||
None,
|
||||
f"system embedding model '{model.api.name}: {model.name}' has "
|
||||
"no vector_dimensions set",
|
||||
)
|
||||
|
||||
return model.vector_dimensions, f"{model.api.name}: {model.name}"
|
||||
|
||||
|
||||
def _existing_vector_index_dimensions(name: str):
    """
    Return the configured dimension count of an existing vector index.

    Neo4j's ``SHOW INDEXES`` returns ``options`` as a map whose
    ``indexConfig`` entry holds ``vector.dimensions``.

    :param name: Index name to look up.
    :returns: The dimension count as an ``int``, or ``None`` if the lookup
        query fails (a warning is logged), the index doesn't exist, isn't
        a vector index, or the dimension cannot be parsed (e.g. Neo4j
        version differences).
    """
    try:
        results, _ = db.cypher_query(
            "SHOW INDEXES YIELD name, type, options "
            "WHERE name = $name RETURN type, options",
            {"name": name},
        )
    except Exception as exc:
        # Still best-effort, but leave a trace: otherwise a failed lookup
        # is indistinguishable from "index absent" for whoever reads the
        # command output.
        logger.warning(
            "Could not read dimensions of Neo4j index '%s': %s", name, exc
        )
        return None

    if not results:
        return None

    idx_type, options = results[0]
    if (idx_type or "").upper() != "VECTOR":
        return None

    # ``options`` comes back as a dict in the Python driver; anything else
    # is treated as "dimension unknown".
    config = options.get("indexConfig") if isinstance(options, dict) else None
    dims = config.get("vector.dimensions") if isinstance(config, dict) else None
    if dims is None:
        return None
    try:
        # Handles ints and plain numeric strings such as "4096".
        return int(dims)
    except (TypeError, ValueError):
        # Fallback for values int() rejects: take the first run of digits
        # from the string form, if there is one.
        m = re.search(r"\d+", str(dims))
        return int(m.group(0)) if m else None
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = (
|
||||
"Create Neo4j vector, full-text, and constraint indexes "
|
||||
"for the Mnemosyne content graph. Vector dimensions are read "
|
||||
"from the system embedding model."
|
||||
"Create Neo4j vector, full-text, and constraint indexes for the "
|
||||
"Mnemosyne content graph. Vector dimensions are read from the "
|
||||
"system embedding model; the command fails if the model is not "
|
||||
"configured (pass --dimensions only for explicit overrides)."
|
||||
)
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
"--drop",
|
||||
action="store_true",
|
||||
help="Drop existing indexes before recreating them",
|
||||
help="Drop existing managed indexes before recreating them.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dimensions",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Override vector dimensions (default: read from system embedding model)",
|
||||
help=(
|
||||
"Override vector dimensions (normally read from the system "
|
||||
"embedding model). Use with care — an incorrect value "
|
||||
"silently breaks search."
|
||||
),
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
drop = options["drop"]
|
||||
override_dims = options["dimensions"]
|
||||
|
||||
# Resolve vector dimensions
|
||||
if override_dims > 0:
|
||||
dimensions = override_dims
|
||||
source = f"CLI override ({override_dims})"
|
||||
else:
|
||||
dimensions, source = _get_vector_dimensions()
|
||||
dimensions, source = _get_vector_dimensions(override_dims)
|
||||
|
||||
if dimensions is None:
|
||||
# Fail loudly. Returning success while skipping vector-index
|
||||
# creation is exactly how `app`/`mcp`/`worker` end up running
|
||||
# against an un-indexed Neo4j and serving empty search results.
|
||||
raise CommandError(
|
||||
"Cannot create vector indexes: "
|
||||
f"{source}. Configure a system embedding model with "
|
||||
"vector_dimensions set, or pass --dimensions N explicitly."
|
||||
)
|
||||
|
||||
self.stdout.write(
|
||||
self.style.HTTP_INFO(
|
||||
@@ -84,7 +156,7 @@ class Command(BaseCommand):
|
||||
)
|
||||
)
|
||||
|
||||
# Vector index definitions (dynamic dimensions)
|
||||
# Vector index definitions (dimensions resolved above)
|
||||
vector_indexes = [
|
||||
("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
|
||||
("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
|
||||
@@ -98,11 +170,27 @@ class Command(BaseCommand):
|
||||
self._drop_indexes(existing_indexes, vector_indexes)
|
||||
existing_indexes = self._get_existing_indexes()
|
||||
|
||||
dim_mismatch = False
|
||||
|
||||
# Create vector indexes
|
||||
for name, label, prop, dims, similarity in vector_indexes:
|
||||
if name in existing_indexes:
|
||||
existing_dim = _existing_vector_index_dimensions(name)
|
||||
if existing_dim is not None and existing_dim != dims:
|
||||
dim_mismatch = True
|
||||
self.stderr.write(
|
||||
self.style.ERROR(
|
||||
f"Vector index '{name}' exists at {existing_dim} "
|
||||
f"dimensions but the system model requires {dims}. "
|
||||
"Re-run with --drop and re-embed all content."
|
||||
)
|
||||
)
|
||||
continue
|
||||
self.stdout.write(
|
||||
self.style.NOTICE(f"Vector index '{name}' already exists, skipping")
|
||||
self.style.NOTICE(
|
||||
f"Vector index '{name}' already exists "
|
||||
f"({existing_dim or 'unknown'}d), skipping"
|
||||
)
|
||||
)
|
||||
continue
|
||||
try:
|
||||
@@ -164,6 +252,20 @@ class Command(BaseCommand):
|
||||
self.style.ERROR(f"Failed to install neomodel labels: {e}")
|
||||
)
|
||||
|
||||
if dim_mismatch:
|
||||
# Exit non-zero so the `init` sidecar's compose dependency marks
|
||||
# the stack startup as failed. Admin must re-run with --drop and
|
||||
# re-embed content.
|
||||
self.stderr.write(
|
||||
self.style.ERROR(
|
||||
"\nOne or more vector indexes exist at the wrong "
|
||||
"dimension. Search will return empty or garbage results "
|
||||
"until you run: setup_neo4j_indexes --drop "
|
||||
"and re-embed all content."
|
||||
)
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))
|
||||
|
||||
def _get_existing_indexes(self):
|
||||
|
||||
@@ -79,7 +79,13 @@ class RerankerClient:
|
||||
query, candidates, instruction, top_n, query_image
|
||||
)
|
||||
|
||||
url = f"{self.base_url}/v1/rerank"
|
||||
# Convention shared with every other service client in Mnemosyne
|
||||
# (embedding_client, vision, concepts): ``base_url`` is the
|
||||
# OpenAI-style ``/v1`` root (e.g. ``http://pan.helu.ca:8400/v1``),
|
||||
# and each client appends a path-only segment. Prepending
|
||||
# ``/v1`` here would build ``…/v1/v1/rerank`` and 404 against
|
||||
# Synesis.
|
||||
url = f"{self.base_url}/rerank"
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if self.api.api_key:
|
||||
headers["Authorization"] = f"Bearer {self.api.api_key}"
|
||||
|
||||
@@ -99,7 +99,17 @@ class SearchRequest:
|
||||
|
||||
@dataclass
|
||||
class SearchResponse:
|
||||
"""Results from a search query."""
|
||||
"""Results from a search query.
|
||||
|
||||
``reranker_skip_reason`` is set when the caller requested
|
||||
``rerank=True`` but the re-ranking step did not actually run — for
|
||||
example because no system reranker model is configured, the Synesis
|
||||
HTTP call raised, or the fused candidate list was empty. It is
|
||||
``None`` both when re-ranking succeeded (``reranker_used=True``) and
|
||||
when the caller asked for ``rerank=False`` — callers / templates
|
||||
should distinguish those two "no-reason" cases by looking at the
|
||||
original request's ``rerank`` flag.
|
||||
"""
|
||||
|
||||
query: str
|
||||
candidates: list[SearchCandidate]
|
||||
@@ -109,6 +119,7 @@ class SearchResponse:
|
||||
reranker_used: bool
|
||||
reranker_model: Optional[str]
|
||||
search_types_used: list[str]
|
||||
reranker_skip_reason: Optional[str] = None
|
||||
|
||||
|
||||
class SearchService:
|
||||
@@ -186,13 +197,19 @@ class SearchService:
|
||||
# --- Re-rank ---
|
||||
reranker_used = False
|
||||
reranker_model_name = None
|
||||
reranker_skip_reason: Optional[str] = None
|
||||
|
||||
if request.rerank and fused:
|
||||
reranked, model_name = self._rerank(request, fused)
|
||||
if reranked is not None:
|
||||
fused = reranked
|
||||
reranker_used = True
|
||||
reranker_model_name = model_name
|
||||
if request.rerank:
|
||||
if not fused:
|
||||
reranker_skip_reason = "no_candidates"
|
||||
else:
|
||||
reranked, model_name, skip_reason = self._rerank(request, fused)
|
||||
if reranked is not None:
|
||||
fused = reranked
|
||||
reranker_used = True
|
||||
reranker_model_name = model_name
|
||||
else:
|
||||
reranker_skip_reason = skip_reason
|
||||
|
||||
# Trim to limit
|
||||
fused = fused[: request.limit]
|
||||
@@ -225,6 +242,7 @@ class SearchService:
|
||||
reranker_used=reranker_used,
|
||||
reranker_model=reranker_model_name,
|
||||
search_types_used=search_types_used,
|
||||
reranker_skip_reason=reranker_skip_reason,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -720,13 +738,28 @@ class SearchService:
|
||||
|
||||
def _rerank(
|
||||
self, request: SearchRequest, candidates: list[SearchCandidate]
|
||||
) -> tuple[Optional[list[SearchCandidate]], Optional[str]]:
|
||||
) -> tuple[Optional[list[SearchCandidate]], Optional[str], Optional[str]]:
|
||||
"""
|
||||
Re-rank candidates via Synesis.
|
||||
|
||||
:param request: SearchRequest.
|
||||
:param candidates: Fused candidates to re-rank.
|
||||
:returns: Tuple of (reranked_candidates, model_name) or (None, None).
|
||||
:returns: Tuple of ``(reranked_candidates, model_name, skip_reason)``.
|
||||
|
||||
On success the first element is the reranked list, the second
|
||||
is the model name, and the third is ``None``.
|
||||
|
||||
On skip the first two are ``None`` and the third is a short
|
||||
machine-readable reason:
|
||||
|
||||
* ``"no_system_model"`` — no ``LLMModel.is_system_reranker_model``
|
||||
configured.
|
||||
* ``"api_error: <truncated msg>"`` — ``RerankerClient.rerank``
|
||||
raised (HTTP error, network error, malformed response).
|
||||
|
||||
The reason is intended for display on the search page so the
|
||||
user can see *why* re-ranking didn't happen, without having to
|
||||
grep server logs.
|
||||
"""
|
||||
from llm_manager.models import LLMModel
|
||||
|
||||
@@ -735,7 +768,7 @@ class SearchService:
|
||||
reranker_model = LLMModel.get_system_reranker_model()
|
||||
if not reranker_model:
|
||||
logger.debug("No system reranker model — skipping re-ranking")
|
||||
return None, None
|
||||
return None, None, "no_system_model"
|
||||
|
||||
# Get content-type reranker instruction
|
||||
instruction = self._get_reranker_instruction(request, candidates)
|
||||
@@ -755,12 +788,17 @@ class SearchService:
|
||||
instruction=instruction,
|
||||
query_image=request.query_image,
|
||||
)
|
||||
return reranked, reranker_model.name
|
||||
return reranked, reranker_model.name, None
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Re-ranking failed, returning fusion results: %s", exc
|
||||
)
|
||||
return None, None
|
||||
# Truncate the exception message to keep the UI tooltip /
|
||||
# JSON payload bounded; full detail is in the WARNING log.
|
||||
msg = str(exc)
|
||||
if len(msg) > 200:
|
||||
msg = msg[:197] + "..."
|
||||
return None, None, f"api_error: {msg}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers
|
||||
|
||||
@@ -146,9 +146,23 @@
|
||||
{% if results_reranked.reranker_used %}
|
||||
<span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
|
||||
{% else %}
|
||||
<span class="badge badge-warning">unavailable</span>
|
||||
{# The A/B page always requests rerank=True, so a
|
||||
negative here is always a skip with a reason —
|
||||
surface the reason in a tooltip so the user
|
||||
knows *why* the B side fell back to fusion
|
||||
order. #}
|
||||
<span class="badge badge-warning"
|
||||
title="{{ results_reranked.reranker_skip_reason|default:'unknown' }}">
|
||||
unavailable
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% if not results_reranked.reranker_used and results_reranked.reranker_skip_reason %}
|
||||
<div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
|
||||
title="{{ results_reranked.reranker_skip_reason }}">
|
||||
{{ results_reranked.reranker_skip_reason }}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -81,12 +81,30 @@
|
||||
<div class="stat">
|
||||
<div class="stat-title">Re-ranked</div>
|
||||
<div class="stat-value text-lg">
|
||||
{# Three-state indicator:
|
||||
- Yes: re-ranker ran successfully
|
||||
- Skipped (+ tooltip): re-rank was requested but the
|
||||
pipeline bailed — badge colour is a warning so the
|
||||
user knows something didn't work
|
||||
- Off: user unchecked the re-rank box
|
||||
#}
|
||||
{% if results.reranker_used %}
|
||||
<span class="badge badge-success">Yes</span>
|
||||
{% elif results.reranker_skip_reason %}
|
||||
<span class="badge badge-warning"
|
||||
title="{{ results.reranker_skip_reason }}">
|
||||
Skipped
|
||||
</span>
|
||||
{% else %}
|
||||
<span class="badge badge-ghost">No</span>
|
||||
<span class="badge badge-ghost">Off</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% if results.reranker_skip_reason %}
|
||||
<div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
|
||||
title="{{ results.reranker_skip_reason }}">
|
||||
{{ results.reranker_skip_reason }}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-title">Search Types</div>
|
||||
|
||||
@@ -29,10 +29,15 @@ def _make_candidate(chunk_uid: str, text_preview: str = "Some text", **kwargs):
|
||||
|
||||
|
||||
def _mock_reranker_model():
|
||||
"""Create a mock LLMModel for reranking."""
|
||||
"""Create a mock LLMModel for reranking.
|
||||
|
||||
``base_url`` follows the project-wide convention: it is the
|
||||
OpenAI-style ``/v1`` root. Every service client (embedding, vision,
|
||||
concepts, reranker) appends a path-only segment to it.
|
||||
"""
|
||||
model = MagicMock()
|
||||
model.name = "qwen3-vl-reranker-2b"
|
||||
model.api.base_url = "http://pan.helu.ca:8400"
|
||||
model.api.base_url = "http://pan.helu.ca:8400/v1"
|
||||
model.api.api_key = ""
|
||||
model.api.timeout_seconds = 30
|
||||
model.input_cost_per_1k = Decimal("0")
|
||||
@@ -49,7 +54,7 @@ class RerankerClientInitTest(TestCase):
|
||||
client = RerankerClient(model)
|
||||
|
||||
self.assertEqual(client.model_name, "qwen3-vl-reranker-2b")
|
||||
self.assertEqual(client.base_url, "http://pan.helu.ca:8400")
|
||||
self.assertEqual(client.base_url, "http://pan.helu.ca:8400/v1")
|
||||
|
||||
|
||||
class RerankerClientRerankTest(TestCase):
|
||||
@@ -198,6 +203,33 @@ class RerankerClientRerankTest(TestCase):
|
||||
with self.assertRaises(Exception):
|
||||
client.rerank(query="test", candidates=candidates)
|
||||
|
||||
@patch("library.services.reranker.requests.post")
|
||||
def test_request_url_is_base_plus_rerank(self, mock_post):
|
||||
"""URL is ``{base_url}/rerank`` — base_url already contains ``/v1``.
|
||||
|
||||
Regression guard: earlier versions prepended ``/v1/`` inside the
|
||||
client, which produced ``…/v1/v1/rerank`` and 404 against Synesis
|
||||
when the ``LLMApi.base_url`` followed the same convention as the
|
||||
embedding / chat / vision clients.
|
||||
"""
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"results": [{"index": 0, "score": 0.5}],
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
model = _mock_reranker_model()
|
||||
client = RerankerClient(model)
|
||||
candidates = [_make_candidate("a")]
|
||||
|
||||
client.rerank(query="test", candidates=candidates)
|
||||
|
||||
# First positional arg to requests.post is the URL.
|
||||
called_url = mock_post.call_args.args[0] if mock_post.call_args.args \
|
||||
else mock_post.call_args.kwargs.get("url")
|
||||
self.assertEqual(called_url, "http://pan.helu.ca:8400/v1/rerank")
|
||||
|
||||
@patch("library.services.reranker.requests.post")
|
||||
def test_no_instruction_omits_field(self, mock_post):
|
||||
"""Empty instruction is not sent in payload."""
|
||||
|
||||
@@ -52,7 +52,7 @@ class SearchServiceSearchTest(TestCase):
|
||||
]
|
||||
mock_fulltext.return_value = []
|
||||
mock_graph.return_value = []
|
||||
mock_rerank.return_value = (None, None)
|
||||
mock_rerank.return_value = (None, None, "no_system_model")
|
||||
mock_image.return_value = []
|
||||
|
||||
request = SearchRequest(
|
||||
@@ -110,7 +110,9 @@ class SearchServiceSearchTest(TestCase):
|
||||
chunk_s3_key="s3/key", chunk_index=0, score=0.95,
|
||||
source="fulltext",
|
||||
)
|
||||
mock_rerank.return_value = ([reranked_candidate], "qwen3-vl-reranker-2b")
|
||||
mock_rerank.return_value = (
|
||||
[reranked_candidate], "qwen3-vl-reranker-2b", None,
|
||||
)
|
||||
|
||||
request = SearchRequest(
|
||||
query="test",
|
||||
@@ -125,6 +127,8 @@ class SearchServiceSearchTest(TestCase):
|
||||
self.assertTrue(response.reranker_used)
|
||||
self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
|
||||
self.assertAlmostEqual(response.candidates[0].score, 0.95)
|
||||
# Successful rerank → no skip reason surfaced to UI / API.
|
||||
self.assertIsNone(response.reranker_skip_reason)
|
||||
|
||||
@patch("library.services.search.SearchService._fulltext_search")
|
||||
@patch("library.services.search.SearchService._embed_query")
|
||||
@@ -152,6 +156,103 @@ class SearchServiceSearchTest(TestCase):
|
||||
|
||||
self.assertFalse(response.reranker_used)
|
||||
self.assertIsNone(response.reranker_model)
|
||||
# ``rerank=False`` means "not requested", not "skipped" — so no
|
||||
# reason is reported. The template tells this apart from the
|
||||
# skip case by the absence of ``reranker_skip_reason``.
|
||||
self.assertIsNone(response.reranker_skip_reason)
|
||||
|
||||
@patch("library.services.search.SearchService._rerank")
|
||||
@patch("library.services.search.SearchService._fulltext_search")
|
||||
@patch("library.services.search.SearchService._embed_query")
|
||||
def test_search_reports_skip_reason_no_system_model(
|
||||
self, mock_embed, mock_fulltext, mock_rerank
|
||||
):
|
||||
"""Rerank requested but no system model → ``no_system_model`` surfaced."""
|
||||
mock_embed.return_value = None
|
||||
mock_fulltext.return_value = [
|
||||
SearchCandidate(
|
||||
chunk_uid="c1", item_uid="i1", item_title="Test",
|
||||
library_type="technical", text_preview="preview",
|
||||
chunk_s3_key="s3/key", chunk_index=0, score=0.5,
|
||||
source="fulltext",
|
||||
)
|
||||
]
|
||||
mock_rerank.return_value = (None, None, "no_system_model")
|
||||
|
||||
request = SearchRequest(
|
||||
query="test",
|
||||
search_types=["fulltext"],
|
||||
rerank=True,
|
||||
include_images=False,
|
||||
)
|
||||
|
||||
service = SearchService()
|
||||
response = service.search(request)
|
||||
|
||||
self.assertFalse(response.reranker_used)
|
||||
self.assertIsNone(response.reranker_model)
|
||||
self.assertEqual(response.reranker_skip_reason, "no_system_model")
|
||||
|
||||
@patch("library.services.search.SearchService._rerank")
|
||||
@patch("library.services.search.SearchService._fulltext_search")
|
||||
@patch("library.services.search.SearchService._embed_query")
|
||||
def test_search_reports_skip_reason_api_error(
|
||||
self, mock_embed, mock_fulltext, mock_rerank
|
||||
):
|
||||
"""Rerank API raising → ``api_error: ...`` surfaced in response."""
|
||||
mock_embed.return_value = None
|
||||
mock_fulltext.return_value = [
|
||||
SearchCandidate(
|
||||
chunk_uid="c1", item_uid="i1", item_title="Test",
|
||||
library_type="technical", text_preview="preview",
|
||||
chunk_s3_key="s3/key", chunk_index=0, score=0.5,
|
||||
source="fulltext",
|
||||
)
|
||||
]
|
||||
mock_rerank.return_value = (
|
||||
None, None,
|
||||
"api_error: 404 Client Error: Not Found for url: "
|
||||
"http://pan.helu.ca:8400/v1/v1/rerank",
|
||||
)
|
||||
|
||||
request = SearchRequest(
|
||||
query="test",
|
||||
search_types=["fulltext"],
|
||||
rerank=True,
|
||||
include_images=False,
|
||||
)
|
||||
|
||||
service = SearchService()
|
||||
response = service.search(request)
|
||||
|
||||
self.assertFalse(response.reranker_used)
|
||||
self.assertIsNotNone(response.reranker_skip_reason)
|
||||
self.assertTrue(
|
||||
response.reranker_skip_reason.startswith("api_error:"),
|
||||
f"expected api_error: prefix, got {response.reranker_skip_reason!r}",
|
||||
)
|
||||
|
||||
@patch("library.services.search.SearchService._fulltext_search")
|
||||
@patch("library.services.search.SearchService._embed_query")
|
||||
def test_search_reports_skip_reason_no_candidates(
|
||||
self, mock_embed, mock_fulltext
|
||||
):
|
||||
"""Rerank requested but fusion produced nothing → ``no_candidates``."""
|
||||
mock_embed.return_value = None
|
||||
mock_fulltext.return_value = []
|
||||
|
||||
request = SearchRequest(
|
||||
query="test",
|
||||
search_types=["fulltext"],
|
||||
rerank=True,
|
||||
include_images=False,
|
||||
)
|
||||
|
||||
service = SearchService()
|
||||
response = service.search(request)
|
||||
|
||||
self.assertFalse(response.reranker_used)
|
||||
self.assertEqual(response.reranker_skip_reason, "no_candidates")
|
||||
|
||||
@patch("library.services.search.SearchService._fulltext_search")
|
||||
@patch("library.services.search.SearchService._embed_query")
|
||||
|
||||
@@ -208,6 +208,41 @@ class SearchAPIResponseTest(TestCase):
|
||||
self.assertEqual(call_args.search_types, ["fulltext"])
|
||||
self.assertFalse(call_args.rerank)
|
||||
|
||||
@patch("library.api.views.SearchService")
|
||||
def test_reranker_skip_reason_surfaced_in_json(self, MockService):
|
||||
"""``reranker_skip_reason`` propagates through the JSON API."""
|
||||
mock_response = SearchResponse(
|
||||
query="neural networks",
|
||||
candidates=[],
|
||||
images=[],
|
||||
total_candidates=0,
|
||||
search_time_ms=10.0,
|
||||
reranker_used=False,
|
||||
reranker_model=None,
|
||||
search_types_used=[],
|
||||
reranker_skip_reason=(
|
||||
"api_error: 404 Client Error: Not Found for url: "
|
||||
"http://pan.helu.ca:8400/v1/v1/rerank"
|
||||
),
|
||||
)
|
||||
mock_instance = MockService.return_value
|
||||
mock_instance.search.return_value = mock_response
|
||||
|
||||
response = self.client.post(
|
||||
"/library/api/search/",
|
||||
{"query": "neural networks"},
|
||||
format="json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertFalse(data["reranker_used"])
|
||||
self.assertIn("reranker_skip_reason", data)
|
||||
self.assertTrue(
|
||||
data["reranker_skip_reason"].startswith("api_error:"),
|
||||
f"got {data['reranker_skip_reason']!r}",
|
||||
)
|
||||
|
||||
|
||||
class ConceptAPITest(TestCase):
|
||||
"""Tests for concept API endpoints."""
|
||||
|
||||
@@ -117,11 +117,15 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
|
||||
def _patched_search(self):
|
||||
def _patched_search(self, reranker_skip_reason=None):
|
||||
"""Return a (request_capture, patch_context) pair.
|
||||
|
||||
The patch captures the ``SearchRequest`` that ``SearchService.search``
|
||||
is called with so assertions can run after the view returns.
|
||||
|
||||
:param reranker_skip_reason: Value to set on the stub response's
|
||||
``reranker_skip_reason`` attribute, for tests that want to
|
||||
exercise the "Skipped" badge rendering path.
|
||||
"""
|
||||
capture: dict = {}
|
||||
|
||||
@@ -137,6 +141,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
||||
reranker_used=False,
|
||||
reranker_model=None,
|
||||
search_types_used=[],
|
||||
reranker_skip_reason=reranker_skip_reason,
|
||||
)
|
||||
|
||||
return capture, patch(
|
||||
@@ -178,3 +183,117 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
||||
)
|
||||
|
||||
self.assertIsNone(capture["request"].allowed_libraries)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_page rerank-status rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SearchPageRerankBadgeTests(TestCase):
|
||||
"""Verify the three-state Re-ranked indicator on the search page.
|
||||
|
||||
The badge must distinguish:
|
||||
|
||||
* Success (``reranker_used=True``) — green "Yes"
|
||||
* Skipped (``rerank=True`` requested but ``reranker_skip_reason`` set)
|
||||
— warning "Skipped" with the reason shown
|
||||
* Off (user unchecked the re-rank box) — ghost "Off"
|
||||
|
||||
This guards the regression that surfaced when Synesis returned 404
|
||||
on a mis-constructed rerank URL: the UI said "No" and gave no hint
|
||||
the re-ranker had actually failed.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.user = User.objects.create_user(
|
||||
username="admin", email="a@example.com", password="pw"
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
|
||||
def _run(self, rerank_value, reranker_used, reranker_skip_reason):
|
||||
capture: dict = {}
|
||||
|
||||
def fake_search(self, request):
|
||||
capture["request"] = request
|
||||
return SimpleNamespace(
|
||||
query=request.query,
|
||||
candidates=[],
|
||||
images=[],
|
||||
total_candidates=0,
|
||||
search_time_ms=0.0,
|
||||
reranker_used=reranker_used,
|
||||
reranker_model=None,
|
||||
search_types_used=[],
|
||||
reranker_skip_reason=reranker_skip_reason,
|
||||
)
|
||||
|
||||
post_data = {"query": "postgresql"}
|
||||
if rerank_value is not None:
|
||||
post_data["rerank"] = rerank_value
|
||||
|
||||
with patch("library.views._all_library_uids", return_value=[]), \
|
||||
patch("library.services.search.SearchService.search", fake_search):
|
||||
response = self.client.post(reverse("library:search"), post_data)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
return response, capture
|
||||
|
||||
def test_badge_shows_yes_when_rerank_succeeded(self):
|
||||
response, _ = self._run(
|
||||
rerank_value="on",
|
||||
reranker_used=True,
|
||||
reranker_skip_reason=None,
|
||||
)
|
||||
body = response.content.decode()
|
||||
self.assertIn("badge-success", body)
|
||||
self.assertIn(">Yes<", body)
|
||||
self.assertNotIn(">Skipped<", body)
|
||||
self.assertNotIn(">Off<", body)
|
||||
|
||||
def test_badge_shows_skipped_with_reason_on_api_error(self):
|
||||
reason = (
|
||||
"api_error: 404 Client Error: Not Found for url: "
|
||||
"http://pan.helu.ca:8400/v1/v1/rerank"
|
||||
)
|
||||
response, capture = self._run(
|
||||
rerank_value="on",
|
||||
reranker_used=False,
|
||||
reranker_skip_reason=reason,
|
||||
)
|
||||
# Sanity: the view actually requested re-ranking.
|
||||
self.assertTrue(capture["request"].rerank)
|
||||
|
||||
body = response.content.decode()
|
||||
self.assertIn(">Skipped", body)
|
||||
# Reason shown in-page so the user can debug without grepping logs.
|
||||
# The reason has no HTML-special characters, so auto-escaping leaves it intact.
|
||||
self.assertIn("api_error:", body)
|
||||
self.assertIn("404", body)
|
||||
# Must not claim success.
|
||||
self.assertNotIn(">Yes<", body)
|
||||
|
||||
def test_badge_shows_skipped_on_no_system_model(self):
|
||||
response, _ = self._run(
|
||||
rerank_value="on",
|
||||
reranker_used=False,
|
||||
reranker_skip_reason="no_system_model",
|
||||
)
|
||||
body = response.content.decode()
|
||||
self.assertIn(">Skipped", body)
|
||||
self.assertIn("no_system_model", body)
|
||||
|
||||
def test_badge_shows_off_when_rerank_unchecked(self):
|
||||
# HTML checkbox form: unchecked checkboxes are simply omitted
|
||||
# from the POST body, so we pass rerank_value=None (not "off").
|
||||
response, capture = self._run(
|
||||
rerank_value=None,
|
||||
reranker_used=False,
|
||||
reranker_skip_reason=None,
|
||||
)
|
||||
self.assertFalse(capture["request"].rerank)
|
||||
|
||||
body = response.content.decode()
|
||||
self.assertIn(">Off<", body)
|
||||
self.assertNotIn(">Skipped", body)
|
||||
self.assertNotIn(">Yes<", body)
|
||||
|
||||
Reference in New Issue
Block a user