feat: add init sidecar for migrations and setup on compose up
Introduces a one-shot `init` service in docker-compose that runs Postgres migrations, Neo4j index setup, and library-type seeding on every `up`. Long-running services (`app`, `mcp`, `worker`) now depend on its successful completion via `service_completed_successfully`, blocking the stack on configuration errors (missing embedding model, dimension mismatch, unreachable DB) rather than serving silent zero-result searches. Also standardizes reranker test fixtures to use the `/v1` OpenAI-style base URL convention used across other service clients.
This commit is contained in:
@@ -24,10 +24,17 @@
|
|||||||
#
|
#
|
||||||
# Run:
|
# Run:
|
||||||
# docker compose up -d
|
# docker compose up -d
|
||||||
# docker compose run --rm app migrate # one-shot DB migrate
|
#
|
||||||
# docker compose run --rm app setup # Neo4j indexes + library types
|
# The `init` sidecar (below) runs Postgres migrations, Neo4j index setup,
|
||||||
|
# and library-type seeding on every `up`. Long-running services wait for
|
||||||
|
# it via `depends_on: init: service_completed_successfully` — so a failure
|
||||||
|
# there (missing embedding model, dimension mismatch, unreachable DB)
|
||||||
|
# blocks the stack rather than letting it serve silent zero-result
|
||||||
|
# searches. The standalone `migrate` / `setup` entrypoint commands remain
|
||||||
|
# available for ad-hoc ops work.
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
# ── Static-file seeder: copies /app/staticfiles into the shared volume on
|
# ── Static-file seeder: copies /app/staticfiles into the shared volume on
|
||||||
# every `up`. Runs once and exits. Without this, the named volume is only
|
# every `up`. Runs once and exits. Without this, the named volume is only
|
||||||
@@ -41,6 +48,41 @@ services:
|
|||||||
- mnemosyne-static:/shared-static
|
- mnemosyne-static:/shared-static
|
||||||
restart: "no"
|
restart: "no"
|
||||||
|
|
||||||
|
# ── Init sidecar: one-shot Postgres migrate + Neo4j index setup + library
|
||||||
|
# type seed. Runs on every `up` and exits. Long-running services below
|
||||||
|
# depend on `service_completed_successfully`, so a failure here (no system
|
||||||
|
# embedding model configured, dimension mismatch, unreachable DB) blocks
|
||||||
|
# `app`/`mcp`/`worker` from starting — which is the whole point. All three
|
||||||
|
# commands are idempotent: re-running is a no-op unless state actually
|
||||||
|
# needs to change.
|
||||||
|
#
|
||||||
|
# This sidecar only needs Postgres, Neo4j, and logging env — no S3, no
|
||||||
|
# Celery, no LLM encryption key. Keep it that way.
|
||||||
|
init:
  image: git.helu.ca/r/mnemosyne:latest
  pull_policy: always
  # Entrypoint sub-command for the bundled one-shot init run.
  command:
    - init
  environment:
    # Needed for Django to import its settings module.
    - DJANGO_SETTINGS_MODULE=mnemosyne.settings
    - SECRET_KEY=${SECRET_KEY}
    - DEBUG=${DEBUG}
    - TIME_ZONE=${TIME_ZONE}
    - LANGUAGE_CODE=${LANGUAGE_CODE}
    # Postgres connection, used by `migrate`.
    - APP_DB_NAME=${APP_DB_NAME}
    - APP_DB_USER=${APP_DB_USER}
    - APP_DB_PASSWORD=${APP_DB_PASSWORD}
    - DB_HOST=${DB_HOST}
    - DB_PORT=${DB_PORT}
    # Neo4j connection, used by setup_neo4j_indexes and load_library_types.
    - NEOMODEL_NEO4J_BOLT_URL=${NEOMODEL_NEO4J_BOLT_URL}
    # Log verbosity.
    - LOGGING_LEVEL=${LOGGING_LEVEL}
    - DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
  # One-shot service: dependants gate on `service_completed_successfully`,
  # so it must not be restarted after it exits.
  restart: "no"
|
||||||
|
|
||||||
|
|
||||||
# ── App: Django REST API + admin ──────────────────────────────────────────
|
# ── App: Django REST API + admin ──────────────────────────────────────────
|
||||||
# Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
|
# Serves /library/api/*, /admin/, /live/, /ready/, /metrics. Enqueues
|
||||||
# Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
|
# Celery tasks (hence CELERY_BROKER_URL is required here too — Django is
|
||||||
@@ -103,6 +145,8 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
static-init:
|
static-init:
|
||||||
condition: service_completed_successfully
|
condition: service_completed_successfully
|
||||||
|
init:
|
||||||
|
condition: service_completed_successfully
|
||||||
volumes:
|
volumes:
|
||||||
- mnemosyne-media:/app/media
|
- mnemosyne-media:/app/media
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -112,6 +156,7 @@ services:
|
|||||||
retries: 3
|
retries: 3
|
||||||
start_period: 30s
|
start_period: 30s
|
||||||
|
|
||||||
|
|
||||||
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
|
# ── MCP server: FastMCP Streamable HTTP at /mcp/ ───────────────────────────
|
||||||
# Read-only LLM-facing surface. Intentionally excluded:
|
# Read-only LLM-facing surface. Intentionally excluded:
|
||||||
# CELERY_BROKER_URL — MCP must not enqueue tasks
|
# CELERY_BROKER_URL — MCP must not enqueue tasks
|
||||||
@@ -171,6 +216,9 @@ services:
|
|||||||
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
- LOGGING_LEVEL=${LOGGING_LEVEL}
|
||||||
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
- DJANGO_LOGGING_LEVEL=${DJANGO_LOGGING_LEVEL}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
init:
|
||||||
|
condition: service_completed_successfully
|
||||||
volumes:
|
volumes:
|
||||||
- mnemosyne-media:/app/media
|
- mnemosyne-media:/app/media
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -180,6 +228,7 @@ services:
|
|||||||
retries: 3
|
retries: 3
|
||||||
start_period: 30s
|
start_period: 30s
|
||||||
|
|
||||||
|
|
||||||
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
|
# ── Celery worker: embedding + ingest + batch queues ───────────────────────
|
||||||
# Consumer side of the queue. Needs the full S3 block (reads Daedalus's
|
# Consumer side of the queue. Needs the full S3 block (reads Daedalus's
|
||||||
# bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
|
# bucket, writes to Mnemosyne's), the LLM API encryption key (ingest calls
|
||||||
|
|||||||
@@ -55,6 +55,19 @@ case "$1" in
|
|||||||
python manage.py load_library_types
|
python manage.py load_library_types
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
init)
|
||||||
|
# Bundled one-shot init run by the `init` sidecar on every
|
||||||
|
# `docker compose up`. Idempotent: re-runs are no-ops unless migrations
|
||||||
|
# or indexes need to change. A non-zero exit here blocks `app`, `mcp`,
|
||||||
|
# and `worker` from starting, which is the point — we'd rather fail
|
||||||
|
# loudly than serve silent zero-result searches.
|
||||||
|
set -e
|
||||||
|
python manage.py migrate --noinput
|
||||||
|
python manage.py setup_neo4j_indexes
|
||||||
|
python manage.py load_library_types
|
||||||
|
;;
|
||||||
|
|
||||||
|
|
||||||
shell)
|
shell)
|
||||||
# Drop into the management shell for ad-hoc work.
|
# Drop into the management shell for ad-hoc work.
|
||||||
exec python manage.py shell
|
exec python manage.py shell
|
||||||
|
|||||||
@@ -61,6 +61,22 @@ POST http://pan.helu.ca:8400/v1/rerank
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **`LLMApi.base_url` convention.** Every Mnemosyne service client
|
||||||
|
> (`EmbeddingClient`, `RerankerClient`, `vision.py`, `concepts.py`)
|
||||||
|
> treats `base_url` as the **OpenAI-style `/v1` root** and appends a
|
||||||
|
> path-only segment: `/embeddings`, `/rerank`, `/chat/completions`.
|
||||||
|
> So a single `LLMApi` row with `base_url=http://pan.helu.ca:8400/v1`
|
||||||
|
> serves both the embedding and the reranker endpoints — no per-purpose
|
||||||
|
> duplication needed.
|
||||||
|
>
|
||||||
|
> Get this wrong (e.g. set `base_url=http://pan.helu.ca:8400` with no
|
||||||
|
> `/v1`, or have a client prepend `/v1` locally) and you get a missing-
|
||||||
|
> or double-prefixed URL (`…/rerank`, `…/v1/v1/rerank`) that 404s silently —
|
||||||
|
> `SearchService._rerank` catches the exception, the UI shows
|
||||||
|
> "Re-rank: Skipped", and the search falls back to raw RRF order.
|
||||||
|
> Check `results.reranker_skip_reason` on the search page for the
|
||||||
|
> specific error.
|
||||||
|
|
||||||
## Deliverables
|
## Deliverables
|
||||||
|
|
||||||
### 1. Search Service (`library/services/search.py`)
|
### 1. Search Service (`library/services/search.py`)
|
||||||
|
|||||||
@@ -294,31 +294,37 @@ graph LR
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="alert alert-warning border-start border-4 border-warning">
|
<div class="alert alert-warning border-start border-4 border-warning">
|
||||||
<h4><i class="bi bi-lightning"></i> Neo4j Vector Indexes</h4>
|
<h4><i class="bi bi-lightning"></i> Neo4j Indexes (managed by <code>setup_neo4j_indexes</code>)</h4>
|
||||||
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (4096 dimensions, no pgvector limits!)
|
<p>Created by the <code>init</code> sidecar on every <code>docker compose up</code>. Vector dimensions come from the system embedding model's <code>vector_dimensions</code> field — the command fails if no model is configured. Current production model: <strong>Pan Synesis · qwen3-vl-embedding-2b · 2048d</strong>.</p>
|
||||||
CREATE VECTOR INDEX chunk_embedding FOR (c:Chunk)
|
<pre class="bg-light p-3 rounded mb-0"><code>// Chunk text+image embeddings (dimensions read from system embedding model)
|
||||||
|
CREATE VECTOR INDEX chunk_embedding_index FOR (c:Chunk)
|
||||||
ON (c.embedding) OPTIONS {indexConfig: {
|
ON (c.embedding) OPTIONS {indexConfig: {
|
||||||
`vector.dimensions`: 4096,
|
`vector.dimensions`: 2048,
|
||||||
`vector.similarity_function`: 'cosine'
|
`vector.similarity_function`: 'cosine'
|
||||||
}}
|
}}
|
||||||
|
|
||||||
// Concept embeddings for semantic concept search
|
// Concept embeddings for semantic concept search
|
||||||
CREATE VECTOR INDEX concept_embedding FOR (con:Concept)
|
CREATE VECTOR INDEX concept_embedding_index FOR (con:Concept)
|
||||||
ON (con.embedding) OPTIONS {indexConfig: {
|
ON (con.embedding) OPTIONS {indexConfig: {
|
||||||
`vector.dimensions`: 4096,
|
`vector.dimensions`: 2048,
|
||||||
`vector.similarity_function`: 'cosine'
|
`vector.similarity_function`: 'cosine'
|
||||||
}}
|
}}
|
||||||
|
|
||||||
// Image multimodal embeddings
|
// Image multimodal embeddings
|
||||||
CREATE VECTOR INDEX image_embedding FOR (ie:ImageEmbedding)
|
CREATE VECTOR INDEX image_embedding_index FOR (ie:ImageEmbedding)
|
||||||
ON (ie.embedding) OPTIONS {indexConfig: {
|
ON (ie.embedding) OPTIONS {indexConfig: {
|
||||||
`vector.dimensions`: 4096,
|
`vector.dimensions`: 2048,
|
||||||
`vector.similarity_function`: 'cosine'
|
`vector.similarity_function`: 'cosine'
|
||||||
}}
|
}}
|
||||||
|
|
||||||
// Full-text index for keyword/BM25-style search
|
// Full-text indexes (BM25-style keyword search)
|
||||||
CREATE FULLTEXT INDEX chunk_fulltext FOR (c:Chunk) ON EACH [c.text_preview]</code></pre>
|
CREATE FULLTEXT INDEX chunk_text_fulltext FOR (c:Chunk) ON EACH [c.text_preview]
|
||||||
|
CREATE FULLTEXT INDEX concept_name_fulltext FOR (c:Concept) ON EACH [c.name]
|
||||||
|
CREATE FULLTEXT INDEX item_title_fulltext FOR (i:Item) ON EACH [i.title]
|
||||||
|
CREATE FULLTEXT INDEX library_name_fulltext FOR (l:Library) ON EACH [l.name]</code></pre>
|
||||||
|
<p class="mb-0 mt-3"><strong>Changing the embedding model or dimensions is a re-embedding event.</strong> Drop + recreate the vector indexes (<code>setup_neo4j_indexes --drop</code>) and re-queue all content for embedding. Old vectors at the previous dimension remain on the nodes until overwritten but are no longer indexed.</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- SECTION: CONTENT TYPES -->
|
<!-- SECTION: CONTENT TYPES -->
|
||||||
@@ -521,10 +527,11 @@ flowchart TD
|
|||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
|
<p>Cosine similarity via Neo4j vector index on Chunk and ImageEmbedding nodes.</p>
|
||||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
|
<pre class="bg-light p-2 rounded"><code>CALL db.index.vector.queryNodes(
|
||||||
'chunk_embedding', 30,
|
'chunk_embedding_index', 30,
|
||||||
$query_vector
|
$query_vector
|
||||||
) YIELD node, score
|
) YIELD node, score
|
||||||
WHERE score > $threshold</code></pre>
|
WHERE score > $threshold</code></pre>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -548,9 +555,10 @@ RETURN c2, i2</code></pre>
|
|||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
|
<p>Neo4j native full-text index for keyword matching (BM25-equivalent).</p>
|
||||||
<pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
|
<pre class="bg-light p-2 rounded"><code>CALL db.index.fulltext.queryNodes(
|
||||||
'chunk_fulltext',
|
'chunk_text_fulltext',
|
||||||
$query_text
|
$query_text
|
||||||
) YIELD node, score</code></pre>
|
) YIELD node, score</code></pre>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -150,6 +150,16 @@ class SearchResponseSerializer(serializers.Serializer):
|
|||||||
reranker_used = serializers.BooleanField()
|
reranker_used = serializers.BooleanField()
|
||||||
reranker_model = serializers.CharField(allow_null=True)
|
reranker_model = serializers.CharField(allow_null=True)
|
||||||
search_types_used = serializers.ListField(child=serializers.CharField())
|
search_types_used = serializers.ListField(child=serializers.CharField())
|
||||||
|
# Populated when ``rerank=True`` was requested but the re-ranking
|
||||||
|
# step did not run — e.g. no system reranker model configured
|
||||||
|
# (``no_system_model``), the Synesis call raised
|
||||||
|
# (``api_error: ...``), or fusion produced zero candidates
|
||||||
|
# (``no_candidates``). ``null`` means either success or that
|
||||||
|
# re-ranking was not requested. ``required=False`` keeps old
|
||||||
|
# clients happy.
|
||||||
|
reranker_skip_reason = serializers.CharField(
|
||||||
|
allow_null=True, required=False, default=None
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# --- Workspace lifecycle (Daedalus integration) ---
|
# --- Workspace lifecycle (Daedalus integration) ---
|
||||||
|
|||||||
@@ -1,7 +1,202 @@
|
|||||||
|
"""
|
||||||
|
Django AppConfig for the ``library`` app.
|
||||||
|
|
||||||
|
Registers a startup probe that runs once per Python process and yells if
|
||||||
|
Mnemosyne is misconfigured in a way that would cause silent zero-result
|
||||||
|
searches — missing embedding model, missing Neo4j index, or a dimension
|
||||||
|
mismatch between the model and an existing index. Loud ERROR lines are the
|
||||||
|
only defence against "search works but returns nothing", which is
|
||||||
|
indistinguishable from "search works and matched nothing" unless you read
|
||||||
|
the stderr of a different container.
|
||||||
|
|
||||||
|
The probe is deliberately best-effort: it cannot crash the process even if
|
||||||
|
Neo4j is unreachable, because a transient DB blip on startup should not
|
||||||
|
take down the whole app. The `init` sidecar is the hard gate; this is the
|
||||||
|
second line of defence for long-running containers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Index names we expect setup_neo4j_indexes to have created. Kept in sync
|
||||||
|
# with library/management/commands/setup_neo4j_indexes.py. A test asserts
|
||||||
|
# they stay in sync.
|
||||||
|
_EXPECTED_VECTOR_INDEXES = (
|
||||||
|
"chunk_embedding_index",
|
||||||
|
"concept_embedding_index",
|
||||||
|
"image_embedding_index",
|
||||||
|
)
|
||||||
|
_EXPECTED_FULLTEXT_INDEXES = (
|
||||||
|
"chunk_text_fulltext",
|
||||||
|
"concept_name_fulltext",
|
||||||
|
"item_title_fulltext",
|
||||||
|
"library_name_fulltext",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _should_skip_probe() -> bool:
    """
    Decide whether to skip the startup probe.

    Skip when any of the following holds:

    - Running under pytest: ``sys.argv[0]`` contains ``pytest`` or the
      ``PYTEST_CURRENT_TEST`` environment variable is set.
    - ``DJANGO_SKIP_STARTUP_PROBE`` is set to ``"1"`` (explicit opt-out).
    - ``sys.argv[1]`` is a management command other than the long-running
      servers (migrate, makemigrations, setup_neo4j_indexes,
      load_library_types, collectstatic, test, shell, ...). The probe would
      just spam stderr for ops work that doesn't care about Neo4j index
      state.
    - ``NEOMODEL_NEO4J_BOLT_URL`` is unset or empty (build-time
      ``collectstatic`` stubs, CI unit tests without real infra).

    :returns: ``True`` if the probe should be skipped, ``False`` otherwise.
    """
    # ``sys.argv`` may be an empty list (e.g. some embedding scenarios);
    # treat that as "no program name" instead of raising IndexError.
    program = sys.argv[0] if sys.argv else ""
    if "pytest" in program or "PYTEST_CURRENT_TEST" in os.environ:
        return True
    if os.environ.get("DJANGO_SKIP_STARTUP_PROBE") == "1":
        return True
    # Typical Django command invocations where the probe is noise.
    skip_commands = {
        "migrate",
        "makemigrations",
        "setup_neo4j_indexes",
        "load_library_types",
        "collectstatic",
        "test",
        "shell",
        "check",
        "dbshell",
        "showmigrations",
        "squashmigrations",
        "createsuperuser",
        "help",
    }
    if len(sys.argv) >= 2 and sys.argv[1] in skip_commands:
        return True
    # No Neo4j endpoint configured — probably a build or local dev without
    # graph infrastructure. Don't pretend we can check.
    if not os.environ.get("NEOMODEL_NEO4J_BOLT_URL"):
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _probe_embedding_model():
    """Log the system embedding model's state; return ``(dimensions, label)``.

    ``dimensions`` is ``None`` when no model is configured, the model has no
    ``vector_dimensions``, or the lookup raised.
    """
    embedding_dim = None
    embedding_model_label = "<unknown>"
    try:
        from llm_manager.models import LLMModel

        model = LLMModel.get_system_embedding_model()
        if not model:
            logger.error(
                "No system embedding model configured. Search will return "
                "zero results until one is set in the LLM admin."
            )
        elif not model.vector_dimensions:
            logger.error(
                "System embedding model '%s: %s' has no vector_dimensions "
                "set. Neo4j vector indexes cannot be validated and search "
                "quality will be unpredictable.",
                model.api.name,
                model.name,
            )
            embedding_model_label = f"{model.api.name}: {model.name}"
        else:
            embedding_dim = model.vector_dimensions
            embedding_model_label = f"{model.api.name}: {model.name}"
            logger.info(
                "System embedding model: %s (%dd)",
                embedding_model_label,
                embedding_dim,
            )
    except Exception as exc:
        # Best-effort: a failed lookup is reported, never raised.
        logger.warning(
            "Startup probe could not read system embedding model: %s", exc
        )
    return embedding_dim, embedding_model_label


def _probe_vector_indexes(present, embedding_dim, embedding_model_label):
    """Log an ERROR for each expected vector index that is missing or mis-sized.

    :param present: Mapping of index name to ``(type, options)`` as listed
        by ``SHOW INDEXES``.
    :param embedding_dim: Dimension count of the system embedding model, or
        ``None`` to skip the dimension comparison.
    :param embedding_model_label: Human-readable model name for log output.
    """
    for name in _EXPECTED_VECTOR_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j vector index '%s' is missing. Run "
                "'docker compose run --rm init' (or 'python manage.py "
                "setup_neo4j_indexes') to rebuild.",
                name,
            )
            continue
        # Without a known model dimension there is nothing to compare to.
        if embedding_dim is None:
            continue
        _, options = present[name]
        config = (options or {}).get("indexConfig") or {}
        raw_dim = config.get("vector.dimensions")
        try:
            existing_dim = int(raw_dim) if raw_dim is not None else None
        except (TypeError, ValueError):
            # Unparseable dimension: treated as unknown, not as a mismatch.
            existing_dim = None
        if existing_dim is not None and existing_dim != embedding_dim:
            logger.error(
                "Neo4j index '%s' has %d dimensions but system embedding "
                "model %s reports %d. Re-run 'setup_neo4j_indexes --drop' "
                "and re-embed all content — search will return empty or "
                "wrong results until this is fixed.",
                name,
                existing_dim,
                embedding_model_label,
                embedding_dim,
            )


def _probe_fulltext_indexes(present):
    """Log an ERROR for each expected full-text index absent from ``present``."""
    for name in _EXPECTED_FULLTEXT_INDEXES:
        if name not in present:
            logger.error(
                "Neo4j full-text index '%s' is missing. Full-text search "
                "will silently return no matches. Run 'setup_neo4j_indexes'.",
                name,
            )


def _run_startup_probe():
    """
    Emit ERROR/WARNING logs if the stack is misconfigured for search.

    Runs three checks: the system embedding model, the expected Neo4j
    vector indexes (presence and dimension), and the expected full-text
    indexes. The model lookup and the index listing are each guarded so one
    unreachable dependency doesn't mask the other findings. If the index
    listing itself fails, the index checks are skipped. Returns nothing;
    the side effect is log output.
    """
    from neomodel import db

    # --- 1. System embedding model --------------------------------------
    embedding_dim, embedding_model_label = _probe_embedding_model()

    # --- 2. Neo4j indexes present & correctly dimensioned ---------------
    try:
        results, _ = db.cypher_query(
            "SHOW INDEXES YIELD name, type, options RETURN name, type, options"
        )
    except Exception as exc:
        logger.warning(
            "Startup probe could not list Neo4j indexes: %s. Search "
            "degradation will only surface at query time.",
            exc,
        )
        return

    present = {}
    for row in results:
        name, idx_type, options = row[0], row[1], row[2]
        present[name] = (idx_type, options)

    _probe_vector_indexes(present, embedding_dim, embedding_model_label)
    _probe_fulltext_indexes(present)
|
||||||
|
|
||||||
|
|
||||||
class LibraryConfig(AppConfig):
    """App configuration for ``library``; runs the startup probe in ``ready()``."""

    default_auto_field = "django.db.models.BigAutoField"
    name = "library"
    verbose_name = "Library"

    def ready(self):
        """Run the best-effort startup probe unless it should be skipped."""
        if not _should_skip_probe():
            try:
                _run_startup_probe()
            except Exception as exc:
                # The probe is diagnostic only: log and carry on so it can
                # never take down the process.
                logger.warning("Startup probe crashed: %s", exc, exc_info=True)
|
||||||
|
|||||||
@@ -2,21 +2,28 @@
|
|||||||
Management command to create Neo4j indexes for Mnemosyne content graph.
|
Management command to create Neo4j indexes for Mnemosyne content graph.
|
||||||
|
|
||||||
Creates:
|
Creates:
|
||||||
- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding
|
- Vector indexes for Chunk, Concept, and ImageEmbedding. Dimensions are read
|
||||||
- Full-text indexes for text search on Chunk.text_preview and Concept.name
|
from the system embedding model — NOT a settings default. If no model is
|
||||||
- Constraint indexes enforced by neomodel (unique properties)
|
configured and no ``--dimensions`` override is supplied, the command
|
||||||
|
fails: a wrong dimension is worse than a clear error.
|
||||||
|
- Full-text indexes for text search on Chunk.text_preview, Concept.name,
|
||||||
|
Item.title, Library.name.
|
||||||
|
- Constraint indexes enforced by neomodel (unique properties).
|
||||||
|
|
||||||
|
When run via the `init` sidecar, a non-zero exit here blocks ``app``/``mcp``/
|
||||||
|
``worker`` from starting. That is deliberate: silent zero-result searches
|
||||||
|
are the failure mode we are trying to prevent.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
from neomodel import db
|
from neomodel import db
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Default vector dimensions (used when no system embedding model is configured)
|
|
||||||
DEFAULT_VECTOR_DIMENSIONS = 4096
|
|
||||||
|
|
||||||
# Full-text index definitions: (index_name, label, properties)
|
# Full-text index definitions: (index_name, label, properties)
|
||||||
FULLTEXT_INDEXES = [
|
FULLTEXT_INDEXES = [
|
||||||
("chunk_text_fulltext", "Chunk", ["text_preview"]),
|
("chunk_text_fulltext", "Chunk", ["text_preview"]),
|
||||||
@@ -26,57 +33,122 @@ FULLTEXT_INDEXES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def _get_vector_dimensions(override: int = 0):
    """
    Resolve the dimension count to build vector indexes at.

    Precedence (top wins):
      1. ``--dimensions`` CLI override (positive int)
      2. System embedding model's ``vector_dimensions`` field

    If neither is available, returns ``(None, reason)`` and the caller must
    abort. There is deliberately no hardcoded fallback — an index built at
    the wrong dimension silently breaks search forever.

    A negative override is rejected with ``(None, reason)`` rather than
    being silently ignored: the operator asked for an explicit value, so
    falling back to the model would hide their mistake.

    :param override: Value from the ``--dimensions`` CLI flag (``0`` or
        ``None`` if unset).
    :returns: Tuple of ``(dimensions, source_description)``. ``dimensions``
        is ``None`` when the value cannot be resolved.
    """
    if override:
        if override > 0:
            return override, f"CLI override (--dimensions={override})"
        return (
            None,
            f"invalid --dimensions override ({override}); must be a "
            "positive integer",
        )

    try:
        from llm_manager.models import LLMModel

        model = LLMModel.get_system_embedding_model()
    except Exception as exc:  # pragma: no cover - DB unreachable path
        return None, f"LLMModel lookup failed: {exc}"

    if not model:
        return None, "no system embedding model configured"
    if not model.vector_dimensions:
        return (
            None,
            f"system embedding model '{model.api.name}: {model.name}' has "
            "no vector_dimensions set",
        )

    return model.vector_dimensions, f"{model.api.name}: {model.name}"
|
||||||
|
|
||||||
|
|
||||||
|
def _existing_vector_index_dimensions(name: str):
    """
    Return the configured dimension count of an existing vector index.

    Neo4j's ``SHOW INDEXES`` returns ``options`` as a map whose
    ``indexConfig`` entry holds ``vector.dimensions``.

    :param name: Index name to look up.
    :returns: The dimension count as an ``int``, or ``None`` if the query
        fails, the index doesn't exist, isn't a vector index, or the
        dimension cannot be parsed (e.g. Neo4j version differences).
    """
    try:
        results, _ = db.cypher_query(
            "SHOW INDEXES YIELD name, type, options "
            "WHERE name = $name RETURN type, options",
            {"name": name},
        )
    except Exception as exc:
        # Still best-effort, but leave a trace: otherwise a failed query is
        # indistinguishable from "index absent" and a dimension mismatch
        # would go unnoticed without any hint as to why.
        logger.warning(
            "Could not read dimensions of Neo4j index '%s': %s", name, exc
        )
        return None

    if not results:
        return None

    idx_type, options = results[0]
    if (idx_type or "").upper() != "VECTOR":
        return None

    # ``options`` comes back as a dict in the Python driver.
    config = (options or {}).get("indexConfig") or {}
    dims = config.get("vector.dimensions")
    if dims is None:
        return None
    try:
        return int(dims)
    except (TypeError, ValueError):
        # int() already handles plain numeric strings; this fallback pulls
        # the first run of digits out of any other representation.
        m = re.search(r"\d+", str(dims))
        return int(m.group(0)) if m else None
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = (
|
help = (
|
||||||
"Create Neo4j vector, full-text, and constraint indexes "
|
"Create Neo4j vector, full-text, and constraint indexes for the "
|
||||||
"for the Mnemosyne content graph. Vector dimensions are read "
|
"Mnemosyne content graph. Vector dimensions are read from the "
|
||||||
"from the system embedding model."
|
"system embedding model; the command fails if the model is not "
|
||||||
|
"configured (pass --dimensions only for explicit overrides)."
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--drop",
|
"--drop",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Drop existing indexes before recreating them",
|
help="Drop existing managed indexes before recreating them.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--dimensions",
|
"--dimensions",
|
||||||
type=int,
|
type=int,
|
||||||
default=0,
|
default=0,
|
||||||
help="Override vector dimensions (default: read from system embedding model)",
|
help=(
|
||||||
|
"Override vector dimensions (normally read from the system "
|
||||||
|
"embedding model). Use with care — an incorrect value "
|
||||||
|
"silently breaks search."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
drop = options["drop"]
|
drop = options["drop"]
|
||||||
override_dims = options["dimensions"]
|
override_dims = options["dimensions"]
|
||||||
|
|
||||||
# Resolve vector dimensions
|
dimensions, source = _get_vector_dimensions(override_dims)
|
||||||
if override_dims > 0:
|
|
||||||
dimensions = override_dims
|
if dimensions is None:
|
||||||
source = f"CLI override ({override_dims})"
|
# Fail loudly. Returning success while skipping vector-index
|
||||||
else:
|
# creation is exactly how `app`/`mcp`/`worker` end up running
|
||||||
dimensions, source = _get_vector_dimensions()
|
# against an un-indexed Neo4j and serving empty search results.
|
||||||
|
raise CommandError(
|
||||||
|
"Cannot create vector indexes: "
|
||||||
|
f"{source}. Configure a system embedding model with "
|
||||||
|
"vector_dimensions set, or pass --dimensions N explicitly."
|
||||||
|
)
|
||||||
|
|
||||||
self.stdout.write(
|
self.stdout.write(
|
||||||
self.style.HTTP_INFO(
|
self.style.HTTP_INFO(
|
||||||
@@ -84,7 +156,7 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Vector index definitions (dynamic dimensions)
|
# Vector index definitions (dimensions resolved above)
|
||||||
vector_indexes = [
|
vector_indexes = [
|
||||||
("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
|
("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
|
||||||
("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
|
("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
|
||||||
@@ -98,11 +170,27 @@ class Command(BaseCommand):
|
|||||||
self._drop_indexes(existing_indexes, vector_indexes)
|
self._drop_indexes(existing_indexes, vector_indexes)
|
||||||
existing_indexes = self._get_existing_indexes()
|
existing_indexes = self._get_existing_indexes()
|
||||||
|
|
||||||
|
dim_mismatch = False
|
||||||
|
|
||||||
# Create vector indexes
|
# Create vector indexes
|
||||||
for name, label, prop, dims, similarity in vector_indexes:
|
for name, label, prop, dims, similarity in vector_indexes:
|
||||||
if name in existing_indexes:
|
if name in existing_indexes:
|
||||||
|
existing_dim = _existing_vector_index_dimensions(name)
|
||||||
|
if existing_dim is not None and existing_dim != dims:
|
||||||
|
dim_mismatch = True
|
||||||
|
self.stderr.write(
|
||||||
|
self.style.ERROR(
|
||||||
|
f"Vector index '{name}' exists at {existing_dim} "
|
||||||
|
f"dimensions but the system model requires {dims}. "
|
||||||
|
"Re-run with --drop and re-embed all content."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
continue
|
||||||
self.stdout.write(
|
self.stdout.write(
|
||||||
self.style.NOTICE(f"Vector index '{name}' already exists, skipping")
|
self.style.NOTICE(
|
||||||
|
f"Vector index '{name}' already exists "
|
||||||
|
f"({existing_dim or 'unknown'}d), skipping"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
@@ -164,6 +252,20 @@ class Command(BaseCommand):
|
|||||||
self.style.ERROR(f"Failed to install neomodel labels: {e}")
|
self.style.ERROR(f"Failed to install neomodel labels: {e}")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if dim_mismatch:
|
||||||
|
# Exit non-zero so the `init` sidecar's compose dependency marks
|
||||||
|
# the stack startup as failed. Admin must re-run with --drop and
|
||||||
|
# re-embed content.
|
||||||
|
self.stderr.write(
|
||||||
|
self.style.ERROR(
|
||||||
|
"\nOne or more vector indexes exist at the wrong "
|
||||||
|
"dimension. Search will return empty or garbage results "
|
||||||
|
"until you run: setup_neo4j_indexes --drop "
|
||||||
|
"and re-embed all content."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))
|
self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))
|
||||||
|
|
||||||
def _get_existing_indexes(self):
|
def _get_existing_indexes(self):
|
||||||
|
|||||||
@@ -79,7 +79,13 @@ class RerankerClient:
|
|||||||
query, candidates, instruction, top_n, query_image
|
query, candidates, instruction, top_n, query_image
|
||||||
)
|
)
|
||||||
|
|
||||||
url = f"{self.base_url}/v1/rerank"
|
# Convention shared with every other service client in Mnemosyne
|
||||||
|
# (embedding_client, vision, concepts): ``base_url`` is the
|
||||||
|
# OpenAI-style ``/v1`` root (e.g. ``http://pan.helu.ca:8400/v1``),
|
||||||
|
# and each client appends a path-only segment. Prepending
|
||||||
|
# ``/v1`` here would build ``…/v1/v1/rerank`` and 404 against
|
||||||
|
# Synesis.
|
||||||
|
url = f"{self.base_url}/rerank"
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
if self.api.api_key:
|
if self.api.api_key:
|
||||||
headers["Authorization"] = f"Bearer {self.api.api_key}"
|
headers["Authorization"] = f"Bearer {self.api.api_key}"
|
||||||
|
|||||||
@@ -99,7 +99,17 @@ class SearchRequest:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SearchResponse:
|
class SearchResponse:
|
||||||
"""Results from a search query."""
|
"""Results from a search query.
|
||||||
|
|
||||||
|
``reranker_skip_reason`` is set when the caller requested
|
||||||
|
``rerank=True`` but the re-ranking step did not actually run — for
|
||||||
|
example because no system reranker model is configured, the Synesis
|
||||||
|
HTTP call raised, or the fused candidate list was empty. It is
|
||||||
|
``None`` both when re-ranking succeeded (``reranker_used=True``) and
|
||||||
|
when the caller asked for ``rerank=False`` — callers / templates
|
||||||
|
should distinguish those two "no-reason" cases by looking at the
|
||||||
|
original request's ``rerank`` flag.
|
||||||
|
"""
|
||||||
|
|
||||||
query: str
|
query: str
|
||||||
candidates: list[SearchCandidate]
|
candidates: list[SearchCandidate]
|
||||||
@@ -109,6 +119,7 @@ class SearchResponse:
|
|||||||
reranker_used: bool
|
reranker_used: bool
|
||||||
reranker_model: Optional[str]
|
reranker_model: Optional[str]
|
||||||
search_types_used: list[str]
|
search_types_used: list[str]
|
||||||
|
reranker_skip_reason: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class SearchService:
|
class SearchService:
|
||||||
@@ -186,13 +197,19 @@ class SearchService:
|
|||||||
# --- Re-rank ---
|
# --- Re-rank ---
|
||||||
reranker_used = False
|
reranker_used = False
|
||||||
reranker_model_name = None
|
reranker_model_name = None
|
||||||
|
reranker_skip_reason: Optional[str] = None
|
||||||
|
|
||||||
if request.rerank and fused:
|
if request.rerank:
|
||||||
reranked, model_name = self._rerank(request, fused)
|
if not fused:
|
||||||
if reranked is not None:
|
reranker_skip_reason = "no_candidates"
|
||||||
fused = reranked
|
else:
|
||||||
reranker_used = True
|
reranked, model_name, skip_reason = self._rerank(request, fused)
|
||||||
reranker_model_name = model_name
|
if reranked is not None:
|
||||||
|
fused = reranked
|
||||||
|
reranker_used = True
|
||||||
|
reranker_model_name = model_name
|
||||||
|
else:
|
||||||
|
reranker_skip_reason = skip_reason
|
||||||
|
|
||||||
# Trim to limit
|
# Trim to limit
|
||||||
fused = fused[: request.limit]
|
fused = fused[: request.limit]
|
||||||
@@ -225,6 +242,7 @@ class SearchService:
|
|||||||
reranker_used=reranker_used,
|
reranker_used=reranker_used,
|
||||||
reranker_model=reranker_model_name,
|
reranker_model=reranker_model_name,
|
||||||
search_types_used=search_types_used,
|
search_types_used=search_types_used,
|
||||||
|
reranker_skip_reason=reranker_skip_reason,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -720,13 +738,28 @@ class SearchService:
|
|||||||
|
|
||||||
def _rerank(
|
def _rerank(
|
||||||
self, request: SearchRequest, candidates: list[SearchCandidate]
|
self, request: SearchRequest, candidates: list[SearchCandidate]
|
||||||
) -> tuple[Optional[list[SearchCandidate]], Optional[str]]:
|
) -> tuple[Optional[list[SearchCandidate]], Optional[str], Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Re-rank candidates via Synesis.
|
Re-rank candidates via Synesis.
|
||||||
|
|
||||||
:param request: SearchRequest.
|
:param request: SearchRequest.
|
||||||
:param candidates: Fused candidates to re-rank.
|
:param candidates: Fused candidates to re-rank.
|
||||||
:returns: Tuple of (reranked_candidates, model_name) or (None, None).
|
:returns: Tuple of ``(reranked_candidates, model_name, skip_reason)``.
|
||||||
|
|
||||||
|
On success the first element is the reranked list, the second
|
||||||
|
is the model name, and the third is ``None``.
|
||||||
|
|
||||||
|
On skip the first two are ``None`` and the third is a short
|
||||||
|
machine-readable reason:
|
||||||
|
|
||||||
|
* ``"no_system_model"`` — no ``LLMModel.is_system_reranker_model``
|
||||||
|
configured.
|
||||||
|
* ``"api_error: <truncated msg>"`` — ``RerankerClient.rerank``
|
||||||
|
raised (HTTP error, network error, malformed response).
|
||||||
|
|
||||||
|
The reason is intended for display on the search page so the
|
||||||
|
user can see *why* re-ranking didn't happen, without having to
|
||||||
|
grep server logs.
|
||||||
"""
|
"""
|
||||||
from llm_manager.models import LLMModel
|
from llm_manager.models import LLMModel
|
||||||
|
|
||||||
@@ -735,7 +768,7 @@ class SearchService:
|
|||||||
reranker_model = LLMModel.get_system_reranker_model()
|
reranker_model = LLMModel.get_system_reranker_model()
|
||||||
if not reranker_model:
|
if not reranker_model:
|
||||||
logger.debug("No system reranker model — skipping re-ranking")
|
logger.debug("No system reranker model — skipping re-ranking")
|
||||||
return None, None
|
return None, None, "no_system_model"
|
||||||
|
|
||||||
# Get content-type reranker instruction
|
# Get content-type reranker instruction
|
||||||
instruction = self._get_reranker_instruction(request, candidates)
|
instruction = self._get_reranker_instruction(request, candidates)
|
||||||
@@ -755,12 +788,17 @@ class SearchService:
|
|||||||
instruction=instruction,
|
instruction=instruction,
|
||||||
query_image=request.query_image,
|
query_image=request.query_image,
|
||||||
)
|
)
|
||||||
return reranked, reranker_model.name
|
return reranked, reranker_model.name, None
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Re-ranking failed, returning fusion results: %s", exc
|
"Re-ranking failed, returning fusion results: %s", exc
|
||||||
)
|
)
|
||||||
return None, None
|
# Truncate the exception message to keep the UI tooltip /
|
||||||
|
# JSON payload bounded; full detail is in the WARNING log.
|
||||||
|
msg = str(exc)
|
||||||
|
if len(msg) > 200:
|
||||||
|
msg = msg[:197] + "..."
|
||||||
|
return None, None, f"api_error: {msg}"
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
|
|||||||
@@ -146,9 +146,23 @@
|
|||||||
{% if results_reranked.reranker_used %}
|
{% if results_reranked.reranker_used %}
|
||||||
<span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
|
<span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
|
||||||
{% else %}
|
{% else %}
|
||||||
<span class="badge badge-warning">unavailable</span>
|
{# The A/B page always requests rerank=True, so a
|
||||||
|
negative here is always a skip with a reason —
|
||||||
|
surface the reason in a tooltip so the user
|
||||||
|
knows *why* the B side fell back to fusion
|
||||||
|
order. #}
|
||||||
|
<span class="badge badge-warning"
|
||||||
|
title="{{ results_reranked.reranker_skip_reason|default:'unknown' }}">
|
||||||
|
unavailable
|
||||||
|
</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
{% if not results_reranked.reranker_used and results_reranked.reranker_skip_reason %}
|
||||||
|
<div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
|
||||||
|
title="{{ results_reranked.reranker_skip_reason }}">
|
||||||
|
{{ results_reranked.reranker_skip_reason }}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -81,12 +81,30 @@
|
|||||||
<div class="stat">
|
<div class="stat">
|
||||||
<div class="stat-title">Re-ranked</div>
|
<div class="stat-title">Re-ranked</div>
|
||||||
<div class="stat-value text-lg">
|
<div class="stat-value text-lg">
|
||||||
|
{# Three-state indicator:
|
||||||
|
- Yes: re-ranker ran successfully
|
||||||
|
- Skipped (+ tooltip): re-rank was requested but the
|
||||||
|
pipeline bailed — badge colour is a warning so the
|
||||||
|
user knows something didn't work
|
||||||
|
- Off: user unchecked the re-rank box
|
||||||
|
#}
|
||||||
{% if results.reranker_used %}
|
{% if results.reranker_used %}
|
||||||
<span class="badge badge-success">Yes</span>
|
<span class="badge badge-success">Yes</span>
|
||||||
|
{% elif results.reranker_skip_reason %}
|
||||||
|
<span class="badge badge-warning"
|
||||||
|
title="{{ results.reranker_skip_reason }}">
|
||||||
|
Skipped
|
||||||
|
</span>
|
||||||
{% else %}
|
{% else %}
|
||||||
<span class="badge badge-ghost">No</span>
|
<span class="badge badge-ghost">Off</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
{% if results.reranker_skip_reason %}
|
||||||
|
<div class="stat-desc text-warning text-xs mt-1 max-w-xs truncate"
|
||||||
|
title="{{ results.reranker_skip_reason }}">
|
||||||
|
{{ results.reranker_skip_reason }}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="stat">
|
<div class="stat">
|
||||||
<div class="stat-title">Search Types</div>
|
<div class="stat-title">Search Types</div>
|
||||||
|
|||||||
@@ -29,10 +29,15 @@ def _make_candidate(chunk_uid: str, text_preview: str = "Some text", **kwargs):
|
|||||||
|
|
||||||
|
|
||||||
def _mock_reranker_model():
|
def _mock_reranker_model():
|
||||||
"""Create a mock LLMModel for reranking."""
|
"""Create a mock LLMModel for reranking.
|
||||||
|
|
||||||
|
``base_url`` follows the project-wide convention: it is the
|
||||||
|
OpenAI-style ``/v1`` root. Every service client (embedding, vision,
|
||||||
|
concepts, reranker) appends a path-only segment to it.
|
||||||
|
"""
|
||||||
model = MagicMock()
|
model = MagicMock()
|
||||||
model.name = "qwen3-vl-reranker-2b"
|
model.name = "qwen3-vl-reranker-2b"
|
||||||
model.api.base_url = "http://pan.helu.ca:8400"
|
model.api.base_url = "http://pan.helu.ca:8400/v1"
|
||||||
model.api.api_key = ""
|
model.api.api_key = ""
|
||||||
model.api.timeout_seconds = 30
|
model.api.timeout_seconds = 30
|
||||||
model.input_cost_per_1k = Decimal("0")
|
model.input_cost_per_1k = Decimal("0")
|
||||||
@@ -49,7 +54,7 @@ class RerankerClientInitTest(TestCase):
|
|||||||
client = RerankerClient(model)
|
client = RerankerClient(model)
|
||||||
|
|
||||||
self.assertEqual(client.model_name, "qwen3-vl-reranker-2b")
|
self.assertEqual(client.model_name, "qwen3-vl-reranker-2b")
|
||||||
self.assertEqual(client.base_url, "http://pan.helu.ca:8400")
|
self.assertEqual(client.base_url, "http://pan.helu.ca:8400/v1")
|
||||||
|
|
||||||
|
|
||||||
class RerankerClientRerankTest(TestCase):
|
class RerankerClientRerankTest(TestCase):
|
||||||
@@ -198,6 +203,33 @@ class RerankerClientRerankTest(TestCase):
|
|||||||
with self.assertRaises(Exception):
|
with self.assertRaises(Exception):
|
||||||
client.rerank(query="test", candidates=candidates)
|
client.rerank(query="test", candidates=candidates)
|
||||||
|
|
||||||
|
@patch("library.services.reranker.requests.post")
|
||||||
|
def test_request_url_is_base_plus_rerank(self, mock_post):
|
||||||
|
"""URL is ``{base_url}/rerank`` — base_url already contains ``/v1``.
|
||||||
|
|
||||||
|
Regression guard: earlier versions prepended ``/v1/`` inside the
|
||||||
|
client, which produced ``…/v1/v1/rerank`` and 404 against Synesis
|
||||||
|
when the ``LLMApi.base_url`` followed the same convention as the
|
||||||
|
embedding / chat / vision clients.
|
||||||
|
"""
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.json.return_value = {
|
||||||
|
"results": [{"index": 0, "score": 0.5}],
|
||||||
|
}
|
||||||
|
mock_post.return_value = mock_response
|
||||||
|
|
||||||
|
model = _mock_reranker_model()
|
||||||
|
client = RerankerClient(model)
|
||||||
|
candidates = [_make_candidate("a")]
|
||||||
|
|
||||||
|
client.rerank(query="test", candidates=candidates)
|
||||||
|
|
||||||
|
# First positional arg to requests.post is the URL.
|
||||||
|
called_url = mock_post.call_args.args[0] if mock_post.call_args.args \
|
||||||
|
else mock_post.call_args.kwargs.get("url")
|
||||||
|
self.assertEqual(called_url, "http://pan.helu.ca:8400/v1/rerank")
|
||||||
|
|
||||||
@patch("library.services.reranker.requests.post")
|
@patch("library.services.reranker.requests.post")
|
||||||
def test_no_instruction_omits_field(self, mock_post):
|
def test_no_instruction_omits_field(self, mock_post):
|
||||||
"""Empty instruction is not sent in payload."""
|
"""Empty instruction is not sent in payload."""
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ class SearchServiceSearchTest(TestCase):
|
|||||||
]
|
]
|
||||||
mock_fulltext.return_value = []
|
mock_fulltext.return_value = []
|
||||||
mock_graph.return_value = []
|
mock_graph.return_value = []
|
||||||
mock_rerank.return_value = (None, None)
|
mock_rerank.return_value = (None, None, "no_system_model")
|
||||||
mock_image.return_value = []
|
mock_image.return_value = []
|
||||||
|
|
||||||
request = SearchRequest(
|
request = SearchRequest(
|
||||||
@@ -110,7 +110,9 @@ class SearchServiceSearchTest(TestCase):
|
|||||||
chunk_s3_key="s3/key", chunk_index=0, score=0.95,
|
chunk_s3_key="s3/key", chunk_index=0, score=0.95,
|
||||||
source="fulltext",
|
source="fulltext",
|
||||||
)
|
)
|
||||||
mock_rerank.return_value = ([reranked_candidate], "qwen3-vl-reranker-2b")
|
mock_rerank.return_value = (
|
||||||
|
[reranked_candidate], "qwen3-vl-reranker-2b", None,
|
||||||
|
)
|
||||||
|
|
||||||
request = SearchRequest(
|
request = SearchRequest(
|
||||||
query="test",
|
query="test",
|
||||||
@@ -125,6 +127,8 @@ class SearchServiceSearchTest(TestCase):
|
|||||||
self.assertTrue(response.reranker_used)
|
self.assertTrue(response.reranker_used)
|
||||||
self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
|
self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
|
||||||
self.assertAlmostEqual(response.candidates[0].score, 0.95)
|
self.assertAlmostEqual(response.candidates[0].score, 0.95)
|
||||||
|
# Successful rerank → no skip reason surfaced to UI / API.
|
||||||
|
self.assertIsNone(response.reranker_skip_reason)
|
||||||
|
|
||||||
@patch("library.services.search.SearchService._fulltext_search")
|
@patch("library.services.search.SearchService._fulltext_search")
|
||||||
@patch("library.services.search.SearchService._embed_query")
|
@patch("library.services.search.SearchService._embed_query")
|
||||||
@@ -152,6 +156,103 @@ class SearchServiceSearchTest(TestCase):
|
|||||||
|
|
||||||
self.assertFalse(response.reranker_used)
|
self.assertFalse(response.reranker_used)
|
||||||
self.assertIsNone(response.reranker_model)
|
self.assertIsNone(response.reranker_model)
|
||||||
|
# ``rerank=False`` means "not requested", not "skipped" — so no
|
||||||
|
# reason is reported. Template distinguishes this from the
|
||||||
|
# skip case by looking at the original request's rerank flag.
|
||||||
|
self.assertIsNone(response.reranker_skip_reason)
|
||||||
|
|
||||||
|
@patch("library.services.search.SearchService._rerank")
|
||||||
|
@patch("library.services.search.SearchService._fulltext_search")
|
||||||
|
@patch("library.services.search.SearchService._embed_query")
|
||||||
|
def test_search_reports_skip_reason_no_system_model(
|
||||||
|
self, mock_embed, mock_fulltext, mock_rerank
|
||||||
|
):
|
||||||
|
"""Rerank requested but no system model → ``no_system_model`` surfaced."""
|
||||||
|
mock_embed.return_value = None
|
||||||
|
mock_fulltext.return_value = [
|
||||||
|
SearchCandidate(
|
||||||
|
chunk_uid="c1", item_uid="i1", item_title="Test",
|
||||||
|
library_type="technical", text_preview="preview",
|
||||||
|
chunk_s3_key="s3/key", chunk_index=0, score=0.5,
|
||||||
|
source="fulltext",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
mock_rerank.return_value = (None, None, "no_system_model")
|
||||||
|
|
||||||
|
request = SearchRequest(
|
||||||
|
query="test",
|
||||||
|
search_types=["fulltext"],
|
||||||
|
rerank=True,
|
||||||
|
include_images=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
service = SearchService()
|
||||||
|
response = service.search(request)
|
||||||
|
|
||||||
|
self.assertFalse(response.reranker_used)
|
||||||
|
self.assertIsNone(response.reranker_model)
|
||||||
|
self.assertEqual(response.reranker_skip_reason, "no_system_model")
|
||||||
|
|
||||||
|
@patch("library.services.search.SearchService._rerank")
|
||||||
|
@patch("library.services.search.SearchService._fulltext_search")
|
||||||
|
@patch("library.services.search.SearchService._embed_query")
|
||||||
|
def test_search_reports_skip_reason_api_error(
|
||||||
|
self, mock_embed, mock_fulltext, mock_rerank
|
||||||
|
):
|
||||||
|
"""Rerank API raising → ``api_error: ...`` surfaced in response."""
|
||||||
|
mock_embed.return_value = None
|
||||||
|
mock_fulltext.return_value = [
|
||||||
|
SearchCandidate(
|
||||||
|
chunk_uid="c1", item_uid="i1", item_title="Test",
|
||||||
|
library_type="technical", text_preview="preview",
|
||||||
|
chunk_s3_key="s3/key", chunk_index=0, score=0.5,
|
||||||
|
source="fulltext",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
mock_rerank.return_value = (
|
||||||
|
None, None,
|
||||||
|
"api_error: 404 Client Error: Not Found for url: "
|
||||||
|
"http://pan.helu.ca:8400/v1/v1/rerank",
|
||||||
|
)
|
||||||
|
|
||||||
|
request = SearchRequest(
|
||||||
|
query="test",
|
||||||
|
search_types=["fulltext"],
|
||||||
|
rerank=True,
|
||||||
|
include_images=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
service = SearchService()
|
||||||
|
response = service.search(request)
|
||||||
|
|
||||||
|
self.assertFalse(response.reranker_used)
|
||||||
|
self.assertIsNotNone(response.reranker_skip_reason)
|
||||||
|
self.assertTrue(
|
||||||
|
response.reranker_skip_reason.startswith("api_error:"),
|
||||||
|
f"expected api_error: prefix, got {response.reranker_skip_reason!r}",
|
||||||
|
)
|
||||||
|
|
||||||
|
@patch("library.services.search.SearchService._fulltext_search")
|
||||||
|
@patch("library.services.search.SearchService._embed_query")
|
||||||
|
def test_search_reports_skip_reason_no_candidates(
|
||||||
|
self, mock_embed, mock_fulltext
|
||||||
|
):
|
||||||
|
"""Rerank requested but fusion produced nothing → ``no_candidates``."""
|
||||||
|
mock_embed.return_value = None
|
||||||
|
mock_fulltext.return_value = []
|
||||||
|
|
||||||
|
request = SearchRequest(
|
||||||
|
query="test",
|
||||||
|
search_types=["fulltext"],
|
||||||
|
rerank=True,
|
||||||
|
include_images=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
service = SearchService()
|
||||||
|
response = service.search(request)
|
||||||
|
|
||||||
|
self.assertFalse(response.reranker_used)
|
||||||
|
self.assertEqual(response.reranker_skip_reason, "no_candidates")
|
||||||
|
|
||||||
@patch("library.services.search.SearchService._fulltext_search")
|
@patch("library.services.search.SearchService._fulltext_search")
|
||||||
@patch("library.services.search.SearchService._embed_query")
|
@patch("library.services.search.SearchService._embed_query")
|
||||||
|
|||||||
@@ -208,6 +208,41 @@ class SearchAPIResponseTest(TestCase):
|
|||||||
self.assertEqual(call_args.search_types, ["fulltext"])
|
self.assertEqual(call_args.search_types, ["fulltext"])
|
||||||
self.assertFalse(call_args.rerank)
|
self.assertFalse(call_args.rerank)
|
||||||
|
|
||||||
|
@patch("library.api.views.SearchService")
|
||||||
|
def test_reranker_skip_reason_surfaced_in_json(self, MockService):
|
||||||
|
"""``reranker_skip_reason`` propagates through the JSON API."""
|
||||||
|
mock_response = SearchResponse(
|
||||||
|
query="neural networks",
|
||||||
|
candidates=[],
|
||||||
|
images=[],
|
||||||
|
total_candidates=0,
|
||||||
|
search_time_ms=10.0,
|
||||||
|
reranker_used=False,
|
||||||
|
reranker_model=None,
|
||||||
|
search_types_used=[],
|
||||||
|
reranker_skip_reason=(
|
||||||
|
"api_error: 404 Client Error: Not Found for url: "
|
||||||
|
"http://pan.helu.ca:8400/v1/v1/rerank"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
mock_instance = MockService.return_value
|
||||||
|
mock_instance.search.return_value = mock_response
|
||||||
|
|
||||||
|
response = self.client.post(
|
||||||
|
"/library/api/search/",
|
||||||
|
{"query": "neural networks"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
data = response.json()
|
||||||
|
self.assertFalse(data["reranker_used"])
|
||||||
|
self.assertIn("reranker_skip_reason", data)
|
||||||
|
self.assertTrue(
|
||||||
|
data["reranker_skip_reason"].startswith("api_error:"),
|
||||||
|
f"got {data['reranker_skip_reason']!r}",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ConceptAPITest(TestCase):
|
class ConceptAPITest(TestCase):
|
||||||
"""Tests for concept API endpoints."""
|
"""Tests for concept API endpoints."""
|
||||||
|
|||||||
@@ -117,11 +117,15 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
|||||||
)
|
)
|
||||||
self.client.force_login(self.user)
|
self.client.force_login(self.user)
|
||||||
|
|
||||||
def _patched_search(self):
|
def _patched_search(self, reranker_skip_reason=None):
|
||||||
"""Return a (request_capture, patch_context) pair.
|
"""Return a (request_capture, patch_context) pair.
|
||||||
|
|
||||||
The patch captures the ``SearchRequest`` that ``SearchService.search``
|
The patch captures the ``SearchRequest`` that ``SearchService.search``
|
||||||
is called with so assertions can run after the view returns.
|
is called with so assertions can run after the view returns.
|
||||||
|
|
||||||
|
:param reranker_skip_reason: Value to set on the stub response's
|
||||||
|
``reranker_skip_reason`` attribute, for tests that want to
|
||||||
|
exercise the "Skipped" badge rendering path.
|
||||||
"""
|
"""
|
||||||
capture: dict = {}
|
capture: dict = {}
|
||||||
|
|
||||||
@@ -137,6 +141,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
|||||||
reranker_used=False,
|
reranker_used=False,
|
||||||
reranker_model=None,
|
reranker_model=None,
|
||||||
search_types_used=[],
|
search_types_used=[],
|
||||||
|
reranker_skip_reason=reranker_skip_reason,
|
||||||
)
|
)
|
||||||
|
|
||||||
return capture, patch(
|
return capture, patch(
|
||||||
@@ -178,3 +183,117 @@ class SearchPageAllowedLibrariesTests(TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsNone(capture["request"].allowed_libraries)
|
self.assertIsNone(capture["request"].allowed_libraries)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# search_page rerank-status rendering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class SearchPageRerankBadgeTests(TestCase):
|
||||||
|
"""Verify the three-state Re-ranked indicator on the search page.
|
||||||
|
|
||||||
|
The badge must distinguish:
|
||||||
|
|
||||||
|
* Success (``reranker_used=True``) — green "Yes"
|
||||||
|
* Skipped (``rerank=True`` requested but ``reranker_skip_reason`` set)
|
||||||
|
— warning "Skipped" with the reason shown
|
||||||
|
* Off (user unchecked the re-rank box) — ghost "Off"
|
||||||
|
|
||||||
|
This guards the regression that surfaced when Synesis returned 404
|
||||||
|
on a mis-constructed rerank URL: the UI said "No" and gave no hint
|
||||||
|
the re-ranker had actually failed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.user = User.objects.create_user(
|
||||||
|
username="admin", email="a@example.com", password="pw"
|
||||||
|
)
|
||||||
|
self.client.force_login(self.user)
|
||||||
|
|
||||||
|
def _run(self, rerank_value, reranker_used, reranker_skip_reason):
|
||||||
|
capture: dict = {}
|
||||||
|
|
||||||
|
def fake_search(self, request):
|
||||||
|
capture["request"] = request
|
||||||
|
return SimpleNamespace(
|
||||||
|
query=request.query,
|
||||||
|
candidates=[],
|
||||||
|
images=[],
|
||||||
|
total_candidates=0,
|
||||||
|
search_time_ms=0.0,
|
||||||
|
reranker_used=reranker_used,
|
||||||
|
reranker_model=None,
|
||||||
|
search_types_used=[],
|
||||||
|
reranker_skip_reason=reranker_skip_reason,
|
||||||
|
)
|
||||||
|
|
||||||
|
post_data = {"query": "postgresql"}
|
||||||
|
if rerank_value is not None:
|
||||||
|
post_data["rerank"] = rerank_value
|
||||||
|
|
||||||
|
with patch("library.views._all_library_uids", return_value=[]), \
|
||||||
|
patch("library.services.search.SearchService.search", fake_search):
|
||||||
|
response = self.client.post(reverse("library:search"), post_data)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
return response, capture
|
||||||
|
|
||||||
|
def test_badge_shows_yes_when_rerank_succeeded(self):
|
||||||
|
response, _ = self._run(
|
||||||
|
rerank_value="on",
|
||||||
|
reranker_used=True,
|
||||||
|
reranker_skip_reason=None,
|
||||||
|
)
|
||||||
|
body = response.content.decode()
|
||||||
|
self.assertIn("badge-success", body)
|
||||||
|
self.assertIn(">Yes<", body)
|
||||||
|
self.assertNotIn(">Skipped<", body)
|
||||||
|
self.assertNotIn(">Off<", body)
|
||||||
|
|
||||||
|
def test_badge_shows_skipped_with_reason_on_api_error(self):
|
||||||
|
reason = (
|
||||||
|
"api_error: 404 Client Error: Not Found for url: "
|
||||||
|
"http://pan.helu.ca:8400/v1/v1/rerank"
|
||||||
|
)
|
||||||
|
response, capture = self._run(
|
||||||
|
rerank_value="on",
|
||||||
|
reranker_used=False,
|
||||||
|
reranker_skip_reason=reason,
|
||||||
|
)
|
||||||
|
# Sanity: the view actually requested re-ranking.
|
||||||
|
self.assertTrue(capture["request"].rerank)
|
||||||
|
|
||||||
|
body = response.content.decode()
|
||||||
|
self.assertIn(">Skipped", body)
|
||||||
|
# Reason shown in-page so the user can debug without grepping logs.
|
||||||
|
# Django auto-escapes the colon-space and URL, which is fine.
|
||||||
|
self.assertIn("api_error:", body)
|
||||||
|
self.assertIn("404", body)
|
||||||
|
# Must not claim success.
|
||||||
|
self.assertNotIn(">Yes<", body)
|
||||||
|
|
||||||
|
def test_badge_shows_skipped_on_no_system_model(self):
|
||||||
|
response, _ = self._run(
|
||||||
|
rerank_value="on",
|
||||||
|
reranker_used=False,
|
||||||
|
reranker_skip_reason="no_system_model",
|
||||||
|
)
|
||||||
|
body = response.content.decode()
|
||||||
|
self.assertIn(">Skipped", body)
|
||||||
|
self.assertIn("no_system_model", body)
|
||||||
|
|
||||||
|
def test_badge_shows_off_when_rerank_unchecked(self):
|
||||||
|
# HTML checkbox form: unchecked checkboxes are simply omitted
|
||||||
|
# from the POST body, so we pass rerank_value=None (not "off").
|
||||||
|
response, capture = self._run(
|
||||||
|
rerank_value=None,
|
||||||
|
reranker_used=False,
|
||||||
|
reranker_skip_reason=None,
|
||||||
|
)
|
||||||
|
self.assertFalse(capture["request"].rerank)
|
||||||
|
|
||||||
|
body = response.content.decode()
|
||||||
|
self.assertIn(">Off<", body)
|
||||||
|
self.assertNotIn(">Skipped", body)
|
||||||
|
self.assertNotIn(">Yes<", body)
|
||||||
|
|||||||
Reference in New Issue
Block a user