fix(library): admin UI search now sees workspace-scoped libraries
Root cause
----------
SearchService unconditionally appends _WORKSPACE_SCOPE_CLAUSE to every
Cypher query. With both workspace_id and allowed_libraries NULL, the
clause only matches libraries whose workspace_id is also NULL:
AND ( ($workspace_id IS NOT NULL AND lib.workspace_id = $workspace_id)
OR ($allowed_libraries IS NOT NULL AND lib.uid IN $allowed_libraries)
OR ($workspace_id IS NULL AND $allowed_libraries IS NULL
AND lib.workspace_id IS NULL) )
search_page and library_search both built their SearchRequest without
setting either parameter, so the third branch was always the only one
that matched. Every Daedalus-ingested library carries a non-null
workspace_id, so documents ingested via Daedalus were invisible to the
/library/search/ admin UI — the symptom being zero results for terms
that demonstrably exist in indexed chunks.
Fix
---
Both admin-UI views are `@login_required` debug/admin tools for
Django-authenticated operators, not MCP endpoints — they have no
workspace-scoping contract to honour. Added `_all_library_uids()`
helper that returns every Library UID (or [] when Neo4j is down / a
neomodel error bubbles up) and wired it into both views as
`allowed_libraries=`. This flips the scope clause into its second
branch ('lib.uid IN $allowed_libraries'), which matches every library
regardless of workspace_id — reusing the exact mechanism Phase-2 chat
turns use for user-managed libraries.
SearchRequest.__post_init__ collapses an empty list to None, so an
unreachable Neo4j gracefully reverts to the legacy global-only
behaviour rather than 500-ing the page.
Tests
-----
library/tests/test_search_views_admin_scope.py:
* AllLibraryUidsHelperTests — Neo4j unavailable, normal listing,
empty/None-uid filtering, unexpected-exception degradation.
* SearchPageAllowedLibrariesTests — admin POST to /library/search/
reaches SearchService with the captured list; empty list collapses
to None. Stubs SearchService.search to keep the test hermetic.
6 new tests; all 16 tests in library.tests.test_search* are green:
TEST_NEO4J_ENABLED=0 python manage.py test \
library.tests.test_search_views_admin_scope \
library.tests.test_search_scoping \
--testrunner=test_db_manager.django_integration.PostgreSQLTestRunner
This commit is contained in:
180
mnemosyne/library/tests/test_search_views_admin_scope.py
Normal file
180
mnemosyne/library/tests/test_search_views_admin_scope.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Tests for the admin-UI search views' interaction with ``allowed_libraries``.
|
||||
|
||||
The two views in ``library/views.py`` that back the Mnemosyne HTML
|
||||
search surface (``search_page`` for the global page, ``library_search``
|
||||
for the per-library page) are ``@login_required`` debug/admin tools for
|
||||
Django-authenticated operators — not MCP endpoints. They must therefore
|
||||
see *every* library, including Daedalus workspace-scoped ones whose
|
||||
``workspace_id`` is non-null.
|
||||
|
||||
``SearchService`` unconditionally appends ``_WORKSPACE_SCOPE_CLAUSE`` to
|
||||
every Cypher query. Without ``workspace_id`` or ``allowed_libraries``
|
||||
set, the clause matches only libraries with ``workspace_id IS NULL`` —
|
||||
so a Daedalus-ingested document is silently invisible to the admin UI.
|
||||
|
||||
These tests cover:
|
||||
|
||||
* ``_all_library_uids`` — returns every UID when Neo4j is reachable,
|
||||
returns ``[]`` when it isn't, and swallows unexpected model errors
|
||||
rather than 500-ing the whole page.
|
||||
* Defence in depth: make sure the admin views actually pass that list
|
||||
into ``SearchRequest`` so a future refactor doesn't silently
|
||||
re-introduce the "zero results for workspace libraries" bug.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from library import views
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _all_library_uids
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class AllLibraryUidsHelperTests(TestCase):
|
||||
"""Cover every branch of ``library.views._all_library_uids``."""
|
||||
|
||||
def test_returns_empty_when_neo4j_unavailable(self):
|
||||
"""Helper must not touch ``Library.nodes`` if Neo4j is down."""
|
||||
with patch("library.views.neo4j_available", return_value=False):
|
||||
self.assertEqual(views._all_library_uids(), [])
|
||||
|
||||
def test_returns_every_library_uid(self):
|
||||
"""All nodes returned from ``Library.nodes.all()`` are enumerated."""
|
||||
fake_libs = [
|
||||
SimpleNamespace(uid="lib_workspace_1"),
|
||||
SimpleNamespace(uid="lib_global_2"),
|
||||
SimpleNamespace(uid="lib_workspace_3"),
|
||||
]
|
||||
fake_nodes = MagicMock()
|
||||
fake_nodes.all.return_value = fake_libs
|
||||
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
|
||||
|
||||
with patch("library.views.neo4j_available", return_value=True), \
|
||||
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
|
||||
result = views._all_library_uids()
|
||||
|
||||
self.assertEqual(
|
||||
sorted(result),
|
||||
["lib_global_2", "lib_workspace_1", "lib_workspace_3"],
|
||||
)
|
||||
|
||||
def test_skips_nodes_with_empty_uid(self):
|
||||
"""Defensive filter — a node with no uid would break the Cypher IN."""
|
||||
fake_libs = [
|
||||
SimpleNamespace(uid="lib_ok"),
|
||||
SimpleNamespace(uid=""),
|
||||
SimpleNamespace(uid=None),
|
||||
SimpleNamespace(uid="lib_ok_2"),
|
||||
]
|
||||
fake_nodes = MagicMock()
|
||||
fake_nodes.all.return_value = fake_libs
|
||||
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
|
||||
|
||||
with patch("library.views.neo4j_available", return_value=True), \
|
||||
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
|
||||
result = views._all_library_uids()
|
||||
|
||||
self.assertEqual(sorted(result), ["lib_ok", "lib_ok_2"])
|
||||
|
||||
def test_returns_empty_on_unexpected_exception(self):
|
||||
"""Neo4j hiccup must not break the whole search page — log & degrade."""
|
||||
fake_nodes = MagicMock()
|
||||
fake_nodes.all.side_effect = RuntimeError("neo4j blew up")
|
||||
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
|
||||
|
||||
with patch("library.views.neo4j_available", return_value=True), \
|
||||
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
|
||||
self.assertEqual(views._all_library_uids(), [])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_page wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SearchPageAllowedLibrariesTests(TestCase):
|
||||
"""Verify ``search_page`` forwards every library UID as ``allowed_libraries``.
|
||||
|
||||
Stubs out ``_all_library_uids`` and ``SearchService`` so the test
|
||||
asserts on the ``SearchRequest`` that actually reaches the service
|
||||
— not on Neo4j behaviour — keeping the test hermetic.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.user = User.objects.create_user(
|
||||
username="admin", email="a@example.com", password="pw"
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
|
||||
def _patched_search(self):
|
||||
"""Return a (request_capture, patch_context) pair.
|
||||
|
||||
The patch captures the ``SearchRequest`` that ``SearchService.search``
|
||||
is called with so assertions can run after the view returns.
|
||||
"""
|
||||
capture: dict = {}
|
||||
|
||||
def fake_search(self, request):
|
||||
capture["request"] = request
|
||||
# Return a minimally-shaped response to satisfy the template.
|
||||
return SimpleNamespace(
|
||||
query=request.query,
|
||||
candidates=[],
|
||||
images=[],
|
||||
total_candidates=0,
|
||||
search_time_ms=0.0,
|
||||
reranker_used=False,
|
||||
reranker_model=None,
|
||||
search_types_used=[],
|
||||
)
|
||||
|
||||
return capture, patch(
|
||||
"library.services.search.SearchService.search", fake_search
|
||||
)
|
||||
|
||||
def test_search_page_forwards_allowed_libraries(self):
|
||||
capture, patched = self._patched_search()
|
||||
with patch(
|
||||
"library.views._all_library_uids",
|
||||
return_value=["lib_ws_a", "lib_ws_b"],
|
||||
), patched:
|
||||
response = self.client.post(
|
||||
reverse("library:search"),
|
||||
{"query": "contract renewal", "rerank": "on"},
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
req = capture["request"]
|
||||
self.assertEqual(req.query, "contract renewal")
|
||||
self.assertEqual(req.allowed_libraries, ["lib_ws_a", "lib_ws_b"])
|
||||
# workspace_id must stay None so the "admin sees everything"
|
||||
# second branch of _WORKSPACE_SCOPE_CLAUSE is the active one.
|
||||
self.assertIsNone(req.workspace_id)
|
||||
|
||||
def test_search_page_empty_library_list_collapses_to_none(self):
|
||||
"""When Neo4j is down / there are zero libraries, fall back cleanly.
|
||||
|
||||
``SearchRequest.__post_init__`` converts an empty list to ``None``,
|
||||
which reverts to the legacy global-only behaviour. That's
|
||||
acceptable as a degraded fallback: the page is already broken if
|
||||
Neo4j has nothing in it.
|
||||
"""
|
||||
capture, patched = self._patched_search()
|
||||
with patch("library.views._all_library_uids", return_value=[]), patched:
|
||||
self.client.post(
|
||||
reverse("library:search"),
|
||||
{"query": "anything", "rerank": "on"},
|
||||
)
|
||||
|
||||
self.assertIsNone(capture["request"].allowed_libraries)
|
||||
@@ -140,6 +140,43 @@ def library_detail(request, uid):
|
||||
_MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024
|
||||
|
||||
|
||||
def _all_library_uids() -> list[str]:
|
||||
"""Return the UIDs of every Library node in Neo4j.
|
||||
|
||||
The Django-side HTML search views (``search_page`` and
|
||||
``library_search``) are admin/debug tools gated by ``@login_required``
|
||||
against a local Django account; they are not exposed to external
|
||||
MCP callers and have no workspace-scoping contract to honour.
|
||||
|
||||
The underlying ``SearchService`` always appends
|
||||
``_WORKSPACE_SCOPE_CLAUSE`` to every Cypher query, and that clause's
|
||||
default branch — "``$workspace_id`` IS NULL AND ``$allowed_libraries``
|
||||
IS NULL" — only matches libraries whose own ``workspace_id`` is
|
||||
``NULL``. So an authenticated admin searching from the UI would
|
||||
silently miss every Daedalus-ingested document, because those
|
||||
libraries always carry a non-null ``workspace_id``.
|
||||
|
||||
Passing the full set of library UIDs as ``allowed_libraries`` flips
|
||||
the clause into its second branch
|
||||
(``lib.uid IN $allowed_libraries``) which matches every library
|
||||
regardless of ``workspace_id``. This reuses the exact mechanism
|
||||
Phase-2 chat turns use for "user-managed libraries"; we're simply
|
||||
granting the admin access to all of them. Returning ``[]`` is fine
|
||||
when Neo4j is unreachable — ``SearchRequest.__post_init__``
|
||||
collapses an empty list to ``None``, reverting to the legacy global
|
||||
behaviour.
|
||||
"""
|
||||
if not neo4j_available():
|
||||
return []
|
||||
try:
|
||||
from .models import Library
|
||||
|
||||
return [lib.uid for lib in Library.nodes.all() if lib.uid]
|
||||
except Exception as exc: # pragma: no cover - Neo4j unreachable paths
|
||||
logger.warning("Failed to enumerate library UIDs for search: %s", exc)
|
||||
return []
|
||||
|
||||
|
||||
@login_required
|
||||
def library_search(request, uid):
|
||||
"""
|
||||
@@ -191,12 +228,19 @@ def library_search(request, uid):
|
||||
|
||||
from .services.search import SearchRequest, SearchService
|
||||
|
||||
# Admin UI searches every library the operator can see — including
|
||||
# Daedalus workspace-scoped ones. See ``_all_library_uids`` for
|
||||
# why this is needed (the default scope clause branch hides any
|
||||
# library with a non-null ``workspace_id``).
|
||||
allowed = _all_library_uids()
|
||||
|
||||
def _make_request(rerank: bool) -> "SearchRequest":
|
||||
return SearchRequest(
|
||||
query=query,
|
||||
query_image=image_bytes,
|
||||
query_image_ext=image_ext,
|
||||
library_uid=uid,
|
||||
allowed_libraries=allowed,
|
||||
limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20),
|
||||
vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50),
|
||||
fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),
|
||||
@@ -786,6 +830,11 @@ def search_page(request):
|
||||
query=query,
|
||||
library_uid=library_uid or None,
|
||||
library_type=library_type or None,
|
||||
# Admin UI sees everything — workspace-scoped libraries
|
||||
# included. Without this, ``_WORKSPACE_SCOPE_CLAUSE``
|
||||
# falls back to its "global-only" branch and silently
|
||||
# hides all Daedalus-ingested content.
|
||||
allowed_libraries=_all_library_uids(),
|
||||
limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20),
|
||||
vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50),
|
||||
fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),
|
||||
|
||||
Reference in New Issue
Block a user