fix(library): admin UI search now sees workspace-scoped libraries
All checks were successful
CVE Scan & Docker Build / security-scan (push) Successful in 53s
CVE Scan & Docker Build / build-and-push (push) Successful in 2m34s

Root cause
----------
SearchService unconditionally appends _WORKSPACE_SCOPE_CLAUSE to every
Cypher query. With both workspace_id and allowed_libraries NULL, the
clause only matches libraries whose workspace_id is also NULL:

    AND ( ($workspace_id IS NOT NULL AND lib.workspace_id = $workspace_id)
       OR ($allowed_libraries IS NOT NULL AND lib.uid IN $allowed_libraries)
       OR ($workspace_id IS NULL AND $allowed_libraries IS NULL
           AND lib.workspace_id IS NULL) )

search_page and library_search both built their SearchRequest without
setting either parameter, so the third branch was always the only one
that matched. Every Daedalus-ingested library carries a non-null
workspace_id, so documents ingested via Daedalus were invisible to the
/library/search/ admin UI — the symptom being zero results for terms
that demonstrably exist in indexed chunks.

Fix
---
Both admin-UI views are `@login_required` debug/admin tools for
Django-authenticated operators, not MCP endpoints — they have no
workspace-scoping contract to honour. Added `_all_library_uids()`
helper that returns every Library UID (or [] when Neo4j is down / a
neomodel error bubbles up) and wired it into both views as
`allowed_libraries=`. This flips the scope clause into its second
branch ('lib.uid IN $allowed_libraries'), which matches every library
regardless of workspace_id — reusing the exact mechanism Phase-2 chat
turns use for user-managed libraries.

SearchRequest.__post_init__ collapses an empty list to None, so an
unreachable Neo4j gracefully reverts to the legacy global-only
behaviour rather than 500-ing the page.

Tests
-----
library/tests/test_search_views_admin_scope.py:
* AllLibraryUidsHelperTests — Neo4j unavailable, normal listing,
  empty/None-uid filtering, unexpected-exception degradation.
* SearchPageAllowedLibrariesTests — admin POST to /library/search/
  reaches SearchService with the captured list; empty list collapses
  to None. Stubs SearchService.search to keep the test hermetic.

6 new tests; all 16 tests in library.tests.test_search* are green:

    TEST_NEO4J_ENABLED=0 python manage.py test \
      library.tests.test_search_views_admin_scope \
      library.tests.test_search_scoping \
      --testrunner=test_db_manager.django_integration.PostgreSQLTestRunner
This commit is contained in:
2026-05-09 21:54:30 -04:00
parent 642268cec1
commit 9ceb01f829
2 changed files with 229 additions and 0 deletions

View File

@@ -0,0 +1,180 @@
"""Tests for the admin-UI search views' interaction with ``allowed_libraries``.
The two views in ``library/views.py`` that back the Mnemosyne HTML
search surface (``search_page`` for the global page, ``library_search``
for the per-library page) are ``@login_required`` debug/admin tools for
Django-authenticated operators — not MCP endpoints. They must therefore
see *every* library, including Daedalus workspace-scoped ones whose
``workspace_id`` is non-null.
``SearchService`` unconditionally appends ``_WORKSPACE_SCOPE_CLAUSE`` to
every Cypher query. Without ``workspace_id`` or ``allowed_libraries``
set, the clause matches only libraries with ``workspace_id IS NULL`` —
so a Daedalus-ingested document is silently invisible to the admin UI.
These tests cover:
* ``_all_library_uids`` — returns every UID when Neo4j is reachable,
returns ``[]`` when it isn't, and swallows unexpected model errors
rather than 500-ing the whole page.
* Defence in depth: make sure the admin views actually pass that list
into ``SearchRequest`` so a future refactor doesn't silently
re-introduce the "zero results for workspace libraries" bug.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
from django.contrib.auth import get_user_model
from django.test import TestCase
from django.urls import reverse
from library import views
User = get_user_model()
# ---------------------------------------------------------------------------
# _all_library_uids
# ---------------------------------------------------------------------------
class AllLibraryUidsHelperTests(TestCase):
"""Cover every branch of ``library.views._all_library_uids``."""
def test_returns_empty_when_neo4j_unavailable(self):
"""Helper must not touch ``Library.nodes`` if Neo4j is down."""
with patch("library.views.neo4j_available", return_value=False):
self.assertEqual(views._all_library_uids(), [])
def test_returns_every_library_uid(self):
"""All nodes returned from ``Library.nodes.all()`` are enumerated."""
fake_libs = [
SimpleNamespace(uid="lib_workspace_1"),
SimpleNamespace(uid="lib_global_2"),
SimpleNamespace(uid="lib_workspace_3"),
]
fake_nodes = MagicMock()
fake_nodes.all.return_value = fake_libs
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
result = views._all_library_uids()
self.assertEqual(
sorted(result),
["lib_global_2", "lib_workspace_1", "lib_workspace_3"],
)
def test_skips_nodes_with_empty_uid(self):
"""Defensive filter — a node with no uid would break the Cypher IN."""
fake_libs = [
SimpleNamespace(uid="lib_ok"),
SimpleNamespace(uid=""),
SimpleNamespace(uid=None),
SimpleNamespace(uid="lib_ok_2"),
]
fake_nodes = MagicMock()
fake_nodes.all.return_value = fake_libs
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
result = views._all_library_uids()
self.assertEqual(sorted(result), ["lib_ok", "lib_ok_2"])
def test_returns_empty_on_unexpected_exception(self):
"""Neo4j hiccup must not break the whole search page — log & degrade."""
fake_nodes = MagicMock()
fake_nodes.all.side_effect = RuntimeError("neo4j blew up")
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
self.assertEqual(views._all_library_uids(), [])
# ---------------------------------------------------------------------------
# search_page wiring
# ---------------------------------------------------------------------------
class SearchPageAllowedLibrariesTests(TestCase):
"""Verify ``search_page`` forwards every library UID as ``allowed_libraries``.
Stubs out ``_all_library_uids`` and ``SearchService`` so the test
asserts on the ``SearchRequest`` that actually reaches the service
— not on Neo4j behaviour — keeping the test hermetic.
"""
def setUp(self):
self.user = User.objects.create_user(
username="admin", email="a@example.com", password="pw"
)
self.client.force_login(self.user)
def _patched_search(self):
"""Return a (request_capture, patch_context) pair.
The patch captures the ``SearchRequest`` that ``SearchService.search``
is called with so assertions can run after the view returns.
"""
capture: dict = {}
def fake_search(self, request):
capture["request"] = request
# Return a minimally-shaped response to satisfy the template.
return SimpleNamespace(
query=request.query,
candidates=[],
images=[],
total_candidates=0,
search_time_ms=0.0,
reranker_used=False,
reranker_model=None,
search_types_used=[],
)
return capture, patch(
"library.services.search.SearchService.search", fake_search
)
def test_search_page_forwards_allowed_libraries(self):
capture, patched = self._patched_search()
with patch(
"library.views._all_library_uids",
return_value=["lib_ws_a", "lib_ws_b"],
), patched:
response = self.client.post(
reverse("library:search"),
{"query": "contract renewal", "rerank": "on"},
)
self.assertEqual(response.status_code, 200)
req = capture["request"]
self.assertEqual(req.query, "contract renewal")
self.assertEqual(req.allowed_libraries, ["lib_ws_a", "lib_ws_b"])
# workspace_id must stay None so the "admin sees everything"
# second branch of _WORKSPACE_SCOPE_CLAUSE is the active one.
self.assertIsNone(req.workspace_id)
def test_search_page_empty_library_list_collapses_to_none(self):
"""When Neo4j is down / there are zero libraries, fall back cleanly.
``SearchRequest.__post_init__`` converts an empty list to ``None``,
which reverts to the legacy global-only behaviour. That's
acceptable as a degraded fallback: the page is already broken if
Neo4j has nothing in it.
"""
capture, patched = self._patched_search()
with patch("library.views._all_library_uids", return_value=[]), patched:
self.client.post(
reverse("library:search"),
{"query": "anything", "rerank": "on"},
)
self.assertIsNone(capture["request"].allowed_libraries)

View File

@@ -140,6 +140,43 @@ def library_detail(request, uid):
_MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024 _MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024
def _all_library_uids() -> list[str]:
"""Return the UIDs of every Library node in Neo4j.
The Django-side HTML search views (``search_page`` and
``library_search``) are admin/debug tools gated by ``@login_required``
against a local Django account; they are not exposed to external
MCP callers and have no workspace-scoping contract to honour.
The underlying ``SearchService`` always appends
``_WORKSPACE_SCOPE_CLAUSE`` to every Cypher query, and that clause's
default branch — "``$workspace_id`` IS NULL AND ``$allowed_libraries``
IS NULL" — only matches libraries whose own ``workspace_id`` is
``NULL``. So an authenticated admin searching from the UI would
silently miss every Daedalus-ingested document, because those
libraries always carry a non-null ``workspace_id``.
Passing the full set of library UIDs as ``allowed_libraries`` flips
the clause into its second branch
(``lib.uid IN $allowed_libraries``) which matches every library
regardless of ``workspace_id``. This reuses the exact mechanism
Phase-2 chat turns use for "user-managed libraries"; we're simply
granting the admin access to all of them. Returning ``[]`` is fine
when Neo4j is unreachable — ``SearchRequest.__post_init__``
collapses an empty list to ``None``, reverting to the legacy global
behaviour.
"""
if not neo4j_available():
return []
try:
from .models import Library
return [lib.uid for lib in Library.nodes.all() if lib.uid]
except Exception as exc: # pragma: no cover - Neo4j unreachable paths
logger.warning("Failed to enumerate library UIDs for search: %s", exc)
return []
@login_required @login_required
def library_search(request, uid): def library_search(request, uid):
""" """
@@ -191,12 +228,19 @@ def library_search(request, uid):
from .services.search import SearchRequest, SearchService from .services.search import SearchRequest, SearchService
# Admin UI searches every library the operator can see — including
# Daedalus workspace-scoped ones. See ``_all_library_uids`` for
# why this is needed (the default scope clause branch hides any
# library with a non-null ``workspace_id``).
allowed = _all_library_uids()
def _make_request(rerank: bool) -> "SearchRequest": def _make_request(rerank: bool) -> "SearchRequest":
return SearchRequest( return SearchRequest(
query=query, query=query,
query_image=image_bytes, query_image=image_bytes,
query_image_ext=image_ext, query_image_ext=image_ext,
library_uid=uid, library_uid=uid,
allowed_libraries=allowed,
limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20), limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20),
vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50), vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50),
fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30), fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),
@@ -786,6 +830,11 @@ def search_page(request):
query=query, query=query,
library_uid=library_uid or None, library_uid=library_uid or None,
library_type=library_type or None, library_type=library_type or None,
# Admin UI sees everything — workspace-scoped libraries
# included. Without this, ``_WORKSPACE_SCOPE_CLAUSE``
# falls back to its "global-only" branch and silently
# hides all Daedalus-ingested content.
allowed_libraries=_all_library_uids(),
limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20), limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20),
vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50), vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50),
fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30), fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),