diff --git a/mnemosyne/library/tests/test_search_views_admin_scope.py b/mnemosyne/library/tests/test_search_views_admin_scope.py new file mode 100644 index 0000000..e70903e --- /dev/null +++ b/mnemosyne/library/tests/test_search_views_admin_scope.py @@ -0,0 +1,180 @@ +"""Tests for the admin-UI search views' interaction with ``allowed_libraries``. + +The two views in ``library/views.py`` that back the Mnemosyne HTML +search surface (``search_page`` for the global page, ``library_search`` +for the per-library page) are ``@login_required`` debug/admin tools for +Django-authenticated operators — not MCP endpoints. They must therefore +see *every* library, including Daedalus workspace-scoped ones whose +``workspace_id`` is non-null. + +``SearchService`` unconditionally appends ``_WORKSPACE_SCOPE_CLAUSE`` to +every Cypher query. Without ``workspace_id`` or ``allowed_libraries`` +set, the clause matches only libraries with ``workspace_id IS NULL`` — +so a Daedalus-ingested document is silently invisible to the admin UI. + +These tests cover: + +* ``_all_library_uids`` — returns every UID when Neo4j is reachable, + returns ``[]`` when it isn't, and swallows unexpected model errors + rather than 500-ing the whole page. +* Defence in depth: make sure the admin views actually pass that list + into ``SearchRequest`` so a future refactor doesn't silently + re-introduce the "zero results for workspace libraries" bug. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from django.contrib.auth import get_user_model +from django.test import TestCase +from django.urls import reverse + +from library import views + +User = get_user_model() + + +# --------------------------------------------------------------------------- +# _all_library_uids +# --------------------------------------------------------------------------- + + +class AllLibraryUidsHelperTests(TestCase): + """Cover every branch of ``library.views._all_library_uids``.""" + + def test_returns_empty_when_neo4j_unavailable(self): + """Helper must not touch ``Library.nodes`` if Neo4j is down.""" + with patch("library.views.neo4j_available", return_value=False): + self.assertEqual(views._all_library_uids(), []) + + def test_returns_every_library_uid(self): + """All nodes returned from ``Library.nodes.all()`` are enumerated.""" + fake_libs = [ + SimpleNamespace(uid="lib_workspace_1"), + SimpleNamespace(uid="lib_global_2"), + SimpleNamespace(uid="lib_workspace_3"), + ] + fake_nodes = MagicMock() + fake_nodes.all.return_value = fake_libs + fake_library_cls = SimpleNamespace(nodes=fake_nodes) + + with patch("library.views.neo4j_available", return_value=True), \ + patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}): + result = views._all_library_uids() + + self.assertEqual( + sorted(result), + ["lib_global_2", "lib_workspace_1", "lib_workspace_3"], + ) + + def test_skips_nodes_with_empty_uid(self): + """Defensive filter — a node with no uid would break the Cypher IN.""" + fake_libs = [ + SimpleNamespace(uid="lib_ok"), + SimpleNamespace(uid=""), + SimpleNamespace(uid=None), + SimpleNamespace(uid="lib_ok_2"), + ] + fake_nodes = MagicMock() + fake_nodes.all.return_value = fake_libs + fake_library_cls = SimpleNamespace(nodes=fake_nodes) + + with patch("library.views.neo4j_available", return_value=True), \ + patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}): + result = views._all_library_uids() + + self.assertEqual(sorted(result), ["lib_ok", "lib_ok_2"]) + + def test_returns_empty_on_unexpected_exception(self): + """Neo4j hiccup must not break the whole search page — log & degrade.""" + fake_nodes = MagicMock() + fake_nodes.all.side_effect = RuntimeError("neo4j blew up") + fake_library_cls = SimpleNamespace(nodes=fake_nodes) + + with patch("library.views.neo4j_available", return_value=True), \ + patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}): + self.assertEqual(views._all_library_uids(), []) + + +# --------------------------------------------------------------------------- +# search_page wiring +# --------------------------------------------------------------------------- + + +class SearchPageAllowedLibrariesTests(TestCase): + """Verify ``search_page`` forwards every library UID as ``allowed_libraries``. + + Stubs out ``_all_library_uids`` and ``SearchService`` so the test + asserts on the ``SearchRequest`` that actually reaches the service + — not on Neo4j behaviour — keeping the test hermetic. + """ + + def setUp(self): + self.user = User.objects.create_user( + username="admin", email="a@example.com", password="pw" + ) + self.client.force_login(self.user) + + def _patched_search(self): + """Return a (request_capture, patch_context) pair. + + The patch captures the ``SearchRequest`` that ``SearchService.search`` + is called with so assertions can run after the view returns. + """ + capture: dict = {} + + def fake_search(self, request): + capture["request"] = request + # Return a minimally-shaped response to satisfy the template. + return SimpleNamespace( + query=request.query, + candidates=[], + images=[], + total_candidates=0, + search_time_ms=0.0, + reranker_used=False, + reranker_model=None, + search_types_used=[], + ) + + return capture, patch( + "library.services.search.SearchService.search", fake_search + ) + + def test_search_page_forwards_allowed_libraries(self): + capture, patched = self._patched_search() + with patch( + "library.views._all_library_uids", + return_value=["lib_ws_a", "lib_ws_b"], + ), patched: + response = self.client.post( + reverse("library:search"), + {"query": "contract renewal", "rerank": "on"}, + ) + + self.assertEqual(response.status_code, 200) + req = capture["request"] + self.assertEqual(req.query, "contract renewal") + self.assertEqual(req.allowed_libraries, ["lib_ws_a", "lib_ws_b"]) + # workspace_id must stay None so the "admin sees everything" + # second branch of _WORKSPACE_SCOPE_CLAUSE is the active one. + self.assertIsNone(req.workspace_id) + + def test_search_page_empty_library_list_collapses_to_none(self): + """When Neo4j is down / there are zero libraries, fall back cleanly. + + ``SearchRequest.__post_init__`` converts an empty list to ``None``, + which reverts to the legacy global-only behaviour. That's + acceptable as a degraded fallback: the page is already broken if + Neo4j has nothing in it. + """ + capture, patched = self._patched_search() + with patch("library.views._all_library_uids", return_value=[]), patched: + self.client.post( + reverse("library:search"), + {"query": "anything", "rerank": "on"}, + ) + + self.assertIsNone(capture["request"].allowed_libraries) diff --git a/mnemosyne/library/views.py b/mnemosyne/library/views.py index 2e36a17..1d4be7e 100644 --- a/mnemosyne/library/views.py +++ b/mnemosyne/library/views.py @@ -140,6 +140,43 @@ def library_detail(request, uid): _MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024 +def _all_library_uids() -> list[str]: + """Return the UIDs of every Library node in Neo4j. + + The Django-side HTML search views (``search_page`` and + ``library_search``) are admin/debug tools gated by ``@login_required`` + against a local Django account; they are not exposed to external + MCP callers and have no workspace-scoping contract to honour. + + The underlying ``SearchService`` always appends + ``_WORKSPACE_SCOPE_CLAUSE`` to every Cypher query, and that clause's + default branch — "``$workspace_id`` IS NULL AND ``$allowed_libraries`` + IS NULL" — only matches libraries whose own ``workspace_id`` is + ``NULL``. So an authenticated admin searching from the UI would + silently miss every Daedalus-ingested document, because those + libraries always carry a non-null ``workspace_id``. + + Passing the full set of library UIDs as ``allowed_libraries`` flips + the clause into its second branch + (``lib.uid IN $allowed_libraries``) which matches every library + regardless of ``workspace_id``. This reuses the exact mechanism + Phase-2 chat turns use for "user-managed libraries"; we're simply + granting the admin access to all of them. Returning ``[]`` is fine + when Neo4j is unreachable — ``SearchRequest.__post_init__`` + collapses an empty list to ``None``, reverting to the legacy global + behaviour. + """ + if not neo4j_available(): + return [] + try: + from .models import Library + + return [lib.uid for lib in Library.nodes.all() if lib.uid] + except Exception as exc: # pragma: no cover - Neo4j unreachable paths + logger.warning("Failed to enumerate library UIDs for search: %s", exc) + return [] + + @login_required def library_search(request, uid): """ @@ -191,12 +228,19 @@ def library_search(request, uid): from .services.search import SearchRequest, SearchService + # Admin UI searches every library the operator can see — including + # Daedalus workspace-scoped ones. See ``_all_library_uids`` for + # why this is needed (the default scope clause branch hides any + # library with a non-null ``workspace_id``). + allowed = _all_library_uids() + def _make_request(rerank: bool) -> "SearchRequest": return SearchRequest( query=query, query_image=image_bytes, query_image_ext=image_ext, library_uid=uid, + allowed_libraries=allowed, limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20), vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50), fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30), @@ -786,6 +830,11 @@ def search_page(request): query=query, library_uid=library_uid or None, library_type=library_type or None, + # Admin UI sees everything — workspace-scoped libraries + # included. Without this, ``_WORKSPACE_SCOPE_CLAUSE`` + # falls back to its "global-only" branch and silently + # hides all Daedalus-ingested content. + allowed_libraries=_all_library_uids(), limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20), vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50), fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),