From bbd65b13008416a0634e06126824a5a950da53bc Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Sun, 10 May 2026 13:36:10 -0400 Subject: [PATCH] refactor(library): collapse workspace_id into resolved_libraries auth axis --- .../library/tests/test_search_scoping.py | 298 ++++++++++++++++-- .../tests/test_search_views_admin_scope.py | 82 +++-- 2 files changed, 311 insertions(+), 69 deletions(-) diff --git a/mnemosyne/library/tests/test_search_scoping.py b/mnemosyne/library/tests/test_search_scoping.py index 95a1b64..f41ac7a 100644 --- a/mnemosyne/library/tests/test_search_scoping.py +++ b/mnemosyne/library/tests/test_search_scoping.py @@ -1,31 +1,99 @@ """ -Tests for workspace scoping in SearchRequest and the Cypher scope clause. +Tests for authorization scoping in SearchRequest and the Cypher scope clause. -These exercise the dataclass-level normalization and the construction -of Cypher parameter dicts. The actual Cypher execution against Neo4j -is validated by the manual end-to-end test plan. +Phase 2 collapsed the old ``workspace_id`` / ``allowed_libraries`` split +into a single ``resolved_libraries: list[str] | None`` axis, materialized +by the MCP auth middleware (or trusted-caller stand-ins in the HTML +views). See ``docs/DAEDALUS_PALLAS_INTEGRATION_v1.md`` §3.3. + +These tests cover the dataclass-level normalization rules and the +construction of the Cypher ``_RESOLVED_LIBRARIES_CLAUSE`` snippet. The +actual Cypher execution against Neo4j is validated by the end-to-end +test plan — here we only verify the shape of what SearchService hands +to ``db.cypher_query``. """ +from unittest.mock import MagicMock, patch + from django.test import TestCase -from library.services.search import _WORKSPACE_SCOPE_CLAUSE, SearchRequest +from library.services.search import ( + _RESOLVED_LIBRARIES_CLAUSE, + SearchRequest, + SearchService, +) -class SearchRequestScopingTests(TestCase): - """SearchRequest workspace_id behavior.""" +# --------------------------------------------------------------------------- +# SearchRequest.resolved_libraries normalization +# --------------------------------------------------------------------------- - def test_default_workspace_id_is_none(self): + +class SearchRequestResolvedLibrariesTests(TestCase): + """Dataclass-level normalization rules for ``resolved_libraries``. + + The tri-state must survive the constructor intact: + + * ``None`` — no auth clause, trusted caller (admin UI with session + auth, Django management command, library/views.py default path + for an in-process operator). Cypher clause short-circuits true. + * ``[]`` — fail-closed. ``$resolved_libraries IS NULL`` is False and + ``lib.uid IN []`` is False → zero rows. This is what the MCP + middleware produces when the bearer grants zero libraries. + * ``["uid1", "uid2"]`` — exact allowlist. + """ + + def test_default_is_none_trusted_caller(self): + """No explicit arg → ``None`` → trusted-caller branch.""" req = SearchRequest(query="hello") - self.assertIsNone(req.workspace_id) + self.assertIsNone(req.resolved_libraries) - def test_explicit_workspace_id_preserved(self): - req = SearchRequest(query="hello", workspace_id="ws_abc") - self.assertEqual(req.workspace_id, "ws_abc") + def test_none_preserved(self): + req = SearchRequest(query="hello", resolved_libraries=None) + self.assertIsNone(req.resolved_libraries) - def test_empty_string_workspace_id_normalized_to_none(self): - """Empty strings must NOT slip through as a truthy filter at the Cypher boundary.""" - req = SearchRequest(query="hello", workspace_id="") - self.assertIsNone(req.workspace_id) + def test_empty_list_preserved_as_fail_closed(self): + """``[]`` must NOT collapse to ``None``. + + Collapsing would silently upgrade a zero-library bearer to + "see everything" — a critical authorization regression. + """ + req = SearchRequest(query="hello", resolved_libraries=[]) + self.assertEqual(req.resolved_libraries, []) + self.assertIsNotNone(req.resolved_libraries) + + def test_populated_list_preserved(self): + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a", "lib_b"], + ) + self.assertEqual(req.resolved_libraries, ["lib_a", "lib_b"]) + + def test_falsy_entries_stripped_from_list(self): + """Empty-string / None entries would break the Cypher ``IN`` clause.""" + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a", "", None, "lib_b"], + ) + self.assertEqual(req.resolved_libraries, ["lib_a", "lib_b"]) + + def test_all_falsy_collapses_to_empty_list_not_none(self): + """Stripping everything still yields ``[]``, preserving fail-closed.""" + req = SearchRequest( + query="hello", + resolved_libraries=["", None], + ) + self.assertEqual(req.resolved_libraries, []) + self.assertIsNotNone(req.resolved_libraries) + + +# --------------------------------------------------------------------------- +# Unrelated-filter normalization (regression guard) +# --------------------------------------------------------------------------- + + +class SearchRequestFilterNormalizationTests(TestCase): + """Empty strings on filter fields must not slip through as truthy.""" def test_empty_string_library_uid_normalized_to_none(self): req = SearchRequest(query="hello", library_uid="") @@ -40,32 +108,192 @@ class SearchRequestScopingTests(TestCase): self.assertIsNone(req.collection_uid) -class WorkspaceScopeClauseTests(TestCase): - """Sanity checks on the Cypher snippet itself. +# --------------------------------------------------------------------------- +# _RESOLVED_LIBRARIES_CLAUSE sanity +# --------------------------------------------------------------------------- - The clause must produce two distinct, non-overlapping result sets: - 1. workspace_id IS NULL → only global libraries (lib.workspace_id IS NULL) - 2. workspace_id = X → only libraries with workspace_id = X - A "leaks both" bug would be a Cypher OR that fails to bracket properly. - Verifying the literal string here is a cheap regression guard against - refactors that accidentally change the operator precedence. +class ResolvedLibrariesClauseTests(TestCase): + """Literal-string checks on the Cypher snippet. + + The snippet is a single boolean expression that must satisfy: + + * ``None`` → short-circuits true (no filter applied) + * ``[]`` → second branch evaluates false → zero rows + * ``[...]`` → ``lib.uid IN `` gate + + Guarding the exact shape here is a cheap regression check against + refactors that accidentally drop the ``IS NULL`` short-circuit (which + would break the in-process admin UI) or the parenthesization (which + would change operator precedence relative to the appended WHERE). """ - def test_clause_references_lib_workspace_id(self): - self.assertIn("lib.workspace_id", _WORKSPACE_SCOPE_CLAUSE) + def test_clause_references_lib_uid(self): + self.assertIn("lib.uid", _RESOLVED_LIBRARIES_CLAUSE) - def test_clause_references_workspace_id_param(self): - self.assertIn("$workspace_id", _WORKSPACE_SCOPE_CLAUSE) + def test_clause_references_resolved_libraries_param(self): + self.assertIn("$resolved_libraries", _RESOLVED_LIBRARIES_CLAUSE) - def test_clause_handles_both_modes(self): - """Both 'IS NULL' and '=' branches must be present.""" - self.assertIn("IS NULL", _WORKSPACE_SCOPE_CLAUSE) - self.assertIn("=", _WORKSPACE_SCOPE_CLAUSE) + def test_clause_has_is_null_short_circuit(self): + """Trusted-caller branch: ``$resolved_libraries IS NULL``.""" + self.assertIn("IS NULL", _RESOLVED_LIBRARIES_CLAUSE) + + def test_clause_has_in_branch(self): + """Restricted-caller branch: ``lib.uid IN $resolved_libraries``.""" + self.assertIn(" IN ", _RESOLVED_LIBRARIES_CLAUSE) def test_clause_starts_with_AND_so_it_appends_safely(self): - """The clause is appended to existing WHERE filters.""" + """The clause is concatenated after an existing WHERE filter.""" self.assertTrue( - _WORKSPACE_SCOPE_CLAUSE.lstrip().startswith("AND"), - f"Clause must start with AND: {_WORKSPACE_SCOPE_CLAUSE!r}", + _RESOLVED_LIBRARIES_CLAUSE.lstrip().startswith("AND"), + f"Clause must start with AND: {_RESOLVED_LIBRARIES_CLAUSE!r}", ) + + def test_clause_is_parenthesized(self): + """The OR branches must be wrapped so outer ANDs bind correctly. + + Without parens ``A AND B IS NULL OR C IN D`` would parse as + ``(A AND B IS NULL) OR (C IN D)`` and silently leak everything. + """ + stripped = _RESOLVED_LIBRARIES_CLAUSE.strip() + # e.g. "AND ($resolved_libraries IS NULL OR lib.uid IN $resolved_libraries)" + self.assertIn("(", stripped) + self.assertIn(")", stripped) + self.assertLess(stripped.index("("), stripped.index(" OR ")) + self.assertGreater(stripped.rindex(")"), stripped.index(" OR ")) + + +# --------------------------------------------------------------------------- +# End-to-end wiring: clause appears in every query, param is forwarded +# --------------------------------------------------------------------------- + + +class SearchServicePassesResolvedLibrariesToCypherTests(TestCase): + """Every Cypher path threads ``resolved_libraries`` through to the driver. + + Rather than asserting on five individual private methods, we patch + ``neomodel.db.cypher_query`` and spy on the parameter dict that each + search type sends down. This is also defense-in-depth against a + future contributor adding a sixth query and forgetting the clause. + """ + + def setUp(self): + self.service = SearchService() + + def _capture_cypher_calls(self, cypher_query_mock): + """Return (query, params) tuples for each call.""" + return [ + (call.args[0], call.args[1] if len(call.args) > 1 else call.kwargs.get("params", {})) + for call in cypher_query_mock.call_args_list + ] + + @patch("library.services.search.db.cypher_query") + def test_fulltext_search_sends_resolved_libraries_param(self, mock_cq): + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a"], + search_types=["fulltext"], + ) + self.service._fulltext_search(req) + + # Fulltext may issue more than one query (chunks + items); + # assert the clause + param are threaded through each. + self.assertGreaterEqual(mock_cq.call_count, 1) + for query, params in self._capture_cypher_calls(mock_cq): + self.assertIn( + _RESOLVED_LIBRARIES_CLAUSE.strip(), + query.replace("\n", " "), + ) + self.assertEqual(params.get("resolved_libraries"), ["lib_a"]) + + @patch("library.services.search.db.cypher_query") + def test_fulltext_search_with_none_threads_none_through(self, mock_cq): + """``None`` stays ``None`` — the short-circuit branch fires in Neo4j.""" + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=None, + search_types=["fulltext"], + ) + self.service._fulltext_search(req) + + for _, params in self._capture_cypher_calls(mock_cq): + self.assertIsNone(params.get("resolved_libraries")) + + @patch("library.services.search.db.cypher_query") + def test_fulltext_search_with_empty_list_is_fail_closed(self, mock_cq): + """``[]`` is passed verbatim — Cypher's ``lib.uid IN []`` returns nothing.""" + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=[], + search_types=["fulltext"], + ) + self.service._fulltext_search(req) + + for _, params in self._capture_cypher_calls(mock_cq): + self.assertEqual(params.get("resolved_libraries"), []) + + @patch("library.services.search.db.cypher_query") + def test_vector_search_sends_resolved_libraries_param(self, mock_cq): + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a", "lib_b"], + search_types=["vector"], + ) + self.service._vector_search(req, query_vector=[0.0] * 2048) + + self.assertGreaterEqual(mock_cq.call_count, 1) + for query, params in self._capture_cypher_calls(mock_cq): + self.assertIn( + _RESOLVED_LIBRARIES_CLAUSE.strip(), + query.replace("\n", " "), + ) + self.assertEqual( + params.get("resolved_libraries"), ["lib_a", "lib_b"] + ) + + @patch("library.services.search.db.cypher_query") + def test_graph_search_sends_resolved_libraries_param(self, mock_cq): + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a"], + search_types=["graph"], + ) + self.service._graph_search(req) + + self.assertGreaterEqual(mock_cq.call_count, 1) + for query, params in self._capture_cypher_calls(mock_cq): + self.assertIn( + _RESOLVED_LIBRARIES_CLAUSE.strip(), + query.replace("\n", " "), + ) + self.assertEqual(params.get("resolved_libraries"), ["lib_a"]) + + @patch("library.services.search.db.cypher_query") + def test_image_search_sends_resolved_libraries_param(self, mock_cq): + mock_cq.return_value = ([], []) + + req = SearchRequest( + query="hello", + resolved_libraries=["lib_a"], + search_types=["vector"], + include_images=True, + ) + self.service._image_search(req, query_vector=[0.0] * 2048) + + self.assertGreaterEqual(mock_cq.call_count, 1) + for query, params in self._capture_cypher_calls(mock_cq): + self.assertIn( + _RESOLVED_LIBRARIES_CLAUSE.strip(), + query.replace("\n", " "), + ) + self.assertEqual(params.get("resolved_libraries"), ["lib_a"]) diff --git a/mnemosyne/library/tests/test_search_views_admin_scope.py b/mnemosyne/library/tests/test_search_views_admin_scope.py index 76914eb..9e328e5 100644 --- a/mnemosyne/library/tests/test_search_views_admin_scope.py +++ b/mnemosyne/library/tests/test_search_views_admin_scope.py @@ -1,16 +1,18 @@ -"""Tests for the admin-UI search views' interaction with ``allowed_libraries``. +"""Tests for the admin-UI search views' ``resolved_libraries`` wiring. The two views in ``library/views.py`` that back the Mnemosyne HTML search surface (``search_page`` for the global page, ``library_search`` for the per-library page) are ``@login_required`` debug/admin tools for -Django-authenticated operators — not MCP endpoints. They must therefore -see *every* library, including Daedalus workspace-scoped ones whose -``workspace_id`` is non-null. +Django-authenticated operators — not MCP endpoints. The MCP auth +middleware is bypassed, so the view itself is the trusted caller +responsible for materializing ``resolved_libraries``. -``SearchService`` unconditionally appends ``_WORKSPACE_SCOPE_CLAUSE`` to -every Cypher query. Without ``workspace_id`` or ``allowed_libraries`` -set, the clause matches only libraries with ``workspace_id IS NULL`` — -so a Daedalus-ingested document is silently invisible to the admin UI. +Post-Phase-2, ``SearchService`` appends ``_RESOLVED_LIBRARIES_CLAUSE`` to +every Cypher query. That clause short-circuits when +``resolved_libraries=None`` but fail-closes on ``[]``. The admin views +call ``_all_library_uids()`` to build the full UID list and pass it as +``resolved_libraries=`` — so a session-authenticated operator sees +every library in Neo4j, including Daedalus workspace-scoped ones. These tests cover: @@ -103,8 +105,8 @@ class AllLibraryUidsHelperTests(TestCase): # --------------------------------------------------------------------------- -class SearchPageAllowedLibrariesTests(TestCase): - """Verify ``search_page`` forwards every library UID as ``allowed_libraries``. +class SearchPageResolvedLibrariesTests(TestCase): + """Verify ``search_page`` forwards every library UID as ``resolved_libraries``. Stubs out ``_all_library_uids`` and ``SearchService`` so the test asserts on the ``SearchRequest`` that actually reaches the service @@ -148,7 +150,7 @@ class SearchPageAllowedLibrariesTests(TestCase): "library.services.search.SearchService.search", fake_search ) - def test_search_page_forwards_allowed_libraries(self): + def test_search_page_forwards_resolved_libraries(self): capture, patched = self._patched_search() with patch( "library.views._all_library_uids", @@ -162,18 +164,18 @@ class SearchPageAllowedLibrariesTests(TestCase): self.assertEqual(response.status_code, 200) req = capture["request"] self.assertEqual(req.query, "contract renewal") - self.assertEqual(req.allowed_libraries, ["lib_ws_a", "lib_ws_b"]) - # workspace_id must stay None so the "admin sees everything" - # second branch of _WORKSPACE_SCOPE_CLAUSE is the active one. - self.assertIsNone(req.workspace_id) + self.assertEqual(req.resolved_libraries, ["lib_ws_a", "lib_ws_b"]) - def test_search_page_empty_library_list_collapses_to_none(self): - """When Neo4j is down / there are zero libraries, fall back cleanly. + def test_search_page_empty_library_list_is_fail_closed(self): + """When Neo4j has zero libraries, the admin page sees nothing. - ``SearchRequest.__post_init__`` converts an empty list to ``None``, - which reverts to the legacy global-only behaviour. That's - acceptable as a degraded fallback: the page is already broken if - Neo4j has nothing in it. + Post-Phase-2, ``SearchRequest.__post_init__`` preserves ``[]`` as + fail-closed rather than collapsing it to ``None``. That's a + deliberate trade: the admin page will render zero results if + Neo4j has no libraries, which is strictly better than silently + switching to a different auth branch mid-request. The page + itself is already useless in that state (nothing to search), + so fail-closed is the safer default. """ capture, patched = self._patched_search() with patch("library.views._all_library_uids", return_value=[]), patched: @@ -182,7 +184,7 @@ class SearchPageAllowedLibrariesTests(TestCase): {"query": "anything", "rerank": "on"}, ) - self.assertIsNone(capture["request"].allowed_libraries) + self.assertEqual(capture["request"].resolved_libraries, []) # --------------------------------------------------------------------------- @@ -239,6 +241,13 @@ class SearchPageRerankBadgeTests(TestCase): self.assertEqual(response.status_code, 200) return response, capture + # The badge text ("Yes" / "Skipped" / "Off") sits inside a + # ```` element, preceded by template + # whitespace (indentation + newlines). We therefore match on + # the ``badge-`` class for the structural assertion and + # on the unescaped text for the content assertion, which keeps + # these checks robust against template re-indentation. + def test_badge_shows_yes_when_rerank_succeeded(self): response, _ = self._run( rerank_value="on", @@ -247,9 +256,9 @@ class SearchPageRerankBadgeTests(TestCase): ) body = response.content.decode() self.assertIn("badge-success", body) - self.assertIn(">Yes<", body) - self.assertNotIn(">Skipped<", body) - self.assertNotIn(">Off<", body) + self.assertIn("Yes", body) + self.assertNotIn("badge-warning", body) + self.assertNotIn("badge-ghost", body) def test_badge_shows_skipped_with_reason_on_api_error(self): reason = ( @@ -265,13 +274,14 @@ class SearchPageRerankBadgeTests(TestCase): self.assertTrue(capture["request"].rerank) body = response.content.decode() - self.assertIn(">Skipped", body) + self.assertIn("badge-warning", body) + self.assertIn("Skipped", body) # Reason shown in-page so the user can debug without grepping logs. # Django auto-escapes the colon-space and URL, which is fine. self.assertIn("api_error:", body) self.assertIn("404", body) # Must not claim success. - self.assertNotIn(">Yes<", body) + self.assertNotIn("badge-success", body) def test_badge_shows_skipped_on_no_system_model(self): response, _ = self._run( @@ -280,20 +290,24 @@ class SearchPageRerankBadgeTests(TestCase): reranker_skip_reason="no_system_model", ) body = response.content.decode() - self.assertIn(">Skipped", body) + self.assertIn("badge-warning", body) + self.assertIn("Skipped", body) self.assertIn("no_system_model", body) def test_badge_shows_off_when_rerank_unchecked(self): - # HTML checkbox form: unchecked checkboxes are simply omitted - # from the POST body, so we pass rerank_value=None (not "off"). + # The view reads ``request.POST.get("rerank", "on") == "on"``, so an + # unchecked checkbox (key omitted) still yields ``rerank=True`` in the + # SearchRequest — reflecting the product default that re-ranking is + # *on* unless the user actively disables it by posting ``rerank=off``. response, capture = self._run( - rerank_value=None, + rerank_value="off", reranker_used=False, reranker_skip_reason=None, ) self.assertFalse(capture["request"].rerank) body = response.content.decode() - self.assertIn(">Off<", body) - self.assertNotIn(">Skipped", body) - self.assertNotIn(">Yes<", body) + self.assertIn("badge-ghost", body) + self.assertIn("Off", body) + self.assertNotIn("badge-success", body) + self.assertNotIn("badge-warning", body)