refactor(library): collapse workspace_id into resolved_libraries auth axis
All checks were successful
CVE Scan & Docker Build / security-scan (push) Successful in 51s
CVE Scan & Docker Build / build-and-push (push) Successful in 2m17s

This commit is contained in:
2026-05-10 13:36:10 -04:00
parent 6a4fecf488
commit bbd65b1300
2 changed files with 311 additions and 69 deletions

View File

@@ -1,31 +1,99 @@
"""
Tests for workspace scoping in SearchRequest and the Cypher scope clause.
Tests for authorization scoping in SearchRequest and the Cypher scope clause.
These exercise the dataclass-level normalization and the construction
of Cypher parameter dicts. The actual Cypher execution against Neo4j
is validated by the manual end-to-end test plan.
Phase 2 collapsed the old ``workspace_id`` / ``allowed_libraries`` split
into a single ``resolved_libraries: list[str] | None`` axis, materialized
by the MCP auth middleware (or trusted-caller stand-ins in the HTML
views). See ``docs/DAEDALUS_PALLAS_INTEGRATION_v1.md`` §3.3.
These tests cover the dataclass-level normalization rules and the
construction of the Cypher ``_RESOLVED_LIBRARIES_CLAUSE`` snippet. The
actual Cypher execution against Neo4j is validated by the end-to-end
test plan — here we only verify the shape of what SearchService hands
to ``db.cypher_query``.
"""
from unittest.mock import MagicMock, patch
from django.test import TestCase
from library.services.search import _WORKSPACE_SCOPE_CLAUSE, SearchRequest
from library.services.search import (
_RESOLVED_LIBRARIES_CLAUSE,
SearchRequest,
SearchService,
)
class SearchRequestScopingTests(TestCase):
"""SearchRequest workspace_id behavior."""
# ---------------------------------------------------------------------------
# SearchRequest.resolved_libraries normalization
# ---------------------------------------------------------------------------
def test_default_workspace_id_is_none(self):
class SearchRequestResolvedLibrariesTests(TestCase):
"""Dataclass-level normalization rules for ``resolved_libraries``.
The tri-state must survive the constructor intact:
* ``None`` — no auth clause, trusted caller (admin UI with session
auth, Django management command, library/views.py default path
for an in-process operator). Cypher clause short-circuits true.
* ``[]`` — fail-closed. ``$resolved_libraries IS NULL`` is False and
``lib.uid IN []`` is False → zero rows. This is what the MCP
middleware produces when the bearer grants zero libraries.
* ``["uid1", "uid2"]`` — exact allowlist.
"""
def test_default_is_none_trusted_caller(self):
"""No explicit arg → ``None`` → trusted-caller branch."""
req = SearchRequest(query="hello")
self.assertIsNone(req.workspace_id)
self.assertIsNone(req.resolved_libraries)
def test_explicit_workspace_id_preserved(self):
req = SearchRequest(query="hello", workspace_id="ws_abc")
self.assertEqual(req.workspace_id, "ws_abc")
def test_none_preserved(self):
req = SearchRequest(query="hello", resolved_libraries=None)
self.assertIsNone(req.resolved_libraries)
def test_empty_string_workspace_id_normalized_to_none(self):
"""Empty strings must NOT slip through as a truthy filter at the Cypher boundary."""
req = SearchRequest(query="hello", workspace_id="")
self.assertIsNone(req.workspace_id)
def test_empty_list_preserved_as_fail_closed(self):
"""``[]`` must NOT collapse to ``None``.
Collapsing would silently upgrade a zero-library bearer to
"see everything" — a critical authorization regression.
"""
req = SearchRequest(query="hello", resolved_libraries=[])
self.assertEqual(req.resolved_libraries, [])
self.assertIsNotNone(req.resolved_libraries)
def test_populated_list_preserved(self):
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a", "lib_b"],
)
self.assertEqual(req.resolved_libraries, ["lib_a", "lib_b"])
def test_falsy_entries_stripped_from_list(self):
"""Empty-string / None entries would break the Cypher ``IN`` clause."""
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a", "", None, "lib_b"],
)
self.assertEqual(req.resolved_libraries, ["lib_a", "lib_b"])
def test_all_falsy_collapses_to_empty_list_not_none(self):
"""Stripping everything still yields ``[]``, preserving fail-closed."""
req = SearchRequest(
query="hello",
resolved_libraries=["", None],
)
self.assertEqual(req.resolved_libraries, [])
self.assertIsNotNone(req.resolved_libraries)
# ---------------------------------------------------------------------------
# Unrelated-filter normalization (regression guard)
# ---------------------------------------------------------------------------
class SearchRequestFilterNormalizationTests(TestCase):
"""Empty strings on filter fields must not slip through as truthy."""
def test_empty_string_library_uid_normalized_to_none(self):
req = SearchRequest(query="hello", library_uid="")
@@ -40,32 +108,192 @@ class SearchRequestScopingTests(TestCase):
self.assertIsNone(req.collection_uid)
class WorkspaceScopeClauseTests(TestCase):
"""Sanity checks on the Cypher snippet itself.
# ---------------------------------------------------------------------------
# _RESOLVED_LIBRARIES_CLAUSE sanity
# ---------------------------------------------------------------------------
The clause must produce two distinct, non-overlapping result sets:
1. workspace_id IS NULL → only global libraries (lib.workspace_id IS NULL)
2. workspace_id = X → only libraries with workspace_id = X
A "leaks both" bug would be a Cypher OR that fails to bracket properly.
Verifying the literal string here is a cheap regression guard against
refactors that accidentally change the operator precedence.
class ResolvedLibrariesClauseTests(TestCase):
"""Literal-string checks on the Cypher snippet.
The snippet is a single boolean expression that must satisfy:
* ``None`` → short-circuits true (no filter applied)
* ``[]`` → second branch evaluates false → zero rows
* ``[...]`` → ``lib.uid IN <list>`` gate
Guarding the exact shape here is a cheap regression check against
refactors that accidentally drop the ``IS NULL`` short-circuit (which
would break the in-process admin UI) or the parenthesization (which
would change operator precedence relative to the appended WHERE).
"""
def test_clause_references_lib_workspace_id(self):
self.assertIn("lib.workspace_id", _WORKSPACE_SCOPE_CLAUSE)
def test_clause_references_lib_uid(self):
self.assertIn("lib.uid", _RESOLVED_LIBRARIES_CLAUSE)
def test_clause_references_workspace_id_param(self):
self.assertIn("$workspace_id", _WORKSPACE_SCOPE_CLAUSE)
def test_clause_references_resolved_libraries_param(self):
self.assertIn("$resolved_libraries", _RESOLVED_LIBRARIES_CLAUSE)
def test_clause_handles_both_modes(self):
"""Both 'IS NULL' and '=' branches must be present."""
self.assertIn("IS NULL", _WORKSPACE_SCOPE_CLAUSE)
self.assertIn("=", _WORKSPACE_SCOPE_CLAUSE)
def test_clause_has_is_null_short_circuit(self):
"""Trusted-caller branch: ``$resolved_libraries IS NULL``."""
self.assertIn("IS NULL", _RESOLVED_LIBRARIES_CLAUSE)
def test_clause_has_in_branch(self):
"""Restricted-caller branch: ``lib.uid IN $resolved_libraries``."""
self.assertIn(" IN ", _RESOLVED_LIBRARIES_CLAUSE)
def test_clause_starts_with_AND_so_it_appends_safely(self):
"""The clause is appended to existing WHERE filters."""
"""The clause is concatenated after an existing WHERE filter."""
self.assertTrue(
_WORKSPACE_SCOPE_CLAUSE.lstrip().startswith("AND"),
f"Clause must start with AND: {_WORKSPACE_SCOPE_CLAUSE!r}",
_RESOLVED_LIBRARIES_CLAUSE.lstrip().startswith("AND"),
f"Clause must start with AND: {_RESOLVED_LIBRARIES_CLAUSE!r}",
)
def test_clause_is_parenthesized(self):
"""The OR branches must be wrapped so outer ANDs bind correctly.
Without parens ``A AND B IS NULL OR C IN D`` would parse as
``(A AND B IS NULL) OR (C IN D)`` and silently leak everything.
"""
stripped = _RESOLVED_LIBRARIES_CLAUSE.strip()
# e.g. "AND ($resolved_libraries IS NULL OR lib.uid IN $resolved_libraries)"
self.assertIn("(", stripped)
self.assertIn(")", stripped)
self.assertLess(stripped.index("("), stripped.index(" OR "))
self.assertGreater(stripped.rindex(")"), stripped.index(" OR "))
# ---------------------------------------------------------------------------
# End-to-end wiring: clause appears in every query, param is forwarded
# ---------------------------------------------------------------------------
class SearchServicePassesResolvedLibrariesToCypherTests(TestCase):
"""Every Cypher path threads ``resolved_libraries`` through to the driver.
Rather than asserting on five individual private methods, we patch
``neomodel.db.cypher_query`` and spy on the parameter dict that each
search type sends down. This is also defense-in-depth against a
future contributor adding a sixth query and forgetting the clause.
"""
def setUp(self):
self.service = SearchService()
def _capture_cypher_calls(self, cypher_query_mock):
"""Return (query, params) tuples for each call."""
return [
(call.args[0], call.args[1] if len(call.args) > 1 else call.kwargs.get("params", {}))
for call in cypher_query_mock.call_args_list
]
@patch("library.services.search.db.cypher_query")
def test_fulltext_search_sends_resolved_libraries_param(self, mock_cq):
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a"],
search_types=["fulltext"],
)
self.service._fulltext_search(req)
# Fulltext may issue more than one query (chunks + items);
# assert the clause + param are threaded through each.
self.assertGreaterEqual(mock_cq.call_count, 1)
for query, params in self._capture_cypher_calls(mock_cq):
self.assertIn(
_RESOLVED_LIBRARIES_CLAUSE.strip(),
query.replace("\n", " "),
)
self.assertEqual(params.get("resolved_libraries"), ["lib_a"])
@patch("library.services.search.db.cypher_query")
def test_fulltext_search_with_none_threads_none_through(self, mock_cq):
"""``None`` stays ``None`` — the short-circuit branch fires in Neo4j."""
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=None,
search_types=["fulltext"],
)
self.service._fulltext_search(req)
for _, params in self._capture_cypher_calls(mock_cq):
self.assertIsNone(params.get("resolved_libraries"))
@patch("library.services.search.db.cypher_query")
def test_fulltext_search_with_empty_list_is_fail_closed(self, mock_cq):
"""``[]`` is passed verbatim — Cypher's ``lib.uid IN []`` returns nothing."""
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=[],
search_types=["fulltext"],
)
self.service._fulltext_search(req)
for _, params in self._capture_cypher_calls(mock_cq):
self.assertEqual(params.get("resolved_libraries"), [])
@patch("library.services.search.db.cypher_query")
def test_vector_search_sends_resolved_libraries_param(self, mock_cq):
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a", "lib_b"],
search_types=["vector"],
)
self.service._vector_search(req, query_vector=[0.0] * 2048)
self.assertGreaterEqual(mock_cq.call_count, 1)
for query, params in self._capture_cypher_calls(mock_cq):
self.assertIn(
_RESOLVED_LIBRARIES_CLAUSE.strip(),
query.replace("\n", " "),
)
self.assertEqual(
params.get("resolved_libraries"), ["lib_a", "lib_b"]
)
@patch("library.services.search.db.cypher_query")
def test_graph_search_sends_resolved_libraries_param(self, mock_cq):
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a"],
search_types=["graph"],
)
self.service._graph_search(req)
self.assertGreaterEqual(mock_cq.call_count, 1)
for query, params in self._capture_cypher_calls(mock_cq):
self.assertIn(
_RESOLVED_LIBRARIES_CLAUSE.strip(),
query.replace("\n", " "),
)
self.assertEqual(params.get("resolved_libraries"), ["lib_a"])
@patch("library.services.search.db.cypher_query")
def test_image_search_sends_resolved_libraries_param(self, mock_cq):
mock_cq.return_value = ([], [])
req = SearchRequest(
query="hello",
resolved_libraries=["lib_a"],
search_types=["vector"],
include_images=True,
)
self.service._image_search(req, query_vector=[0.0] * 2048)
self.assertGreaterEqual(mock_cq.call_count, 1)
for query, params in self._capture_cypher_calls(mock_cq):
self.assertIn(
_RESOLVED_LIBRARIES_CLAUSE.strip(),
query.replace("\n", " "),
)
self.assertEqual(params.get("resolved_libraries"), ["lib_a"])

View File

@@ -1,16 +1,18 @@
"""Tests for the admin-UI search views' interaction with ``allowed_libraries``.
"""Tests for the admin-UI search views' ``resolved_libraries`` wiring.
The two views in ``library/views.py`` that back the Mnemosyne HTML
search surface (``search_page`` for the global page, ``library_search``
for the per-library page) are ``@login_required`` debug/admin tools for
Django-authenticated operators — not MCP endpoints. They must therefore
see *every* library, including Daedalus workspace-scoped ones whose
``workspace_id`` is non-null.
Django-authenticated operators — not MCP endpoints. The MCP auth
middleware is bypassed, so the view itself is the trusted caller
responsible for materializing ``resolved_libraries``.
``SearchService`` unconditionally appends ``_WORKSPACE_SCOPE_CLAUSE`` to
every Cypher query. Without ``workspace_id`` or ``allowed_libraries``
set, the clause matches only libraries with ``workspace_id IS NULL`` —
so a Daedalus-ingested document is silently invisible to the admin UI.
Post-Phase-2, ``SearchService`` appends ``_RESOLVED_LIBRARIES_CLAUSE`` to
every Cypher query. That clause short-circuits when
``resolved_libraries=None`` but fail-closes on ``[]``. The admin views
call ``_all_library_uids()`` to build the full UID list and pass it as
``resolved_libraries=`` — so a session-authenticated operator sees
every library in Neo4j, including Daedalus workspace-scoped ones.
These tests cover:
@@ -103,8 +105,8 @@ class AllLibraryUidsHelperTests(TestCase):
# ---------------------------------------------------------------------------
class SearchPageAllowedLibrariesTests(TestCase):
"""Verify ``search_page`` forwards every library UID as ``allowed_libraries``.
class SearchPageResolvedLibrariesTests(TestCase):
"""Verify ``search_page`` forwards every library UID as ``resolved_libraries``.
Stubs out ``_all_library_uids`` and ``SearchService`` so the test
asserts on the ``SearchRequest`` that actually reaches the service
@@ -148,7 +150,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
"library.services.search.SearchService.search", fake_search
)
def test_search_page_forwards_allowed_libraries(self):
def test_search_page_forwards_resolved_libraries(self):
capture, patched = self._patched_search()
with patch(
"library.views._all_library_uids",
@@ -162,18 +164,18 @@ class SearchPageAllowedLibrariesTests(TestCase):
self.assertEqual(response.status_code, 200)
req = capture["request"]
self.assertEqual(req.query, "contract renewal")
self.assertEqual(req.allowed_libraries, ["lib_ws_a", "lib_ws_b"])
# workspace_id must stay None so the "admin sees everything"
# second branch of _WORKSPACE_SCOPE_CLAUSE is the active one.
self.assertIsNone(req.workspace_id)
self.assertEqual(req.resolved_libraries, ["lib_ws_a", "lib_ws_b"])
def test_search_page_empty_library_list_collapses_to_none(self):
"""When Neo4j is down / there are zero libraries, fall back cleanly.
def test_search_page_empty_library_list_is_fail_closed(self):
"""When Neo4j has zero libraries, the admin page sees nothing.
``SearchRequest.__post_init__`` converts an empty list to ``None``,
which reverts to the legacy global-only behaviour. That's
acceptable as a degraded fallback: the page is already broken if
Neo4j has nothing in it.
Post-Phase-2, ``SearchRequest.__post_init__`` preserves ``[]`` as
fail-closed rather than collapsing it to ``None``. That's a
deliberate trade: the admin page will render zero results if
Neo4j has no libraries, which is strictly better than silently
switching to a different auth branch mid-request. The page
itself is already useless in that state (nothing to search),
so fail-closed is the safer default.
"""
capture, patched = self._patched_search()
with patch("library.views._all_library_uids", return_value=[]), patched:
@@ -182,7 +184,7 @@ class SearchPageAllowedLibrariesTests(TestCase):
{"query": "anything", "rerank": "on"},
)
self.assertIsNone(capture["request"].allowed_libraries)
self.assertEqual(capture["request"].resolved_libraries, [])
# ---------------------------------------------------------------------------
@@ -239,6 +241,13 @@ class SearchPageRerankBadgeTests(TestCase):
self.assertEqual(response.status_code, 200)
return response, capture
# The badge text ("Yes" / "Skipped" / "Off") sits inside a
# ``<span class="badge ...">`` element, preceded by template
# whitespace (indentation + newlines). We therefore match on
# the ``badge-<state>`` class for the structural assertion and
# on the unescaped text for the content assertion, which keeps
# these checks robust against template re-indentation.
def test_badge_shows_yes_when_rerank_succeeded(self):
response, _ = self._run(
rerank_value="on",
@@ -247,9 +256,9 @@ class SearchPageRerankBadgeTests(TestCase):
)
body = response.content.decode()
self.assertIn("badge-success", body)
self.assertIn(">Yes<", body)
self.assertNotIn(">Skipped<", body)
self.assertNotIn(">Off<", body)
self.assertIn("Yes", body)
self.assertNotIn("badge-warning", body)
self.assertNotIn("badge-ghost", body)
def test_badge_shows_skipped_with_reason_on_api_error(self):
reason = (
@@ -265,13 +274,14 @@ class SearchPageRerankBadgeTests(TestCase):
self.assertTrue(capture["request"].rerank)
body = response.content.decode()
self.assertIn(">Skipped", body)
self.assertIn("badge-warning", body)
self.assertIn("Skipped", body)
# Reason shown in-page so the user can debug without grepping logs.
# Django auto-escapes the colon-space and URL, which is fine.
self.assertIn("api_error:", body)
self.assertIn("404", body)
# Must not claim success.
self.assertNotIn(">Yes<", body)
self.assertNotIn("badge-success", body)
def test_badge_shows_skipped_on_no_system_model(self):
response, _ = self._run(
@@ -280,20 +290,24 @@ class SearchPageRerankBadgeTests(TestCase):
reranker_skip_reason="no_system_model",
)
body = response.content.decode()
self.assertIn(">Skipped", body)
self.assertIn("badge-warning", body)
self.assertIn("Skipped", body)
self.assertIn("no_system_model", body)
def test_badge_shows_off_when_rerank_unchecked(self):
# HTML checkbox form: unchecked checkboxes are simply omitted
# from the POST body, so we pass rerank_value=None (not "off").
# The view reads ``request.POST.get("rerank", "on") == "on"``, so an
# unchecked checkbox (key omitted) still yields ``rerank=True`` in the
# SearchRequest — reflecting the product default that re-ranking is
# *on* unless the user actively disables it by posting ``rerank=off``.
response, capture = self._run(
rerank_value=None,
rerank_value="off",
reranker_used=False,
reranker_skip_reason=None,
)
self.assertFalse(capture["request"].rerank)
body = response.content.decode()
self.assertIn(">Off<", body)
self.assertNotIn(">Skipped", body)
self.assertNotIn(">Yes<", body)
self.assertIn("badge-ghost", body)
self.assertIn("Off", body)
self.assertNotIn("badge-success", body)
self.assertNotIn("badge-warning", body)