Files
mnemosyne/mnemosyne/library/tests/test_search.py
Robert Helewka a945b382e6
All checks were successful
CVE Scan & Docker Build / security-scan (push) Successful in 50s
CVE Scan & Docker Build / build-and-push (push) Successful in 2m30s
feat: add init sidecar for migrations and setup on compose up
Introduces a one-shot `init` service in docker-compose that runs Postgres
migrations, Neo4j index setup, and library-type seeding on every `up`.
Long-running services (`app`, `mcp`, `worker`) now depend on its
successful completion via `service_completed_successfully`, blocking the
stack on configuration errors (missing embedding model, dimension
mismatch, unreachable DB) rather than serving silent zero-result
searches.

Also standardizes reranker test fixtures to use the `/v1` OpenAI-style
base URL convention used across other service clients.
2026-05-10 08:01:58 -04:00

365 lines
14 KiB
Python

"""
Tests for the SearchService.
Neo4j queries and embedding calls are mocked — no external services needed.
"""
from unittest.mock import MagicMock, patch
from django.test import TestCase, override_settings
from library.services.fusion import SearchCandidate
from library.services.search import SearchRequest, SearchResponse, SearchService
class SearchServiceInitTest(TestCase):
    """Constructor behaviour of SearchService."""

    def test_creates_without_user(self):
        """A service built with no arguments exposes ``user`` as None."""
        self.assertIsNone(SearchService().user)

    def test_creates_with_user(self):
        """The ``user`` handed to the constructor is kept on the instance."""
        fake_user = MagicMock()
        svc = SearchService(user=fake_user)
        self.assertEqual(svc.user, fake_user)
class SearchServiceSearchTest(TestCase):
    """Orchestration tests for SearchService.search().

    Private SearchService hooks are swapped for mocks on a per-test basis,
    so each scenario controls exactly what the individual search stages
    return.
    """

    @staticmethod
    def _single_candidate(score, source):
        """Build the fixed ``c1`` / ``i1`` candidate with the given score/source."""
        return SearchCandidate(
            chunk_uid="c1",
            item_uid="i1",
            item_title="Test",
            library_type="technical",
            text_preview="preview",
            chunk_s3_key="s3/key",
            chunk_index=0,
            score=score,
            source=source,
        )

    @staticmethod
    def _fulltext_request(rerank):
        """Build a fulltext-only request for "test" with image search off."""
        return SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=rerank,
            include_images=False,
        )

    @patch("library.services.search.SearchService._image_search")
    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._graph_search")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._vector_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_calls_all_types(
        self, embed_mock, vector_mock, fulltext_mock, graph_mock,
        rerank_mock, image_mock,
    ):
        """Embedding, vector, fulltext and graph hooks each run exactly once."""
        req = SearchRequest(
            query="test query",
            search_types=["vector", "fulltext", "graph"],
        )
        embed_mock.return_value = [0.1] * 2048
        vector_mock.return_value = [self._single_candidate(0.9, "vector")]
        # Only the vector stage contributes a hit in this scenario.
        for empty_stage in (fulltext_mock, graph_mock, image_mock):
            empty_stage.return_value = []
        rerank_mock.return_value = (None, None, "no_system_model")

        response = SearchService().search(req)

        for stage in (embed_mock, vector_mock, fulltext_mock, graph_mock):
            stage.assert_called_once()
        self.assertIsInstance(response, SearchResponse)
        self.assertEqual(response.query, "test query")
        self.assertGreater(len(response.candidates), 0)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_embedding_model(self, embed_mock):
        """A vector-only search yields nothing when no embedding is produced."""
        embed_mock.return_value = None
        req = SearchRequest(
            query="test",
            search_types=["vector"],
            rerank=False,
            include_images=False,
        )

        response = SearchService().search(req)

        # Vector was the sole requested type and there was no query
        # embedding, so the candidate list is expected to be empty.
        self.assertEqual(len(response.candidates), 0)

    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_with_reranking(self, embed_mock, fulltext_mock, rerank_mock):
        """A successful rerank is reflected in the response metadata and score."""
        embed_mock.return_value = None
        fulltext_mock.return_value = [self._single_candidate(0.5, "fulltext")]
        rescored = self._single_candidate(0.95, "fulltext")
        rerank_mock.return_value = ([rescored], "qwen3-vl-reranker-2b", None)

        response = SearchService().search(self._fulltext_request(rerank=True))

        self.assertTrue(response.reranker_used)
        self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
        self.assertAlmostEqual(response.candidates[0].score, 0.95)
        # Rerank succeeded, so nothing should be reported as a skip reason.
        self.assertIsNone(response.reranker_skip_reason)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_reranking(self, embed_mock, fulltext_mock):
        """With ``rerank=False`` no reranker usage, model or reason is reported."""
        embed_mock.return_value = None
        fulltext_mock.return_value = [self._single_candidate(0.5, "fulltext")]

        response = SearchService().search(self._fulltext_request(rerank=False))

        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)
        # Reranking was never requested here, which is different from it
        # being skipped: no reason is expected. The template tells the two
        # cases apart using the rerank flag on the original request.
        self.assertIsNone(response.reranker_skip_reason)

    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_no_system_model(
        self, embed_mock, fulltext_mock, rerank_mock
    ):
        """A ``no_system_model`` result from the rerank hook reaches the response."""
        embed_mock.return_value = None
        fulltext_mock.return_value = [self._single_candidate(0.5, "fulltext")]
        rerank_mock.return_value = (None, None, "no_system_model")

        response = SearchService().search(self._fulltext_request(rerank=True))

        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)
        self.assertEqual(response.reranker_skip_reason, "no_system_model")

    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_api_error(
        self, embed_mock, fulltext_mock, rerank_mock
    ):
        """An ``api_error: ...`` reason from the rerank hook reaches the response."""
        embed_mock.return_value = None
        fulltext_mock.return_value = [self._single_candidate(0.5, "fulltext")]
        rerank_mock.return_value = (
            None,
            None,
            "api_error: 404 Client Error: Not Found for url: "
            "http://pan.helu.ca:8400/v1/v1/rerank",
        )

        response = SearchService().search(self._fulltext_request(rerank=True))

        self.assertFalse(response.reranker_used)
        self.assertIsNotNone(response.reranker_skip_reason)
        self.assertTrue(
            response.reranker_skip_reason.startswith("api_error:"),
            f"expected api_error: prefix, got {response.reranker_skip_reason!r}",
        )

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_reports_skip_reason_no_candidates(
        self, embed_mock, fulltext_mock
    ):
        """Rerank requested with an empty fulltext result → ``no_candidates``."""
        embed_mock.return_value = None
        fulltext_mock.return_value = []

        response = SearchService().search(self._fulltext_request(rerank=True))

        self.assertFalse(response.reranker_used)
        self.assertEqual(response.reranker_skip_reason, "no_candidates")

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_respects_limit(self, embed_mock, fulltext_mock):
        """Twenty fulltext hits with ``limit=5`` give at most five candidates."""

        def numbered(i):
            # Distinct identifiers per hit, with scores decreasing by index.
            return SearchCandidate(
                chunk_uid=f"c{i}", item_uid=f"i{i}", item_title=f"Title {i}",
                library_type="technical", text_preview=f"preview {i}",
                chunk_s3_key=f"s3/{i}", chunk_index=i, score=0.5 - i * 0.01,
                source="fulltext",
            )

        embed_mock.return_value = None
        fulltext_mock.return_value = [numbered(i) for i in range(20)]
        req = SearchRequest(
            query="test",
            search_types=["fulltext"],
            limit=5,
            rerank=False,
            include_images=False,
        )

        response = SearchService().search(req)

        self.assertLessEqual(len(response.candidates), 5)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_tracks_types_used(self, embed_mock):
        """A search type that returns no hits is absent from ``search_types_used``."""
        embed_mock.return_value = None
        req = self._fulltext_request(rerank=False)
        service = SearchService()

        # Fulltext is stubbed on this instance to come back empty.
        empty_fulltext = patch.object(service, "_fulltext_search", return_value=[])
        with empty_fulltext:
            response = service.search(req)

        # The fulltext stage produced no results, so it is not counted as used.
        self.assertEqual(response.search_types_used, [])
class SearchServiceHelperTest(TestCase):
    """Tests for the instruction-lookup helpers on SearchService."""

    @staticmethod
    def _vector_candidate(chunk_uid, library_type):
        """Build a minimal vector-sourced candidate for instruction lookups."""
        return SearchCandidate(
            chunk_uid=chunk_uid,
            item_uid="i1",
            item_title="T",
            library_type=library_type,
            text_preview="p",
            chunk_s3_key="s3",
            chunk_index=0,
            score=0.5,
            source="vector",
        )

    def test_get_type_embedding_instruction(self):
        """The instruction for ``technical`` mentions that type by name."""
        text = SearchService()._get_type_embedding_instruction("technical")
        self.assertIn("technical", text.lower())

    def test_get_type_embedding_instruction_unknown(self):
        """An unrecognised library type maps to the empty string."""
        text = SearchService()._get_type_embedding_instruction("nonexistent")
        self.assertEqual(text, "")

    def test_get_reranker_instruction_from_type(self):
        """A request scoped to ``fiction`` gets an instruction mentioning it."""
        req = SearchRequest(query="test", library_type="fiction")
        text = SearchService()._get_reranker_instruction(req, [])
        self.assertIn("fiction", text.lower())

    def test_get_reranker_instruction_generic_for_unscoped(self):
        """
        A request with no library scope receives the generic instruction,
        even if every candidate has the same library_type — a type-specific
        instruction could bias the reranker against minority-type results.
        """
        req = SearchRequest(query="test")
        same_type_hits = [
            self._vector_candidate(f"c{i}", "technical") for i in range(5)
        ]
        text = SearchService()._get_reranker_instruction(req, same_type_hits)
        self.assertEqual(text, SearchService.GENERIC_RERANKER_INSTRUCTION)

    def test_get_reranker_instruction_generic_when_no_context(self):
        """No scope on the request and an untyped candidate → generic instruction."""
        req = SearchRequest(query="test")
        untyped_hits = [self._vector_candidate("c1", "")]
        text = SearchService()._get_reranker_instruction(req, untyped_hits)
        self.assertEqual(text, SearchService.GENERIC_RERANKER_INSTRUCTION)