Files
mnemosyne/mnemosyne/library/tests/test_search.py
Robert Helewka 634845fee0 feat: add Phase 3 hybrid search with Synesis reranking
Implement hybrid search pipeline combining vector, fulltext, and graph
search across Neo4j, with cross-attention reranking via Synesis
(Qwen3-VL-Reranker-2B) `/v1/rerank` endpoint.

- Add SearchService with vector, fulltext, and graph search strategies
- Add SynesisRerankerClient for multimodal reranking via HTTP API
- Add search API endpoint (POST /search/) with filtering by library,
  collection, and library_type
- Add SearchRequest/Response serializers and image search results
- Add "nonfiction" to library_type choices
- Consolidate reranker stack from two models to single Synesis service
- Handle image analysis_status as "skipped" when analysis is unavailable
- Add comprehensive tests for search pipeline and reranker client
2026-03-29 18:09:50 +00:00

260 lines
9.5 KiB
Python

"""
Tests for the SearchService.
Neo4j queries and embedding calls are mocked — no external services needed.
"""
from unittest.mock import MagicMock, patch
from django.test import TestCase, override_settings
from library.services.fusion import SearchCandidate
from library.services.search import SearchRequest, SearchResponse, SearchService
class SearchServiceInitTest(TestCase):
    """Checks what SearchService exposes as ``user`` after construction."""

    def test_creates_without_user(self):
        """Constructing with no arguments leaves ``user`` as None."""
        self.assertIsNone(SearchService().user)

    def test_creates_with_user(self):
        """The ``user`` passed to the constructor is kept on the instance."""
        fake_user = MagicMock()
        self.assertEqual(SearchService(user=fake_user).user, fake_user)
class SearchServiceSearchTest(TestCase):
    """Exercises SearchService.search() with its stage methods mocked out.

    Each test patches the private SearchService methods it needs
    (``_embed_query``, ``_vector_search``, ...) and then inspects the
    SearchResponse returned by ``search()``.
    """

    @staticmethod
    def _make_candidate(score, source, **overrides):
        """Build a SearchCandidate, filling in the fields a test does not vary."""
        fields = {
            "chunk_uid": "c1",
            "item_uid": "i1",
            "item_title": "Test",
            "library_type": "technical",
            "text_preview": "preview",
            "chunk_s3_key": "s3/key",
            "chunk_index": 0,
            "score": score,
            "source": source,
        }
        fields.update(overrides)
        return SearchCandidate(**fields)

    @staticmethod
    def _execute_search(**request_fields):
        """Build a SearchRequest from keyword arguments and run it on a new service."""
        request = SearchRequest(**request_fields)
        return SearchService().search(request)

    @patch("library.services.search.SearchService._image_search")
    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._graph_search")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._vector_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_calls_all_types(
        self, mock_embed, mock_vector, mock_fulltext, mock_graph,
        mock_rerank, mock_image
    ):
        """Requesting vector, fulltext and graph invokes each stage once."""
        mock_embed.return_value = [0.1] * 2048
        mock_vector.return_value = [
            self._make_candidate(score=0.9, source="vector"),
        ]
        # Only the vector stage yields a hit; the other stages come back empty.
        for empty_stage in (mock_fulltext, mock_graph, mock_image):
            empty_stage.return_value = []
        mock_rerank.return_value = (None, None)

        response = self._execute_search(
            query="test query",
            search_types=["vector", "fulltext", "graph"],
        )

        for stage in (mock_embed, mock_vector, mock_fulltext, mock_graph):
            stage.assert_called_once()
        self.assertIsInstance(response, SearchResponse)
        self.assertEqual(response.query, "test query")
        self.assertGreater(len(response.candidates), 0)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_embedding_model(self, mock_embed):
        """A vector-only request yields no candidates when embedding returns None."""
        mock_embed.return_value = None

        response = self._execute_search(
            query="test",
            search_types=["vector"],
            rerank=False,
            include_images=False,
        )

        # Vector was the only requested type and there is no query embedding.
        self.assertEqual(len(response.candidates), 0)

    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_with_reranking(self, mock_embed, mock_fulltext, mock_rerank):
        """With rerank=True the response carries the reranker's model and score."""
        mock_embed.return_value = None
        mock_fulltext.return_value = [
            self._make_candidate(score=0.5, source="fulltext"),
        ]
        # Same chunk as the fulltext hit, but with the score the reranker assigns.
        rescored = self._make_candidate(score=0.95, source="fulltext")
        mock_rerank.return_value = ([rescored], "qwen3-vl-reranker-2b")

        response = self._execute_search(
            query="test",
            search_types=["fulltext"],
            rerank=True,
            include_images=False,
        )

        self.assertTrue(response.reranker_used)
        self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
        self.assertAlmostEqual(response.candidates[0].score, 0.95)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_reranking(self, mock_embed, mock_fulltext):
        """With rerank=False the response reports no reranker and no model."""
        mock_embed.return_value = None
        mock_fulltext.return_value = [
            self._make_candidate(score=0.5, source="fulltext"),
        ]

        response = self._execute_search(
            query="test",
            search_types=["fulltext"],
            rerank=False,
            include_images=False,
        )

        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_respects_limit(self, mock_embed, mock_fulltext):
        """Twenty fulltext hits with limit=5 produce at most five candidates."""
        mock_embed.return_value = None
        mock_fulltext.return_value = [
            self._make_candidate(
                score=0.5 - i * 0.01,
                source="fulltext",
                chunk_uid=f"c{i}",
                item_uid=f"i{i}",
                item_title=f"Title {i}",
                text_preview=f"preview {i}",
                chunk_s3_key=f"s3/{i}",
                chunk_index=i,
            )
            for i in range(20)
        ]

        response = self._execute_search(
            query="test",
            search_types=["fulltext"],
            limit=5,
            rerank=False,
            include_images=False,
        )

        self.assertLessEqual(len(response.candidates), 5)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_tracks_types_used(self, mock_embed):
        """A search type that returns nothing is not listed in search_types_used."""
        mock_embed.return_value = None
        request = SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=False,
            include_images=False,
        )
        service = SearchService()

        # Patch the instance so fulltext runs but comes back empty.
        with patch.object(service, "_fulltext_search", return_value=[]):
            response = service.search(request)

        # An empty fulltext result is expected to leave the used-types list empty.
        self.assertEqual(response.search_types_used, [])
class SearchServiceHelperTest(TestCase):
    """Covers the instruction-lookup helpers on SearchService."""

    @staticmethod
    def _vector_candidate(chunk_uid, library_type):
        """Build a vector-sourced SearchCandidate varying only uid and library type."""
        return SearchCandidate(
            chunk_uid=chunk_uid,
            item_uid="i1",
            item_title="T",
            library_type=library_type,
            text_preview="p",
            chunk_s3_key="s3",
            chunk_index=0,
            score=0.5,
            source="vector",
        )

    def test_get_type_embedding_instruction(self):
        """The "technical" embedding instruction mentions the type name."""
        text = SearchService()._get_type_embedding_instruction("technical")
        self.assertIn("technical", text.lower())

    def test_get_type_embedding_instruction_unknown(self):
        """An unrecognised library type yields an empty instruction string."""
        text = SearchService()._get_type_embedding_instruction("nonexistent")
        self.assertEqual(text, "")

    def test_get_reranker_instruction_from_type(self):
        """A request with library_type="fiction" gives an instruction naming it."""
        service = SearchService()
        fiction_request = SearchRequest(query="test", library_type="fiction")
        text = service._get_reranker_instruction(fiction_request, [])
        self.assertIn("fiction", text.lower())

    def test_get_reranker_instruction_from_candidates(self):
        """With no type on the request, the candidates' type drives the result."""
        service = SearchService()
        untyped_request = SearchRequest(query="test")
        # Five candidates that all share the "technical" library type.
        technical_hits = [
            self._vector_candidate(f"c{i}", "technical") for i in range(5)
        ]
        text = service._get_reranker_instruction(untyped_request, technical_hits)
        self.assertIn("technical", text.lower())

    def test_get_reranker_instruction_empty_when_no_context(self):
        """No type on the request or the candidate gives an empty instruction."""
        service = SearchService()
        untyped_request = SearchRequest(query="test")
        untyped_hits = [self._vector_candidate("c1", "")]
        text = service._get_reranker_instruction(untyped_request, untyped_hits)
        self.assertEqual(text, "")