Implement hybrid search pipeline combining vector, fulltext, and graph search across Neo4j, with cross-attention reranking via Synesis (Qwen3-VL-Reranker-2B) `/v1/rerank` endpoint. - Add SearchService with vector, fulltext, and graph search strategies - Add SynesisRerankerClient for multimodal reranking via HTTP API - Add search API endpoint (POST /search/) with filtering by library, collection, and library_type - Add SearchRequest/Response serializers and image search results - Add "nonfiction" to library_type choices - Consolidate reranker stack from two models to single Synesis service - Handle image analysis_status as "skipped" when analysis is unavailable - Add comprehensive tests for search pipeline and reranker client
260 lines
9.5 KiB
Python
"""
|
|
Tests for the SearchService.
|
|
|
|
Neo4j queries and embedding calls are mocked — no external services needed.
|
|
"""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from django.test import TestCase, override_settings
|
|
|
|
from library.services.fusion import SearchCandidate
|
|
from library.services.search import SearchRequest, SearchResponse, SearchService
|
|
|
|
|
|
class SearchServiceInitTest(TestCase):
    """Construction behaviour of SearchService."""

    def test_creates_without_user(self):
        """A service built with no arguments has no associated user."""
        self.assertIsNone(SearchService().user)

    def test_creates_with_user(self):
        """The user passed at construction is retained for usage tracking."""
        fake_user = MagicMock()
        self.assertEqual(SearchService(user=fake_user).user, fake_user)
|
|
|
|
|
|
class SearchServiceSearchTest(TestCase):
    """Behaviour of SearchService.search() orchestration."""

    @staticmethod
    def _candidate(source, score, index=0, uid="c1", item="i1", title="Test",
                   preview="preview", s3_key="s3/key"):
        """Build a technical-library SearchCandidate with overridable fields."""
        return SearchCandidate(
            chunk_uid=uid, item_uid=item, item_title=title,
            library_type="technical", text_preview=preview,
            chunk_s3_key=s3_key, chunk_index=index, score=score,
            source=source,
        )

    @patch("library.services.search.SearchService._image_search")
    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._graph_search")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._vector_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_calls_all_types(
        self, embed, vector, fulltext, graph, rerank, image
    ):
        """Every enabled search strategy is dispatched to."""
        embed.return_value = [0.1] * 2048
        vector.return_value = [self._candidate("vector", 0.9)]
        fulltext.return_value = []
        graph.return_value = []
        rerank.return_value = (None, None)
        image.return_value = []

        response = SearchService().search(
            SearchRequest(
                query="test query",
                search_types=["vector", "fulltext", "graph"],
            )
        )

        for strategy in (embed, vector, fulltext, graph):
            strategy.assert_called_once()

        self.assertIsInstance(response, SearchResponse)
        self.assertEqual(response.query, "test query")
        self.assertGreater(len(response.candidates), 0)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_embedding_model(self, embed):
        """Search degrades gracefully when no embedding model is available."""
        embed.return_value = None

        response = SearchService().search(
            SearchRequest(
                query="test",
                search_types=["vector"],
                rerank=False,
                include_images=False,
            )
        )

        # Only vector search was requested and no embedding could be
        # produced, so nothing is retrievable.
        self.assertEqual(len(response.candidates), 0)

    @patch("library.services.search.SearchService._rerank")
    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_with_reranking(self, embed, fulltext, rerank):
        """When enabled, reranker output replaces the fused ordering."""
        embed.return_value = None
        fulltext.return_value = [self._candidate("fulltext", 0.5)]
        rerank.return_value = (
            [self._candidate("fulltext", 0.95)],
            "qwen3-vl-reranker-2b",
        )

        response = SearchService().search(
            SearchRequest(
                query="test",
                search_types=["fulltext"],
                rerank=True,
                include_images=False,
            )
        )

        self.assertTrue(response.reranker_used)
        self.assertEqual(response.reranker_model, "qwen3-vl-reranker-2b")
        self.assertAlmostEqual(response.candidates[0].score, 0.95)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_without_reranking(self, embed, fulltext):
        """Reranking is skipped entirely when disabled on the request."""
        embed.return_value = None
        fulltext.return_value = [self._candidate("fulltext", 0.5)]

        response = SearchService().search(
            SearchRequest(
                query="test",
                search_types=["fulltext"],
                rerank=False,
                include_images=False,
            )
        )

        self.assertFalse(response.reranker_used)
        self.assertIsNone(response.reranker_model)

    @patch("library.services.search.SearchService._fulltext_search")
    @patch("library.services.search.SearchService._embed_query")
    def test_search_respects_limit(self, embed, fulltext):
        """No more than the requested limit of candidates is returned."""
        embed.return_value = None
        fulltext.return_value = [
            self._candidate(
                "fulltext", 0.5 - i * 0.01, index=i, uid=f"c{i}",
                item=f"i{i}", title=f"Title {i}", preview=f"preview {i}",
                s3_key=f"s3/{i}",
            )
            for i in range(20)
        ]

        response = SearchService().search(
            SearchRequest(
                query="test",
                search_types=["fulltext"],
                limit=5,
                rerank=False,
                include_images=False,
            )
        )

        self.assertLessEqual(len(response.candidates), 5)

    @patch("library.services.search.SearchService._embed_query")
    def test_search_tracks_types_used(self, embed):
        """search_types_used reflects only strategies that produced hits."""
        embed.return_value = None

        request = SearchRequest(
            query="test",
            search_types=["fulltext"],
            rerank=False,
            include_images=False,
        )

        service = SearchService()

        # Force fulltext to come back empty — the type must not be reported.
        with patch.object(service, "_fulltext_search", return_value=[]):
            response = service.search(request)

        # Fulltext ran but yielded nothing, so it is not listed as used.
        self.assertEqual(response.search_types_used, [])
|
|
|
|
|
|
class SearchServiceHelperTest(TestCase):
    """Behaviour of SearchService's private helper methods."""

    def test_get_type_embedding_instruction(self):
        """Known library types resolve to a type-specific instruction."""
        result = SearchService()._get_type_embedding_instruction("technical")
        self.assertIn("technical", result.lower())

    def test_get_type_embedding_instruction_unknown(self):
        """Unknown library types fall back to an empty instruction."""
        result = SearchService()._get_type_embedding_instruction("nonexistent")
        self.assertEqual(result, "")

    def test_get_reranker_instruction_from_type(self):
        """An explicit library_type on the request drives the instruction."""
        request = SearchRequest(query="test", library_type="fiction")
        result = SearchService()._get_reranker_instruction(request, [])
        self.assertIn("fiction", result.lower())

    def test_get_reranker_instruction_from_candidates(self):
        """With no type on the request, the dominant candidate type wins."""
        pool = [
            SearchCandidate(
                chunk_uid=f"c{i}", item_uid="i1", item_title="T",
                library_type="technical", text_preview="p",
                chunk_s3_key="s3", chunk_index=0, score=0.5,
                source="vector",
            )
            for i in range(5)
        ]

        result = SearchService()._get_reranker_instruction(
            SearchRequest(query="test"), pool
        )
        self.assertIn("technical", result.lower())

    def test_get_reranker_instruction_empty_when_no_context(self):
        """No request type plus typeless candidates yields no instruction."""
        pool = [
            SearchCandidate(
                chunk_uid="c1", item_uid="i1", item_title="T",
                library_type="", text_preview="p",
                chunk_s3_key="s3", chunk_index=0, score=0.5,
                source="vector",
            )
        ]

        result = SearchService()._get_reranker_instruction(
            SearchRequest(query="test"), pool
        )
        self.assertEqual(result, "")
|