fix(search): require library match and preserve raw scores for RRF

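Replace OPTIONAL MATCH with MATCH for Library-Collection-Item paths to ensure results are properly scoped to libraries, and remove per-query score normalization since Reciprocal Rank Fusion (RRF) fuses results by rank rather than score magnitude. Also aggregate graph-search rows per chunk using the maximum concept score, stop passing top_n to the reranker so it scores every candidate (final trimming to the request limit happens in search()), use a generic reranker instruction for unscoped queries instead of the dominant candidate library type's instruction, and log a warning when a library's reranker or embedding instruction cannot be loaded.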
2026-04-26 06:35:11 -04:00
parent 4a35aa126f
commit 388b37e471
3 changed files with 55 additions and 360 deletions
--- a/mnemosyne/library/services/search.py
+++ b/mnemosyne/library/services/search.py
@@ -247,7 +247,7 @@ class SearchService:
            CALL db.index.vector.queryNodes('chunk_embedding_index', $top_k, $query_vector)
            YIELD node AS chunk, score
            MATCH (item:Item)-[:HAS_CHUNK]->(chunk)
-            OPTIONAL MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
+            MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
            WHERE ($library_uid IS NULL OR lib.uid = $library_uid)
              AND ($library_type IS NULL OR lib.library_type = $library_type)
              AND ($collection_uid IS NULL OR col.uid = $collection_uid)
@@ -352,7 +352,7 @@ class SearchService:
            CALL db.index.fulltext.queryNodes('chunk_text_fulltext', $query)
            YIELD node AS chunk, score
            MATCH (item:Item)-[:HAS_CHUNK]->(chunk)
-            OPTIONAL MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
+            MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
            WHERE ($library_uid IS NULL OR lib.uid = $library_uid)
              AND ($library_type IS NULL OR lib.library_type = $library_type)
              AND ($collection_uid IS NULL OR col.uid = $collection_uid)
@@ -374,15 +374,13 @@ class SearchService:

        try:
            results, _ = db.cypher_query(cypher, params)
-            # Normalize BM25 scores to 0-1 range
-            max_score = max((float(r[7]) for r in results if r[7]), default=1.0)
+            # Keep raw BM25 scores — RRF fuses by rank, not by score magnitude.
            for row in results:
                uid = row[0]
                if not uid:
                    continue
                raw_score = float(row[7]) if row[7] else 0.0
-                normalized = raw_score / max_score if max_score > 0 else 0.0
-                if uid not in candidates or normalized > candidates[uid].score:
+                if uid not in candidates or raw_score > candidates[uid].score:
                    candidates[uid] = SearchCandidate(
                        chunk_uid=uid,
                        text_preview=row[1] or "",
@@ -391,7 +389,7 @@ class SearchService:
                        item_uid=row[4] or "",
                        item_title=row[5] or "",
                        library_type=row[6] or "",
-                        score=normalized,
+                        score=raw_score,
                        source="fulltext",
                    )
        except Exception as exc:
@@ -409,7 +407,7 @@ class SearchService:
            YIELD node AS concept, score AS concept_score
            MATCH (chunk:Chunk)-[:MENTIONS]->(concept)
            MATCH (item:Item)-[:HAS_CHUNK]->(chunk)
-            OPTIONAL MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
+            MATCH (lib:Library)-[:CONTAINS]->(:Collection)-[:CONTAINS]->(item)
            WHERE ($library_uid IS NULL OR lib.uid = $library_uid)
              AND ($library_type IS NULL OR lib.library_type = $library_type)
            RETURN chunk.uid AS chunk_uid, chunk.text_preview AS text_preview,
@@ -430,14 +428,13 @@ class SearchService:

        try:
            results, _ = db.cypher_query(cypher, params)
-            max_score = max((float(r[7]) for r in results if r[7]), default=1.0)
+            # Raw scores already include the 0.8 concept downweight from Cypher.
            for row in results:
                uid = row[0]
                if not uid:
                    continue
                raw_score = float(row[7]) if row[7] else 0.0
-                normalized = raw_score / max_score if max_score > 0 else 0.0
-                if uid not in candidates or normalized > candidates[uid].score:
+                if uid not in candidates or raw_score > candidates[uid].score:
                    candidates[uid] = SearchCandidate(
                        chunk_uid=uid,
                        text_preview=row[1] or "",
@@ -446,7 +443,7 @@ class SearchService:
                        item_uid=row[4] or "",
                        item_title=row[5] or "",
                        library_type=row[6] or "",
-                        score=normalized,
+                        score=raw_score,
                        source="fulltext",
                    )
        except Exception as exc:
@@ -476,17 +473,17 @@ class SearchService:
            LIMIT 10
            MATCH (chunk:Chunk)-[:MENTIONS]->(concept)
            MATCH (item:Item)-[:HAS_CHUNK]->(chunk)
-            OPTIONAL MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
+            MATCH (lib:Library)-[:CONTAINS]->(:Collection)-[:CONTAINS]->(item)
            WHERE ($library_uid IS NULL OR lib.uid = $library_uid)
              AND ($library_type IS NULL OR lib.library_type = $library_type)
-            WITH chunk, item, lib, concept, concept_score,
-                 count(DISTINCT concept) AS concept_count
-            RETURN DISTINCT chunk.uid AS chunk_uid, chunk.text_preview AS text_preview,
+            WITH chunk, item, lib,
+                 max(concept_score) AS score,
+                 collect(DISTINCT concept.name)[..5] AS concept_names
+            RETURN chunk.uid AS chunk_uid, chunk.text_preview AS text_preview,
                   chunk.chunk_s3_key AS chunk_s3_key, chunk.chunk_index AS chunk_index,
                   item.uid AS item_uid, item.title AS item_title,
                   lib.library_type AS library_type,
-                   concept_score AS score,
-                   collect(concept.name)[..5] AS concept_names
+                   score, concept_names
            ORDER BY score DESC
            LIMIT $limit
        """
@@ -504,16 +501,12 @@ class SearchService:
            logger.error("Graph search failed: %s", exc)
            return []

-        # Normalize scores
-        max_score = max((float(r[7]) for r in results if r[7]), default=1.0)
-
        candidates = []
        for row in results:
            uid = row[0]
            if not uid:
                continue
            raw_score = float(row[7]) if row[7] else 0.0
-            normalized = raw_score / max_score if max_score > 0 else 0.0
            concept_names = row[8] if len(row) > 8 else []

            candidates.append(
@@ -525,7 +518,7 @@ class SearchService:
                    item_uid=row[4] or "",
                    item_title=row[5] or "",
                    library_type=row[6] or "",
-                    score=normalized,
+                    score=raw_score,
                    source="graph",
                    metadata={"concepts": concept_names},
                )
@@ -562,7 +555,7 @@ class SearchService:
            YIELD node AS emb_node, score
            MATCH (img:Image)-[:HAS_EMBEDDING]->(emb_node)
            MATCH (item:Item)-[:HAS_IMAGE]->(img)
-            OPTIONAL MATCH (lib:Library)-[:CONTAINS]->(col:Collection)-[:CONTAINS]->(item)
+            MATCH (lib:Library)-[:CONTAINS]->(:Collection)-[:CONTAINS]->(item)
            WHERE ($library_uid IS NULL OR lib.uid = $library_uid)
              AND ($library_type IS NULL OR lib.library_type = $library_type)
            RETURN img.uid AS image_uid, img.image_type AS image_type,
@@ -642,11 +635,13 @@ class SearchService:

        try:
            client = RerankerClient(reranker_model, user=self.user)
+            # Don't pass top_n — let the reranker score every candidate so
+            # cross-attention can promote items the RRF stage ranked low.
+            # Final trimming to request.limit happens in search().
            reranked = client.rerank(
                query=request.query,
                candidates=candidates_to_rerank,
                instruction=instruction,
-                top_n=request.limit,
                query_image=request.query_image,
            )
            return reranked, reranker_model.name
@@ -660,22 +655,27 @@ class SearchService:
    # Helpers
    # ------------------------------------------------------------------

+    GENERIC_RERANKER_INSTRUCTION = (
+        "Re-rank these passages by relevance to the query."
+    )
+
    def _get_reranker_instruction(
        self, request: SearchRequest, candidates: list[SearchCandidate]
    ) -> str:
        """
        Get the content-type-aware reranker instruction.

-        If scoped to a library or library type, use that type's instruction.
-        If mixed types, use a generic instruction.
+        Scoped queries (by library or library type) use that type's
+        instruction. Unscoped queries — even when results happen to
+        come mostly from one type — use a generic instruction so the
+        reranker is not biased toward the majority type.

        :param request: SearchRequest.
-        :param candidates: Candidates (used to detect dominant library type).
+        :param candidates: Candidates (unused; kept for API stability).
        :returns: Reranker instruction string.
        """
        from library.content_types import get_library_type_config

-        # Use explicit library type from request
        if request.library_type:
            try:
                config = get_library_type_config(request.library_type)
@@ -683,25 +683,12 @@ class SearchService:
            except ValueError:
                pass

-        # Use library UID to look up type
        if request.library_uid:
-            return self._get_library_reranker_instruction(request.library_uid)
+            instruction = self._get_library_reranker_instruction(request.library_uid)
+            if instruction:
+                return instruction

-        # Detect dominant type from candidates
-        type_counts: dict[str, int] = {}
-        for c in candidates:
-            if c.library_type:
-                type_counts[c.library_type] = type_counts.get(c.library_type, 0) + 1
-
-        if type_counts:
-            dominant_type = max(type_counts, key=type_counts.get)
-            try:
-                config = get_library_type_config(dominant_type)
-                return config.get("reranker_instruction", "")
-            except ValueError:
-                pass
-
-        return ""
+        return self.GENERIC_RERANKER_INSTRUCTION

    def _get_library_reranker_instruction(self, library_uid: str) -> str:
        """Get reranker_instruction from a Library node."""
@@ -710,7 +697,12 @@ class SearchService:

            lib = Library.nodes.get(uid=library_uid)
            return lib.reranker_instruction or ""
-        except Exception:
+        except Exception as exc:
+            logger.warning(
+                "Failed to load reranker_instruction for library_uid=%s: %s",
+                library_uid,
+                exc,
+            )
            return ""

    def _get_embedding_instruction(self, library_uid: str) -> str:
@@ -720,7 +712,12 @@ class SearchService:

            lib = Library.nodes.get(uid=library_uid)
            return lib.embedding_instruction or ""
-        except Exception:
+        except Exception as exc:
+            logger.warning(
+                "Failed to load embedding_instruction for library_uid=%s: %s",
+                library_uid,
+                exc,
+            )
            return ""

    def _get_type_embedding_instruction(self, library_type: str) -> str:
--- a/mnemosyne/library/tests/test_search.py
+++ b/mnemosyne/library/tests/test_search.py
@@ -225,8 +225,12 @@ class SearchServiceHelperTest(TestCase):
        instruction = service._get_reranker_instruction(request, [])
        self.assertIn("fiction", instruction.lower())

-    def test_get_reranker_instruction_from_candidates(self):
-        """Detects dominant library type from candidate list."""
+    def test_get_reranker_instruction_generic_for_unscoped(self):
+        """
+        Unscoped queries get the generic instruction even when candidates
+        all share a library_type — type-specific instructions could bias
+        the reranker against minority-type results.
+        """
        service = SearchService()
        request = SearchRequest(query="test")
        candidates = [
@@ -240,10 +244,10 @@ class SearchServiceHelperTest(TestCase):
        ]

        instruction = service._get_reranker_instruction(request, candidates)
-        self.assertIn("technical", instruction.lower())
+        self.assertEqual(instruction, SearchService.GENERIC_RERANKER_INSTRUCTION)

-    def test_get_reranker_instruction_empty_when_no_context(self):
-        """Returns empty when no library type context available."""
+    def test_get_reranker_instruction_generic_when_no_context(self):
+        """Returns the generic instruction when no library scope is set."""
        service = SearchService()
        request = SearchRequest(query="test")
        candidates = [
@@ -256,4 +260,4 @@ class SearchServiceHelperTest(TestCase):
        ]

        instruction = service._get_reranker_instruction(request, candidates)
-        self.assertEqual(instruction, "")
+        self.assertEqual(instruction, SearchService.GENERIC_RERANKER_INSTRUCTION)