From 4cf022e61589339558a3bbd8cfc2f2206229145f Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Fri, 8 May 2026 05:58:36 -0400 Subject: [PATCH] feat: add image query support to search service and library UI - Add `query_image_ext` field to `SearchRequest` (defaults to "png") - Embed query from image when supplied and model supports multimodal, with fallback to text embedding on failure or unsupported model - Add search form to library detail page with optional image upload, shown only when multimodal embeddings are available - Display side-by-side baseline vs re-ranked results with query mode indicator, timing stats, and score/rank change highlighting --- mnemosyne/library/services/search.py | 26 +++ .../templates/library/library_detail.html | 173 ++++++++++++++++++ mnemosyne/library/urls.py | 1 + mnemosyne/library/views.py | 132 ++++++++++++- .../templates/mnemosyne/dashboard.html | 5 +- 5 files changed, 334 insertions(+), 3 deletions(-) diff --git a/mnemosyne/library/services/search.py b/mnemosyne/library/services/search.py index c4d4ea0..234ce39 100644 --- a/mnemosyne/library/services/search.py +++ b/mnemosyne/library/services/search.py @@ -60,6 +60,7 @@ class SearchRequest: query: str query_image: Optional[bytes] = None + query_image_ext: str = "png" library_uid: Optional[str] = None library_type: Optional[str] = None collection_uid: Optional[str] = None @@ -263,6 +264,31 @@ class SearchService: try: client = EmbeddingClient(embedding_model, user=self.user) + + # Prefer image embedding when an image is supplied AND the system + # model supports multimodal. Text still flows through fulltext + # search and the reranker independently. + if request.query_image and embedding_model.supports_multimodal: + vector = client.embed_image( + request.query_image, image_ext=request.query_image_ext + ) + if vector is not None: + logger.debug( + "Query embedded from image dimensions=%d ext=%s", + len(vector), + request.query_image_ext, + ) + return vector + logger.warning( + "Image embedding returned None — falling back to text query" + ) + elif request.query_image: + logger.warning( + "query_image supplied but model %s lacks supports_multimodal " + "— falling back to text", + embedding_model.name, + ) + vector = client.embed_text(query_text) logger.debug( "Query embedded dimensions=%d instruction_len=%d", diff --git a/mnemosyne/library/templates/library/library_detail.html b/mnemosyne/library/templates/library/library_detail.html index 59de668..1991d39 100644 --- a/mnemosyne/library/templates/library/library_detail.html +++ b/mnemosyne/library/templates/library/library_detail.html @@ -49,6 +49,179 @@ + +
+
+

Search this library

+

+ Runs the query twice — once without the re-ranker, once with — so you can see what the re-ranker changed. +

+ +
+ {% csrf_token %} +
+
+ + +
+ + {% if multimodal_available %} +
+ + +
+ {% else %} +
+ Multimodal embeddings not configured — image search disabled. +
+ {% endif %} +
+
+ + {% if search_error %} +
+ {{ search_error }} +
+ {% endif %} + + {% if results_baseline and results_reranked %} +
+
+
Query mode
+
+ {% if search_used_image %} + image + text + {% else %} + text + {% endif %} +
+
+
+
Baseline time
+
{{ results_baseline.search_time_ms|floatformat:0 }}ms
+
+
+
Re-ranked time
+
{{ results_reranked.search_time_ms|floatformat:0 }}ms
+
+
+
Re-ranker
+
+ {% if results_reranked.reranker_used %} + {{ results_reranked.reranker_model|default:"on" }} + {% else %} + unavailable + {% endif %} +
+
+
+ +
+ +
+

Without re-ranker (fusion order)

+ {% if results_baseline.candidates %} +
+ {% for candidate in results_baseline.candidates %} +
+
+
+
+
#{{ forloop.counter }}
+ + {{ candidate.item_title }} + + chunk #{{ candidate.chunk_index }} +

{{ candidate.text_preview }}

+
+
+ {{ candidate.score|floatformat:4 }} + {{ candidate.source }} +
+
+
+
+ {% endfor %} +
+ {% else %} +
No results.
+ {% endif %} +
+ + +
+

With re-ranker

+ {% if results_reranked.candidates %} +
+ {% for candidate in results_reranked.candidates %} +
+
+
+
+
+ #{{ forloop.counter }} + {% if candidate.rank_delta_kind == "new" %} + new + {% elif candidate.rank_delta_kind == "up" %} + {{ candidate.rank_delta_label }} + {% elif candidate.rank_delta_kind == "down" %} + {{ candidate.rank_delta_label }} + {% else %} + = + {% endif %} +
+ + {{ candidate.item_title }} + + chunk #{{ candidate.chunk_index }} +

{{ candidate.text_preview }}

+
+
+ {{ candidate.score|floatformat:4 }} + {{ candidate.source }} +
+
+
+
+ {% endfor %} +
+ {% else %} +
No results.
+ {% endif %} +
+
+ + {% if results_reranked.images %} +

Image results

+
+ {% for image in results_reranked.images %} +
+
+
{{ image.image_type }}
+

{{ image.description }}

+
+ {{ image.item_title|truncatechars:30 }} + {{ image.score|floatformat:3 }} +
+
+
+ {% endfor %} +
+ {% endif %} + {% endif %} +
+
+

Collections

diff --git a/mnemosyne/library/urls.py b/mnemosyne/library/urls.py index 2a5dde8..7d78944 100644 --- a/mnemosyne/library/urls.py +++ b/mnemosyne/library/urls.py @@ -18,6 +18,7 @@ urlpatterns = [ path("", views.library_list, name="library-list"), path("create/", views.library_create, name="library-create"), path("/", views.library_detail, name="library-detail"), + path("/search/", views.library_search, name="library-search"), path("/edit/", views.library_edit, name="library-edit"), path("/delete/", views.library_delete, name="library-delete"), # Collection CRUD diff --git a/mnemosyne/library/views.py b/mnemosyne/library/views.py index 76dfdc1..e57a406 100644 --- a/mnemosyne/library/views.py +++ b/mnemosyne/library/views.py @@ -92,6 +92,31 @@ def library_create(request): return render(request, "library/library_form.html", {"form": form, "editing": False}) +def _library_detail_context(library): + """ + Build the base context for the library detail page. + + Shared between ``library_detail`` and ``library_search`` so the search + POST handler renders the same page chrome plus its results layered on + top. + """ + from llm_manager.models import LLMModel + + embedding_model = LLMModel.get_system_embedding_model() + multimodal_available = bool(embedding_model and embedding_model.supports_multimodal) + + return { + "library": library, + "collections": library.collections.all(), + "multimodal_available": multimodal_available, + "search_query": "", + "search_used_image": False, + "results_baseline": None, + "results_reranked": None, + "search_error": None, + } + + @login_required def library_detail(request, uid): """View library details and its collections.""" @@ -99,17 +124,120 @@ def library_detail(request, uid): from .models import Library lib = Library.nodes.get(uid=uid) - collections = lib.collections.all() except Exception as e: messages.error(request, f"Library not found: {e}") return redirect("library:library-list") return render( request, "library/library_detail.html", - {"library": lib, "collections": collections}, + _library_detail_context(lib), ) +# Cap query-image uploads at 8 MB. Multimodal embedders happily accept +# larger payloads but they're slow and almost never come from a real +# in-browser screenshot/photo. +_MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024 + + +@login_required +def library_search(request, uid): + """ + Run an A/B search (with and without re-ranker) scoped to a single + library, and re-render ``library_detail.html`` with both result sets. + """ + try: + from .models import Library + + lib = Library.nodes.get(uid=uid) + except Exception as e: + messages.error(request, f"Library not found: {e}") + return redirect("library:library-list") + + context = _library_detail_context(lib) + + if request.method != "POST": + return redirect("library:library-detail", uid=uid) + + query = (request.POST.get("query") or "").strip() + context["search_query"] = query + + image_bytes = None + image_ext = "png" + uploaded = request.FILES.get("query_image") + if uploaded and context["multimodal_available"]: + if uploaded.size > _MAX_QUERY_IMAGE_BYTES: + context["search_error"] = ( + f"Image too large ({uploaded.size} bytes). " + f"Max is {_MAX_QUERY_IMAGE_BYTES} bytes." + ) + return render(request, "library/library_detail.html", context) + image_bytes = uploaded.read() + # Derive extension from the filename; default to png. The embedder + # only uses this to set the MIME type for the multimodal request. + _, ext = os.path.splitext(uploaded.name or "") + if ext.startswith("."): + ext = ext[1:].lower() + if ext: + image_ext = ext + context["search_used_image"] = True + + if not query and not image_bytes: + context["search_error"] = "Enter a query (text or image) before searching." + return render(request, "library/library_detail.html", context) + + try: + from django.conf import settings as django_settings + + from .services.search import SearchRequest, SearchService + + def _make_request(rerank: bool) -> "SearchRequest": + return SearchRequest( + query=query, + query_image=image_bytes, + query_image_ext=image_ext, + library_uid=uid, + limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20), + vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50), + fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30), + rerank=rerank, + include_images=True, + ) + + service = SearchService(user=request.user) + baseline = service.search(_make_request(rerank=False)) + reranked = service.search(_make_request(rerank=True)) + + # Annotate the reranked candidates with a rank-delta label so the + # template can render a badge without doing arithmetic. ``new`` = + # the reranker pulled this in from outside the baseline top-N. + baseline_pos = {c.chunk_uid: i for i, c in enumerate(baseline.candidates)} + for new_index, cand in enumerate(reranked.candidates): + old_index = baseline_pos.get(cand.chunk_uid) + if old_index is None: + cand.rank_delta_label = "new" + cand.rank_delta_kind = "new" + else: + delta = old_index - new_index # +N == moved up + if delta > 0: + cand.rank_delta_label = f"↑{delta}" + cand.rank_delta_kind = "up" + elif delta < 0: + cand.rank_delta_label = f"↓{-delta}" + cand.rank_delta_kind = "down" + else: + cand.rank_delta_label = "=" + cand.rank_delta_kind = "same" + + context["results_baseline"] = baseline + context["results_reranked"] = reranked + except Exception as exc: + logger.error("Library search failed: %s", exc, exc_info=True) + context["search_error"] = str(exc) + + return render(request, "library/library_detail.html", context) + + @login_required def library_edit(request, uid): """Edit an existing library.""" diff --git a/mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html b/mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html index ad6e9ab..33a0dfb 100644 --- a/mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html +++ b/mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html @@ -3,6 +3,7 @@ {% block nav_items_desktop %}
  • Dashboard
  • Libraries
  • +
  • Search
  • LLM Manager
  • API Keys
  • {% endblock %} @@ -10,6 +11,7 @@ {% block nav_items %}
  • Dashboard
  • Libraries
  • +
  • Search
  • LLM Manager
  • API Keys
  • {% endblock %} @@ -79,7 +81,8 @@