feat: add image query support to search service and library UI
- Add `query_image_ext` field to `SearchRequest` (defaults to "png")
- Embed query from image when supplied and model supports multimodal, with fallback to text embedding on failure or unsupported model
- Add search form to library detail page with optional image upload, shown only when multimodal embeddings are available
- Display side-by-side baseline vs re-ranked results with query mode indicator, timing stats, and score/rank change highlighting
This commit is contained in:
@@ -60,6 +60,7 @@ class SearchRequest:
|
||||
|
||||
query: str
|
||||
query_image: Optional[bytes] = None
|
||||
query_image_ext: str = "png"
|
||||
library_uid: Optional[str] = None
|
||||
library_type: Optional[str] = None
|
||||
collection_uid: Optional[str] = None
|
||||
@@ -263,6 +264,31 @@ class SearchService:
|
||||
|
||||
try:
|
||||
client = EmbeddingClient(embedding_model, user=self.user)
|
||||
|
||||
# Prefer image embedding when an image is supplied AND the system
|
||||
# model supports multimodal. Text still flows through fulltext
|
||||
# search and the reranker independently.
|
||||
if request.query_image and embedding_model.supports_multimodal:
|
||||
vector = client.embed_image(
|
||||
request.query_image, image_ext=request.query_image_ext
|
||||
)
|
||||
if vector is not None:
|
||||
logger.debug(
|
||||
"Query embedded from image dimensions=%d ext=%s",
|
||||
len(vector),
|
||||
request.query_image_ext,
|
||||
)
|
||||
return vector
|
||||
logger.warning(
|
||||
"Image embedding returned None — falling back to text query"
|
||||
)
|
||||
elif request.query_image:
|
||||
logger.warning(
|
||||
"query_image supplied but model %s lacks supports_multimodal "
|
||||
"— falling back to text",
|
||||
embedding_model.name,
|
||||
)
|
||||
|
||||
vector = client.embed_text(query_text)
|
||||
logger.debug(
|
||||
"Query embedded dimensions=%d instruction_len=%d",
|
||||
|
||||
@@ -49,6 +49,179 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Search this library -->
|
||||
<div class="card bg-base-200 shadow-sm mb-6">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">Search this library</h2>
|
||||
<p class="text-sm opacity-70 -mt-1">
|
||||
Runs the query twice — once without the re-ranker, once with — so you can see what the re-ranker changed.
|
||||
</p>
|
||||
|
||||
<form method="post" action="{% url 'library:library-search' uid=library.uid %}"
|
||||
enctype="multipart/form-data" class="mt-2">
|
||||
{% csrf_token %}
|
||||
<div class="flex flex-col gap-3">
|
||||
<div class="join w-full">
|
||||
<input type="text" name="query" value="{{ search_query }}"
|
||||
placeholder="Text query…"
|
||||
class="input input-bordered join-item w-full">
|
||||
<button type="submit" class="btn btn-primary join-item">Compare with re-ranker</button>
|
||||
</div>
|
||||
|
||||
{% if multimodal_available %}
|
||||
<div class="form-control">
|
||||
<label class="label py-0">
|
||||
<span class="label-text">Image query (optional)</span>
|
||||
<span class="label-text-alt opacity-60">used for the vector embedding when present</span>
|
||||
</label>
|
||||
<input type="file" name="query_image" accept="image/*"
|
||||
class="file-input file-input-bordered file-input-sm w-full max-w-md">
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="text-sm opacity-60">
|
||||
Multimodal embeddings not configured — image search disabled.
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
|
||||
{% if search_error %}
|
||||
<div class="alert alert-error mt-4">
|
||||
<span>{{ search_error }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if results_baseline and results_reranked %}
|
||||
<div class="stats shadow mt-4 w-full">
|
||||
<div class="stat">
|
||||
<div class="stat-title">Query mode</div>
|
||||
<div class="stat-value text-base">
|
||||
{% if search_used_image %}
|
||||
<span class="badge badge-secondary">image + text</span>
|
||||
{% else %}
|
||||
<span class="badge badge-ghost">text</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-title">Baseline time</div>
|
||||
<div class="stat-value text-base">{{ results_baseline.search_time_ms|floatformat:0 }}ms</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-title">Re-ranked time</div>
|
||||
<div class="stat-value text-base">{{ results_reranked.search_time_ms|floatformat:0 }}ms</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-title">Re-ranker</div>
|
||||
<div class="stat-value text-base">
|
||||
{% if results_reranked.reranker_used %}
|
||||
<span class="badge badge-success">{{ results_reranked.reranker_model|default:"on" }}</span>
|
||||
{% else %}
|
||||
<span class="badge badge-warning">unavailable</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 lg:grid-cols-2 gap-4 mt-4">
|
||||
<!-- Without reranker -->
|
||||
<div>
|
||||
<h3 class="font-semibold mb-2">Without re-ranker (fusion order)</h3>
|
||||
{% if results_baseline.candidates %}
|
||||
<div class="flex flex-col gap-2">
|
||||
{% for candidate in results_baseline.candidates %}
|
||||
<div class="card bg-base-100 shadow-sm">
|
||||
<div class="card-body py-3">
|
||||
<div class="flex justify-between items-start gap-3">
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="text-xs opacity-60">#{{ forloop.counter }}</div>
|
||||
<a href="{% url 'library:item-detail' uid=candidate.item_uid %}"
|
||||
class="link link-primary font-medium">
|
||||
{{ candidate.item_title }}
|
||||
</a>
|
||||
<span class="badge badge-xs badge-outline ml-1">chunk #{{ candidate.chunk_index }}</span>
|
||||
<p class="text-sm opacity-80 mt-1 line-clamp-3">{{ candidate.text_preview }}</p>
|
||||
</div>
|
||||
<div class="flex flex-col items-end gap-1">
|
||||
<span class="badge badge-primary badge-sm">{{ candidate.score|floatformat:4 }}</span>
|
||||
<span class="badge badge-ghost badge-xs">{{ candidate.source }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="opacity-60 text-sm">No results.</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- With reranker -->
|
||||
<div>
|
||||
<h3 class="font-semibold mb-2">With re-ranker</h3>
|
||||
{% if results_reranked.candidates %}
|
||||
<div class="flex flex-col gap-2">
|
||||
{% for candidate in results_reranked.candidates %}
|
||||
<div class="card bg-base-100 shadow-sm">
|
||||
<div class="card-body py-3">
|
||||
<div class="flex justify-between items-start gap-3">
|
||||
<div class="flex-1 min-w-0">
|
||||
<div class="text-xs opacity-60 flex items-center gap-2">
|
||||
<span>#{{ forloop.counter }}</span>
|
||||
{% if candidate.rank_delta_kind == "new" %}
|
||||
<span class="badge badge-success badge-xs">new</span>
|
||||
{% elif candidate.rank_delta_kind == "up" %}
|
||||
<span class="badge badge-success badge-xs">{{ candidate.rank_delta_label }}</span>
|
||||
{% elif candidate.rank_delta_kind == "down" %}
|
||||
<span class="badge badge-warning badge-xs">{{ candidate.rank_delta_label }}</span>
|
||||
{% else %}
|
||||
<span class="badge badge-ghost badge-xs">=</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<a href="{% url 'library:item-detail' uid=candidate.item_uid %}"
|
||||
class="link link-primary font-medium">
|
||||
{{ candidate.item_title }}
|
||||
</a>
|
||||
<span class="badge badge-xs badge-outline ml-1">chunk #{{ candidate.chunk_index }}</span>
|
||||
<p class="text-sm opacity-80 mt-1 line-clamp-3">{{ candidate.text_preview }}</p>
|
||||
</div>
|
||||
<div class="flex flex-col items-end gap-1">
|
||||
<span class="badge badge-primary badge-sm">{{ candidate.score|floatformat:4 }}</span>
|
||||
<span class="badge badge-ghost badge-xs">{{ candidate.source }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="opacity-60 text-sm">No results.</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if results_reranked.images %}
|
||||
<h3 class="font-semibold mt-6 mb-2">Image results</h3>
|
||||
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-3">
|
||||
{% for image in results_reranked.images %}
|
||||
<div class="card bg-base-100 shadow-sm">
|
||||
<div class="card-body p-3">
|
||||
<div class="badge badge-sm badge-outline mb-1">{{ image.image_type }}</div>
|
||||
<p class="text-xs opacity-80 line-clamp-2">{{ image.description }}</p>
|
||||
<div class="flex justify-between items-center mt-1">
|
||||
<a href="{% url 'library:item-detail' uid=image.item_uid %}"
|
||||
class="text-xs link link-primary">{{ image.item_title|truncatechars:30 }}</a>
|
||||
<span class="badge badge-primary badge-xs">{{ image.score|floatformat:3 }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Collections -->
|
||||
<div class="flex justify-between items-center mb-4">
|
||||
<h2 class="text-xl font-bold">Collections</h2>
|
||||
|
||||
@@ -18,6 +18,7 @@ urlpatterns = [
|
||||
path("", views.library_list, name="library-list"),
|
||||
path("create/", views.library_create, name="library-create"),
|
||||
path("<str:uid>/", views.library_detail, name="library-detail"),
|
||||
path("<str:uid>/search/", views.library_search, name="library-search"),
|
||||
path("<str:uid>/edit/", views.library_edit, name="library-edit"),
|
||||
path("<str:uid>/delete/", views.library_delete, name="library-delete"),
|
||||
# Collection CRUD
|
||||
|
||||
@@ -92,6 +92,31 @@ def library_create(request):
|
||||
return render(request, "library/library_form.html", {"form": form, "editing": False})
|
||||
|
||||
|
||||
def _library_detail_context(library):
    """
    Assemble the default template context for the library detail page.

    Both ``library_detail`` and ``library_search`` start from this mapping;
    the search handler then overwrites the ``search_*`` / ``results_*``
    entries with its own values.

    Args:
        library: The library node being displayed.

    Returns:
        A dict holding the library, its collections, a flag telling the
        template whether the system embedding model is multimodal, and
        empty placeholders for the search state.
    """
    from llm_manager.models import LLMModel

    system_model = LLMModel.get_system_embedding_model()
    # Image search is offered only when a system embedding model exists
    # and reports multimodal support.
    if system_model and system_model.supports_multimodal:
        image_search_enabled = True
    else:
        image_search_enabled = False

    context = {
        "library": library,
        "collections": library.collections.all(),
        "multimodal_available": image_search_enabled,
    }
    # Search placeholders: nothing has been searched yet.
    context["search_query"] = ""
    context["search_used_image"] = False
    context["results_baseline"] = None
    context["results_reranked"] = None
    context["search_error"] = None
    return context
|
||||
|
||||
|
||||
@login_required
|
||||
def library_detail(request, uid):
|
||||
"""View library details and its collections."""
|
||||
@@ -99,17 +124,120 @@ def library_detail(request, uid):
|
||||
from .models import Library
|
||||
|
||||
lib = Library.nodes.get(uid=uid)
|
||||
collections = lib.collections.all()
|
||||
except Exception as e:
|
||||
messages.error(request, f"Library not found: {e}")
|
||||
return redirect("library:library-list")
|
||||
return render(
|
||||
request,
|
||||
"library/library_detail.html",
|
||||
{"library": lib, "collections": collections},
|
||||
_library_detail_context(lib),
|
||||
)
|
||||
|
||||
|
||||
# Cap query-image uploads at 8 MB. Multimodal embedders happily accept
|
||||
# larger payloads but they're slow and almost never come from a real
|
||||
# in-browser screenshot/photo.
|
||||
_MAX_QUERY_IMAGE_BYTES = 8 * 1024 * 1024
|
||||
|
||||
|
||||
def _read_query_image(uploaded):
    """
    Read an uploaded query image and derive its extension.

    Args:
        uploaded: The uploaded file object taken from ``request.FILES``.

    Returns:
        A ``(image_bytes, image_ext)`` tuple. ``image_ext`` is the
        lower-cased filename extension without the leading dot, or
        ``"png"`` when the filename has no extension or the extension is
        not a plain ASCII alphanumeric token.
    """
    image_bytes = uploaded.read()
    # The filename is client-controlled and the extension is only used to
    # set the MIME type for the multimodal request, so accept nothing but
    # a simple alphanumeric token and default to png otherwise.
    _, ext = os.path.splitext(uploaded.name or "")
    ext = ext[1:].lower()
    if ext.isascii() and ext.isalnum():
        return image_bytes, ext
    return image_bytes, "png"


def _annotate_rank_deltas(baseline_candidates, reranked_candidates):
    """
    Attach ``rank_delta_kind`` / ``rank_delta_label`` to re-ranked candidates.

    Each re-ranked candidate is compared, by ``chunk_uid``, with its position
    in the baseline list so the template can render a badge without doing
    arithmetic. Kinds are ``"new"`` (absent from the baseline list), ``"up"``,
    ``"down"`` and ``"same"``.

    Args:
        baseline_candidates: Candidates from the search without re-ranking.
        reranked_candidates: Candidates from the re-ranked search; mutated
            in place.
    """
    baseline_pos = {c.chunk_uid: i for i, c in enumerate(baseline_candidates)}
    for new_index, cand in enumerate(reranked_candidates):
        old_index = baseline_pos.get(cand.chunk_uid)
        if old_index is None:
            kind, label = "new", "new"
        else:
            delta = old_index - new_index  # +N == moved up
            if delta > 0:
                kind, label = "up", f"↑{delta}"
            elif delta < 0:
                kind, label = "down", f"↓{-delta}"
            else:
                kind, label = "same", "="
        cand.rank_delta_label = label
        cand.rank_delta_kind = kind


@login_required
def library_search(request, uid):
    """
    Run an A/B search (with and without re-ranker) scoped to a single
    library, and re-render ``library_detail.html`` with both result sets.

    Non-POST requests are redirected to the library detail page. An unknown
    library redirects to the library list with an error message. Validation
    problems and search failures are reported through ``search_error`` in
    the rendered page rather than raised.
    """
    try:
        from .models import Library

        lib = Library.nodes.get(uid=uid)
    except Exception as e:
        messages.error(request, f"Library not found: {e}")
        return redirect("library:library-list")

    # Guard before building the context so a plain GET does not trigger the
    # embedding-model lookup only to throw the result away.
    if request.method != "POST":
        return redirect("library:library-detail", uid=uid)

    context = _library_detail_context(lib)

    query = (request.POST.get("query") or "").strip()
    context["search_query"] = query

    image_bytes = None
    image_ext = "png"
    uploaded = request.FILES.get("query_image")
    # An upload is only honoured when the system model can embed images.
    if uploaded and context["multimodal_available"]:
        if uploaded.size > _MAX_QUERY_IMAGE_BYTES:
            context["search_error"] = (
                f"Image too large ({uploaded.size} bytes). "
                f"Max is {_MAX_QUERY_IMAGE_BYTES} bytes."
            )
            return render(request, "library/library_detail.html", context)
        image_bytes, image_ext = _read_query_image(uploaded)
        context["search_used_image"] = True

    if not query and not image_bytes:
        context["search_error"] = "Enter a query (text or image) before searching."
        return render(request, "library/library_detail.html", context)

    try:
        from django.conf import settings as django_settings

        from .services.search import SearchRequest, SearchService

        def _make_request(rerank: bool) -> "SearchRequest":
            # Both runs share every parameter except the re-rank switch.
            return SearchRequest(
                query=query,
                query_image=image_bytes,
                query_image_ext=image_ext,
                library_uid=uid,
                limit=getattr(django_settings, "SEARCH_DEFAULT_LIMIT", 20),
                vector_top_k=getattr(django_settings, "SEARCH_VECTOR_TOP_K", 50),
                fulltext_top_k=getattr(django_settings, "SEARCH_FULLTEXT_TOP_K", 30),
                rerank=rerank,
                include_images=True,
            )

        service = SearchService(user=request.user)
        baseline = service.search(_make_request(rerank=False))
        reranked = service.search(_make_request(rerank=True))

        _annotate_rank_deltas(baseline.candidates, reranked.candidates)

        context["results_baseline"] = baseline
        context["results_reranked"] = reranked
    except Exception as exc:
        # Top-level boundary for the page: log with traceback and show the
        # message in the page instead of returning a 500.
        logger.exception("Library search failed: %s", exc)
        context["search_error"] = str(exc)

    return render(request, "library/library_detail.html", context)
|
||||
|
||||
|
||||
@login_required
|
||||
def library_edit(request, uid):
|
||||
"""Edit an existing library."""
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
{% block nav_items_desktop %}
|
||||
<li><a href="{% url 'dashboard' %}" class="font-semibold">Dashboard</a></li>
|
||||
<li><a href="{% url 'library:library-list' %}">Libraries</a></li>
|
||||
<li><a href="{% url 'library:search' %}">Search</a></li>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'themis:key-list' %}">API Keys</a></li>
|
||||
{% endblock %}
|
||||
@@ -10,6 +11,7 @@
|
||||
{% block nav_items %}
|
||||
<li><a href="{% url 'dashboard' %}">Dashboard</a></li>
|
||||
<li><a href="{% url 'library:library-list' %}">Libraries</a></li>
|
||||
<li><a href="{% url 'library:search' %}">Search</a></li>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'themis:key-list' %}">API Keys</a></li>
|
||||
{% endblock %}
|
||||
@@ -79,7 +81,8 @@
|
||||
<div>
|
||||
<h2 class="text-xl font-semibold mb-3">Quick Actions</h2>
|
||||
<div class="flex flex-wrap gap-3">
|
||||
<a href="{% url 'library:library-create' %}" class="btn btn-primary btn-sm">New Library</a>
|
||||
<a href="{% url 'library:search' %}" class="btn btn-primary btn-sm">Search Knowledge</a>
|
||||
<a href="{% url 'library:library-create' %}" class="btn btn-secondary btn-sm">New Library</a>
|
||||
<a href="{% url 'llm_manager:api_create' %}" class="btn btn-secondary btn-sm">Add LLM API</a>
|
||||
<a href="{% url 'themis:key-create' %}" class="btn btn-accent btn-sm">Add API Key</a>
|
||||
<a href="{% url 'themis:profile-settings' %}" class="btn btn-ghost btn-sm">Profile Settings</a>
|
||||
|
||||
Reference in New Issue
Block a user