Add vision analysis capabilities to the embedding pipeline

- Introduced a new vision analysis service to classify, describe, and extract text from images.
- Enhanced the Image model with fields for OCR text, vision model name, and analysis status.
- Added a new "nonfiction" library type with specific chunking and embedding configurations.
- Updated content types to include vision prompts for various library types.
- Integrated vision analysis into the embedding pipeline, allowing for image analysis during document processing.
- Implemented metrics to track vision analysis performance and usage.
- Updated UI components to display vision analysis results and statuses in item details and the embedding dashboard.
- Added migration for new vision model fields and usage tracking.
This commit is contained in:
2026-03-22 15:14:34 +00:00
parent 6585beed20
commit 90db904959
11 changed files with 914 additions and 19 deletions

View File

@@ -0,0 +1,52 @@
"""
Add is_system_vision_model to LLMModel and vision_analysis purpose to LLMUsage.
"""
from django.db import migrations, models
# Help text attached to the new boolean flag. The "only ONE" rule is advisory
# text only: none of the operations below adds a database constraint for it.
_VISION_MODEL_HELP_TEXT = (
    "Mark this as the system-wide vision model for image analysis. "
    "Only ONE vision model should have this set to True."
)

# Full set of LLMUsage.purpose choices declared by this migration, including
# the "vision_analysis" entry.
_USAGE_PURPOSE_CHOICES = [
    ("responder", "RAG Responder"),
    ("reviewer", "RAG Reviewer"),
    ("embeddings", "Document Embeddings"),
    ("search", "Vector Search"),
    ("reranking", "Re-ranking"),
    ("multimodal_embed", "Multimodal Embedding"),
    ("vision_analysis", "Vision Analysis"),
    ("other", "Other"),
]


class Migration(migrations.Migration):
    """Schema changes for vision-model selection and usage tracking.

    Operations, in order:
      1. Add the ``is_system_vision_model`` boolean to ``llmmodel``.
      2. Add a composite index on ``(is_system_vision_model, model_type)``.
      3. Re-declare ``llmusage.purpose`` with the ``vision_analysis`` choice.
    """

    dependencies = [("llm_manager", "0002_add_bedrock_api_type")]

    operations = [
        # 1. New flag column; defaults to False.
        migrations.AddField(
            model_name="llmmodel",
            name="is_system_vision_model",
            field=models.BooleanField(
                default=False,
                help_text=_VISION_MODEL_HELP_TEXT,
            ),
        ),
        # 2. Composite index over the flag and the model type.
        migrations.AddIndex(
            model_name="llmmodel",
            index=models.Index(
                fields=["is_system_vision_model", "model_type"],
                name="llm_manager__is_syst_b2f4e7_idx",
            ),
        ),
        # 3. Same CharField settings, with the extended choices list.
        migrations.AlterField(
            model_name="llmusage",
            name="purpose",
            field=models.CharField(
                choices=_USAGE_PURPOSE_CHOICES,
                db_index=True,
                default="other",
                max_length=50,
            ),
        ),
    ]

View File

@@ -179,6 +179,13 @@ class LLMModel(models.Model):
"Only ONE reranker model should have this set to True."
),
)
# Flags this row as the system-wide vision model used for image analysis.
# Defaults to False. The "only ONE" rule is stated in the help text; no
# uniqueness constraint is declared on the field itself.
is_system_vision_model = models.BooleanField(
default=False,
help_text=(
"Mark this as the system-wide vision model for image analysis. "
"Only ONE vision model should have this set to True."
),
)
# Timestamp filled in when the row is first created (auto_now_add).
created_at = models.DateTimeField(auto_now_add=True)
# Timestamp refreshed each time the row is saved (auto_now).
updated_at = models.DateTimeField(auto_now=True)
@@ -191,6 +198,7 @@ class LLMModel(models.Model):
models.Index(fields=["is_system_embedding_model", "model_type"]),
models.Index(fields=["is_system_chat_model", "model_type"]),
models.Index(fields=["is_system_reranker_model", "model_type"]),
models.Index(fields=["is_system_vision_model", "model_type"]),
]
def __str__(self):
@@ -223,6 +231,15 @@ class LLMModel(models.Model):
model_type="reranker",
).first()
@classmethod
def get_system_vision_model(cls):
    """Look up the system-wide vision model used for image analysis.

    Filters for rows that are flagged ``is_system_vision_model``, are
    active, and whose ``model_type`` is either ``"vision"`` or ``"chat"``,
    then returns the result of ``.first()`` on that queryset (Django's
    ``first()`` yields ``None`` when nothing matches).
    """
    criteria = {
        "is_system_vision_model": True,
        "is_active": True,
        # Both dedicated vision models and chat models are accepted here.
        "model_type__in": ["vision", "chat"],
    }
    candidates = cls.objects.filter(**criteria)
    return candidates.first()
class LLMUsage(models.Model):
"""
@@ -259,6 +276,7 @@ class LLMUsage(models.Model):
("search", "Vector Search"),
("reranking", "Re-ranking"),
("multimodal_embed", "Multimodal Embedding"),
("vision_analysis", "Vision Analysis"),
("other", "Other"),
],
default="other",