Add vision analysis capabilities to the embedding pipeline
- Introduced a new vision analysis service to classify, describe, and extract text from images.
- Enhanced the Image model with fields for OCR text, vision model name, and analysis status.
- Added a new "nonfiction" library type with specific chunking and embedding configurations.
- Updated content types to include vision prompts for various library types.
- Integrated vision analysis into the embedding pipeline, allowing for image analysis during document processing.
- Implemented metrics to track vision analysis performance and usage.
- Updated UI components to display vision analysis results and statuses in item details and the embedding dashboard.
- Added migration for new vision model fields and usage tracking.
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
Add is_system_vision_model to LLMModel and vision_analysis purpose to LLMUsage.
|
||||
"""
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Schema changes that let ``llm_manager`` record a system vision model.

    The operations run in this order:

    1. add the ``is_system_vision_model`` boolean to ``LLMModel``;
    2. add an index on ``LLMModel (is_system_vision_model, model_type)``;
    3. re-declare ``LLMUsage.purpose`` so that its choices include
       ``"vision_analysis"``.
    """

    dependencies = [("llm_manager", "0002_add_bedrock_api_type")]

    # Hoisted out of ``operations`` purely for readability; the values are
    # passed through unchanged.
    _VISION_FLAG_HELP = (
        "Mark this as the system-wide vision model for image analysis. "
        "Only ONE vision model should have this set to True."
    )
    _PURPOSE_CHOICES = [
        ("responder", "RAG Responder"),
        ("reviewer", "RAG Reviewer"),
        ("embeddings", "Document Embeddings"),
        ("search", "Vector Search"),
        ("reranking", "Re-ranking"),
        ("multimodal_embed", "Multimodal Embedding"),
        ("vision_analysis", "Vision Analysis"),
        ("other", "Other"),
    ]

    operations = [
        # New flag column; existing rows get False.
        migrations.AddField(
            model_name="llmmodel",
            name="is_system_vision_model",
            field=models.BooleanField(
                default=False,
                help_text=_VISION_FLAG_HELP,
            ),
        ),
        # NOTE(review): the matching models.Index on LLMModel is declared
        # without an explicit name, so Django derives one automatically.
        # Confirm it equals the literal below; otherwise makemigrations will
        # presumably report a pending index rename.
        migrations.AddIndex(
            model_name="llmmodel",
            index=models.Index(
                fields=["is_system_vision_model", "model_type"],
                name="llm_manager__is_syst_b2f4e7_idx",
            ),
        ),
        # Same column definition as before apart from the extra
        # "vision_analysis" entry in the choices list.
        migrations.AlterField(
            model_name="llmusage",
            name="purpose",
            field=models.CharField(
                choices=_PURPOSE_CHOICES,
                db_index=True,
                default="other",
                max_length=50,
            ),
        ),
    ]
|
||||
@@ -179,6 +179,13 @@ class LLMModel(models.Model):
|
||||
"Only ONE reranker model should have this set to True."
|
||||
),
|
||||
)
|
||||
is_system_vision_model = models.BooleanField(
|
||||
default=False,
|
||||
help_text=(
|
||||
"Mark this as the system-wide vision model for image analysis. "
|
||||
"Only ONE vision model should have this set to True."
|
||||
),
|
||||
)
|
||||
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
updated_at = models.DateTimeField(auto_now=True)
|
||||
@@ -191,6 +198,7 @@ class LLMModel(models.Model):
|
||||
models.Index(fields=["is_system_embedding_model", "model_type"]),
|
||||
models.Index(fields=["is_system_chat_model", "model_type"]),
|
||||
models.Index(fields=["is_system_reranker_model", "model_type"]),
|
||||
models.Index(fields=["is_system_vision_model", "model_type"]),
|
||||
]
|
||||
|
||||
def __str__(self):
|
||||
@@ -223,6 +231,15 @@ class LLMModel(models.Model):
|
||||
model_type="reranker",
|
||||
).first()
|
||||
|
||||
@classmethod
def get_system_vision_model(cls):
    """Look up the model flagged as the system-wide vision model.

    Only rows that are active, have ``is_system_vision_model`` set, and
    whose ``model_type`` is either ``"vision"`` or ``"chat"`` are
    considered.

    Returns:
        The first matching row as returned by ``QuerySet.first()``, or
        ``None`` when no row matches.
    """
    flagged = cls.objects.filter(
        is_system_vision_model=True,
        is_active=True,
        model_type__in=["vision", "chat"],
    )
    # Nothing here enforces a single flagged row, so take the first match.
    return flagged.first()
|
||||
|
||||
|
||||
class LLMUsage(models.Model):
|
||||
"""
|
||||
@@ -259,6 +276,7 @@ class LLMUsage(models.Model):
|
||||
("search", "Vector Search"),
|
||||
("reranking", "Re-ranking"),
|
||||
("multimodal_embed", "Multimodal Embedding"),
|
||||
("vision_analysis", "Vision Analysis"),
|
||||
("other", "Other"),
|
||||
],
|
||||
default="other",
|
||||
|
||||
Reference in New Issue
Block a user