Add vision analysis capabilities to the embedding pipeline

- Introduced a new vision analysis service to classify, describe, and extract text from images.
- Enhanced the Image model with fields for OCR text, vision model name, and analysis status.
- Added a new "nonfiction" library type with specific chunking and embedding configurations.
- Updated content types to include vision prompts for various library types.
- Integrated vision analysis into the embedding pipeline, allowing for image analysis during document processing.
- Implemented metrics to track vision analysis performance and usage.
- Updated UI components to display vision analysis results and statuses in item details and the embedding dashboard.
- Added migration for new vision model fields and usage tracking.
2026-03-22 15:14:34 +00:00
parent 6585beed20
commit 90db904959
11 changed files with 914 additions and 19 deletions
--- a/mnemosyne/library/models.py
+++ b/mnemosyne/library/models.py
@@ -60,6 +60,7 @@ class Library(StructuredNode):
        required=True,
        choices={
            "fiction": "Fiction",
+            "nonfiction": "Non-Fiction",
            "technical": "Technical",
            "music": "Music",
            "film": "Film",
@@ -219,6 +220,12 @@ class Image(StructuredNode):
        choices={
            "cover": "Cover",
            "diagram": "Diagram",
+            "chart": "Chart",
+            "table": "Table",
+            "screenshot": "Screenshot",
+            "illustration": "Illustration",
+            "map": "Map",
+            "portrait": "Portrait",
            "artwork": "Artwork",
            "still": "Still",
            "photo": "Photo",
@@ -227,10 +234,24 @@ class Image(StructuredNode):
    description = StringProperty(default="")
    metadata = JSONProperty(default={})

+    # Vision analysis fields (Phase 2B)
+    ocr_text = StringProperty(default="")  # Visible text extracted by vision model
+    vision_model_name = StringProperty(default="")  # Which vision model analyzed this
+    analysis_status = StringProperty(
+        default="pending",
+        choices={
+            "pending": "Pending",
+            "completed": "Completed",
+            "failed": "Failed",
+            "skipped": "Skipped",
+        },
+    )
+
    created_at = DateTimeProperty(default_now=True)

    # Relationships
    embeddings = RelationshipTo("ImageEmbedding", "HAS_EMBEDDING")
+    concepts = RelationshipTo("Concept", "DEPICTS")

    def __str__(self):
        return f"Image {self.image_type} ({self.uid})"