Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling.
- Created utility functions for formatting dates, times, and numbers according to user preferences.
- Developed views for profile settings, API key management, and notifications, including health check endpoints.
- Added URL configurations for Themis tests and main application routes.
- Established test cases for custom widgets to ensure proper functionality and integration.
- Defined project metadata and dependencies in pyproject.toml for package management.
This commit is contained in:
63
mnemosyne/.env copy.example
Normal file
63
mnemosyne/.env copy.example
Normal file
@@ -0,0 +1,63 @@
|
||||
# =============================================================================
|
||||
# Mnemosyne Django Environment Variables
|
||||
# =============================================================================
|
||||
# Copy this file to .env and configure for your environment
|
||||
# This file contains all variables read by Django settings.py
|
||||
|
||||
# --- Security ---
|
||||
SECRET_KEY=change-me-to-a-real-secret-key
|
||||
DEBUG=True
|
||||
ALLOWED_HOSTS=localhost,127.0.0.1,mnemosyne.ouranos.helu.ca
|
||||
CSRF_TRUSTED_ORIGINS=http://localhost:8000,https://mnemosyne.ouranos.helu.ca
|
||||
|
||||
# --- PostgreSQL Database ---
|
||||
DATABASE_URL=postgres://mnemosyne:password@portia.incus:5432/mnemosyne
|
||||
|
||||
# --- Neo4j Graph Database ---
|
||||
NEOMODEL_NEO4J_BOLT_URL=bolt://neo4j:password@ariel.incus:25554
|
||||
|
||||
# --- Memcached ---
|
||||
KVDB_LOCATION=127.0.0.1:11211
|
||||
KVDB_PREFIX=mnemosyne
|
||||
|
||||
# --- Celery / RabbitMQ ---
|
||||
CELERY_BROKER_URL=amqp://mnemosyne:password@oberon.incus:5672/mnemosyne
|
||||
CELERY_RESULT_BACKEND=rpc://
|
||||
CELERY_TASK_ALWAYS_EAGER=False
|
||||
|
||||
# --- S3 Storage (Incus bucket, MinIO-backed) ---
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_STORAGE_BUCKET_NAME=mnemosyne-content
|
||||
AWS_S3_ENDPOINT_URL=
|
||||
AWS_S3_USE_SSL=False
|
||||
AWS_S3_VERIFY=False
|
||||
AWS_S3_REGION_NAME=us-east-1
|
||||
# Set to True to use local FileSystemStorage instead of S3 (dev/test)
|
||||
USE_LOCAL_STORAGE=True
|
||||
|
||||
# --- Email (smtp4dev on Oberon) ---
|
||||
EMAIL_HOST=oberon.incus
|
||||
EMAIL_PORT=22025
|
||||
EMAIL_USE_TLS=False
|
||||
|
||||
# --- LLM API Encryption ---
|
||||
# Encryption key for LLM API keys stored in the database
|
||||
# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
LLM_API_SECRETS_ENCRYPTION_KEY=
|
||||
|
||||
# --- Embedding Pipeline (Phase 2) ---
|
||||
# Batch size for embedding API calls (smaller for local GPU, larger for cloud)
|
||||
EMBEDDING_BATCH_SIZE=8
|
||||
# Timeout in seconds for embedding API requests
|
||||
EMBEDDING_TIMEOUT=120
|
||||
|
||||
# --- Logging ---
|
||||
# Valid levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||
LOGGING_LEVEL=INFO
|
||||
CELERY_LOGGING_LEVEL=INFO
|
||||
DJANGO_LOGGING_LEVEL=WARNING
|
||||
|
||||
# --- Localization ---
|
||||
TIME_ZONE=UTC
|
||||
LANGUAGE_CODE=en-us
|
||||
1
mnemosyne/library/__init__.py
Normal file
1
mnemosyne/library/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Points Django at the AppConfig subclass for this app.
# NOTE(review): ``default_app_config`` is deprecated since Django 3.2, which
# auto-discovers a single AppConfig subclass in apps.py; this line can likely
# be dropped -- confirm against the project's minimum Django version.
default_app_config = "library.apps.LibraryConfig"
|
||||
5
mnemosyne/library/admin.py
Normal file
5
mnemosyne/library/admin.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# Library app does not use standard Django admin (neomodel StructuredNodes
|
||||
# are not Django ORM models). Custom admin views are provided as regular
|
||||
# app views in library/views.py, rendered within Themis's template structure.
|
||||
#
|
||||
# The embedding pipeline dashboard is at /library/embedding/
|
||||
0
mnemosyne/library/api/__init__.py
Normal file
0
mnemosyne/library/api/__init__.py
Normal file
83
mnemosyne/library/api/serializers.py
Normal file
83
mnemosyne/library/api/serializers.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
DRF serializers for the library app.
|
||||
|
||||
Serialize Neo4j neomodel nodes into JSON for the REST API.
|
||||
"""
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
|
||||
class LibrarySerializer(serializers.Serializer):
    """Serialize a Library node, including its per-library pipeline settings.

    ``uid`` and ``created_at`` are read-only. Every field other than ``name``
    and ``library_type`` is optional on input.
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    # Keep in sync with the keys of library.content_types.LIBRARY_TYPE_DEFAULTS.
    library_type = serializers.ChoiceField(
        choices=[
            "fiction",
            "technical",
            "music",
            "film",
            "art",
            "journal",
        ],
    )
    description = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    chunking_config = serializers.JSONField(required=False, default=dict)
    # Free-text prompts; blank is allowed.
    embedding_instruction = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    reranker_instruction = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    llm_context_prompt = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    created_at = serializers.DateTimeField(read_only=True)
|
||||
|
||||
|
||||
class CollectionSerializer(serializers.Serializer):
    """Serialize a Collection node.

    ``library_uid`` is write-only: it is accepted on input to name the parent
    library and is never included in serialized output.
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    description = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
    library_uid = serializers.CharField(
        required=False,
        write_only=True,
        help_text="UID of the parent library",
    )
|
||||
|
||||
|
||||
class ItemSerializer(serializers.Serializer):
    """Serialize an Item node.

    Storage details (``s3_key``, ``content_hash``, ``file_size``), timestamps,
    and the embedding-pipeline fields are read-only. ``collection_uid`` is
    write-only and names the parent collection on input.
    """

    uid = serializers.CharField(read_only=True)
    title = serializers.CharField(max_length=500)
    item_type = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    s3_key = serializers.CharField(read_only=True)
    content_hash = serializers.CharField(read_only=True)
    file_type = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    file_size = serializers.IntegerField(read_only=True)
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
    updated_at = serializers.DateTimeField(read_only=True)
    collection_uid = serializers.CharField(
        required=False,
        write_only=True,
        help_text="UID of the parent collection",
    )

    # Phase 2: embedding pipeline state (output only).
    embedding_status = serializers.CharField(read_only=True)
    embedding_model_name = serializers.CharField(read_only=True)
    chunk_count = serializers.IntegerField(read_only=True)
    image_count = serializers.IntegerField(read_only=True)
|
||||
|
||||
|
||||
class ChunkSerializer(serializers.Serializer):
    """Serialize a Chunk node: its index, storage key, size, and text preview."""

    uid = serializers.CharField(read_only=True)
    chunk_index = serializers.IntegerField()
    chunk_s3_key = serializers.CharField()
    chunk_size = serializers.IntegerField(required=False, default=0)
    text_preview = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    created_at = serializers.DateTimeField(read_only=True)
|
||||
|
||||
|
||||
class ConceptSerializer(serializers.Serializer):
    """Serialize a Concept node: a named concept with an optional type label."""

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    concept_type = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
|
||||
|
||||
|
||||
class ImageSerializer(serializers.Serializer):
    """Serialize an Image node: storage key plus optional type, description, metadata."""

    uid = serializers.CharField(read_only=True)
    s3_key = serializers.CharField()
    image_type = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    description = serializers.CharField(
        required=False,
        allow_blank=True,
        default="",
    )
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
|
||||
24
mnemosyne/library/api/urls.py
Normal file
24
mnemosyne/library/api/urls.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
URL patterns for the library DRF API.
|
||||
"""
|
||||
|
||||
from django.urls import path
|
||||
|
||||
from . import views
|
||||
|
||||
app_name = "library-api"
|
||||
|
||||
# Routes are matched top to bottom, so the order below is significant.
urlpatterns = [
    # --- Libraries ---
    path("libraries/", views.library_list_create, name="library-list"),
    path("libraries/<str:uid>/", views.library_detail, name="library-detail"),
    # --- Collections ---
    path("collections/", views.collection_list_create, name="collection-list"),
    path(
        "collections/<str:uid>/",
        views.collection_detail,
        name="collection-detail",
    ),
    # --- Items ---
    path("items/", views.item_list_create, name="item-list"),
    # "items/upload/" must stay ahead of "items/<str:uid>/": the str converter
    # would otherwise capture "upload" as a uid.
    path("items/upload/", views.item_upload, name="item-upload"),
    path("items/<str:uid>/", views.item_detail, name="item-detail"),
    path("items/<str:uid>/reembed/", views.item_reembed, name="item-reembed"),
    path("items/<str:uid>/status/", views.item_status, name="item-status"),
]
|
||||
426
mnemosyne/library/api/views.py
Normal file
426
mnemosyne/library/api/views.py
Normal file
@@ -0,0 +1,426 @@
|
||||
"""
|
||||
DRF API views for the library app.
|
||||
|
||||
All views are function-based per Red Panda Standards.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
from rest_framework import status
|
||||
from rest_framework.decorators import api_view, parser_classes, permission_classes
|
||||
from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
|
||||
from library.content_types import get_library_type_config
|
||||
|
||||
from .serializers import (
|
||||
CollectionSerializer,
|
||||
ItemSerializer,
|
||||
LibrarySerializer,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Library API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api_view(["GET", "POST"])
@permission_classes([IsAuthenticated])
def library_list_create(request):
    """Return all libraries ordered by name (GET) or create a library (POST).

    On POST, the chunking config and the three instruction/prompt fields fall
    back to the defaults registered for the chosen library type whenever the
    submitted value is missing or empty.
    """
    from library.models import Library

    if request.method != "GET":
        incoming = LibrarySerializer(data=request.data)
        incoming.is_valid(raise_exception=True)
        payload = incoming.validated_data

        kind = payload["library_type"]
        type_defaults = get_library_type_config(kind)

        # Falsy submitted values ("" or {}) are replaced by the type default.
        tunables = {
            key: payload.get(key) or type_defaults[key]
            for key in (
                "chunking_config",
                "embedding_instruction",
                "reranker_instruction",
                "llm_context_prompt",
            )
        }

        library = Library(
            name=payload["name"],
            library_type=kind,
            description=payload.get("description", ""),
            **tunables,
        )
        library.save()
        created = LibrarySerializer(library)
        return Response(created.data, status=status.HTTP_201_CREATED)

    ordered = Library.nodes.order_by("name")
    listing = LibrarySerializer(ordered, many=True)
    return Response(listing.data)
|
||||
|
||||
|
||||
@api_view(["GET", "PUT", "DELETE"])
@permission_classes([IsAuthenticated])
def library_detail(request, uid):
    """Fetch (GET), partially update (PUT), or remove (DELETE) one library.

    Responds with 404 when no library has the given ``uid``.
    """
    from library.models import Library

    try:
        library = Library.nodes.get(uid=uid)
    except Library.DoesNotExist:
        not_found = {"detail": "Library not found."}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)

    verb = request.method

    if verb == "DELETE":
        library.delete()
        return Response(status=status.HTTP_204_NO_CONTENT)

    if verb == "PUT":
        # partial=True: only the fields present in the request are validated.
        incoming = LibrarySerializer(data=request.data, partial=True)
        incoming.is_valid(raise_exception=True)
        changes = incoming.validated_data

        editable = (
            "name",
            "library_type",
            "description",
            "chunking_config",
            "embedding_instruction",
            "reranker_instruction",
            "llm_context_prompt",
        )
        for attr in editable:
            if attr in changes:
                setattr(library, attr, changes[attr])
        library.save()

    # GET and PUT both answer with the current representation.
    return Response(LibrarySerializer(library).data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collection API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api_view(["GET", "POST"])
@permission_classes([IsAuthenticated])
def collection_list_create(request):
    """List collections (GET) or create a new one (POST).

    GET accepts an optional ``library_uid`` query parameter that restricts the
    listing to one library's collections.

    POST accepts an optional ``library_uid`` in the body naming the parent
    library. The parent is resolved *before* the collection is saved, so an
    unknown ``library_uid`` yields a 404 instead of silently creating an
    unattached collection.
    """
    from library.models import Collection, Library

    if request.method == "GET":
        # Optionally filter by library_uid query param
        library_uid = request.query_params.get("library_uid")
        if library_uid:
            try:
                lib = Library.nodes.get(uid=library_uid)
            except Library.DoesNotExist:
                return Response(
                    {"detail": "Library not found."},
                    status=status.HTTP_404_NOT_FOUND,
                )
            collections = lib.collections.all()
        else:
            collections = Collection.nodes.all()
        serializer = CollectionSerializer(collections, many=True)
        return Response(serializer.data)

    # POST
    serializer = CollectionSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    data = serializer.validated_data

    # Resolve the parent first so a bad reference cannot leave an orphan node.
    lib = None
    library_uid = data.get("library_uid")
    if library_uid:
        try:
            lib = Library.nodes.get(uid=library_uid)
        except Library.DoesNotExist:
            return Response(
                {"detail": "Library not found."},
                status=status.HTTP_404_NOT_FOUND,
            )

    col = Collection(
        name=data["name"],
        description=data.get("description", ""),
        metadata=data.get("metadata", {}),
    )
    col.save()

    if lib is not None:
        lib.collections.connect(col)
        col.library.connect(lib)

    return Response(CollectionSerializer(col).data, status=status.HTTP_201_CREATED)
|
||||
|
||||
|
||||
@api_view(["GET", "PUT", "DELETE"])
@permission_classes([IsAuthenticated])
def collection_detail(request, uid):
    """Fetch (GET), partially update (PUT), or remove (DELETE) one collection.

    Responds with 404 when no collection has the given ``uid``.
    """
    from library.models import Collection

    try:
        collection = Collection.nodes.get(uid=uid)
    except Collection.DoesNotExist:
        not_found = {"detail": "Collection not found."}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)

    verb = request.method

    if verb == "DELETE":
        collection.delete()
        return Response(status=status.HTTP_204_NO_CONTENT)

    if verb == "PUT":
        # partial=True: only the fields present in the request are validated.
        incoming = CollectionSerializer(data=request.data, partial=True)
        incoming.is_valid(raise_exception=True)
        changes = incoming.validated_data
        for attr in ("name", "description", "metadata"):
            if attr in changes:
                setattr(collection, attr, changes[attr])
        collection.save()

    # GET and PUT both answer with the current representation.
    return Response(CollectionSerializer(collection).data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Item API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api_view(["GET", "POST"])
@permission_classes([IsAuthenticated])
def item_list_create(request):
    """List items (GET) or create a new one (POST).

    GET accepts an optional ``collection_uid`` query parameter that restricts
    the listing to one collection's items.

    POST accepts an optional ``collection_uid`` in the body naming the parent
    collection. The parent is resolved *before* the item is saved, so an
    unknown ``collection_uid`` yields a 404 instead of silently creating an
    unattached item.
    """
    from library.models import Collection, Item

    if request.method == "GET":
        collection_uid = request.query_params.get("collection_uid")
        if collection_uid:
            try:
                col = Collection.nodes.get(uid=collection_uid)
            except Collection.DoesNotExist:
                return Response(
                    {"detail": "Collection not found."},
                    status=status.HTTP_404_NOT_FOUND,
                )
            items = col.items.all()
        else:
            items = Item.nodes.all()
        serializer = ItemSerializer(items, many=True)
        return Response(serializer.data)

    # POST
    serializer = ItemSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    data = serializer.validated_data

    # Resolve the parent first so a bad reference cannot leave an orphan node.
    col = None
    collection_uid = data.get("collection_uid")
    if collection_uid:
        try:
            col = Collection.nodes.get(uid=collection_uid)
        except Collection.DoesNotExist:
            return Response(
                {"detail": "Collection not found."},
                status=status.HTTP_404_NOT_FOUND,
            )

    item = Item(
        title=data["title"],
        item_type=data.get("item_type", ""),
        file_type=data.get("file_type", ""),
        metadata=data.get("metadata", {}),
    )
    item.save()

    if col is not None:
        col.items.connect(item)

    return Response(ItemSerializer(item).data, status=status.HTTP_201_CREATED)
|
||||
|
||||
|
||||
@api_view(["GET", "PUT", "DELETE"])
@permission_classes([IsAuthenticated])
def item_detail(request, uid):
    """Fetch (GET), partially update (PUT), or remove (DELETE) one item.

    Responds with 404 when no item has the given ``uid``.
    """
    from library.models import Item

    try:
        item = Item.nodes.get(uid=uid)
    except Item.DoesNotExist:
        not_found = {"detail": "Item not found."}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)

    verb = request.method

    if verb == "DELETE":
        item.delete()
        return Response(status=status.HTTP_204_NO_CONTENT)

    if verb == "PUT":
        # partial=True: only the fields present in the request are validated.
        incoming = ItemSerializer(data=request.data, partial=True)
        incoming.is_valid(raise_exception=True)
        changes = incoming.validated_data
        for attr in ("title", "item_type", "file_type", "metadata"):
            if attr in changes:
                setattr(item, attr, changes[attr])
        item.save()

    # GET and PUT both answer with the current representation.
    return Response(ItemSerializer(item).data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Item Upload (Phase 2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@api_view(["POST"])
@permission_classes([IsAuthenticated])
@parser_classes([MultiPartParser, FormParser])
def item_upload(request):
    """
    Upload a file to create a new Item and trigger embedding.

    Expects multipart form data with:
    - file: The document file
    - title: Item title (a missing or blank title falls back to the file name)
    - collection_uid: (optional) UID of parent collection
    - auto_embed: (optional) Whether to auto-trigger embedding (default: true)

    Returns 201 with the serialized item plus ``task_id`` (``None`` when no
    embedding task was queued), 400 when no file is supplied, and 500 when the
    file cannot be stored. Attaching to the collection and queueing the
    embedding task are best-effort: failures are logged and do not fail the
    upload.
    """
    from library.models import Collection, Item

    uploaded_file = request.FILES.get("file")
    if not uploaded_file:
        return Response(
            {"detail": "No file provided."}, status=status.HTTP_400_BAD_REQUEST
        )

    # ``or`` (not a .get default) so an empty title also falls back.
    title = request.data.get("title") or uploaded_file.name
    collection_uid = request.data.get("collection_uid", "")
    # str() guards against a parser handing back a non-string value.
    auto_embed = str(request.data.get("auto_embed", "true")).strip().lower() in (
        "true",
        "1",
        "yes",
    )

    # Determine file type from extension
    _, ext = os.path.splitext(uploaded_file.name)
    file_type = ext.lstrip(".").lower()

    # Read file data (whole file in memory; needed for both hash and storage)
    file_data = uploaded_file.read()
    content_hash = hashlib.sha256(file_data).hexdigest()

    # Create Item node first: its uid is part of the storage key.
    item = Item(
        title=title,
        file_type=file_type,
        file_size=len(file_data),
        content_hash=content_hash,
        embedding_status="pending",
    )
    item.save()

    # Store file in S3; avoid a dangling "." when the upload has no extension.
    s3_key = f"items/{item.uid}/original"
    if file_type:
        s3_key = f"{s3_key}.{file_type}"
    try:
        # Storage.save() returns the name actually used, which a backend may
        # alter; persist that rather than the requested key.
        item.s3_key = default_storage.save(s3_key, ContentFile(file_data))
        item.save()
    except Exception:
        # Full details go to the log only; the client gets a generic message
        # so internal storage errors are not disclosed.
        logger.exception("Failed to store file to S3 for item_uid=%s", item.uid)
        item.delete()
        return Response(
            {"detail": "File storage failed."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # Connect to collection if specified (best-effort)
    if collection_uid:
        try:
            col = Collection.nodes.get(uid=collection_uid)
            col.items.connect(item)
        except Collection.DoesNotExist:
            logger.warning("Collection not found: %s", collection_uid)
        except Exception:
            # Anything else is not a "not found"; log it as what it is.
            logger.exception(
                "Failed to attach item_uid=%s to collection_uid=%s",
                item.uid,
                collection_uid,
            )

    # Auto-trigger embedding (best-effort)
    task_id = None
    if auto_embed:
        try:
            from library.tasks import embed_item

            task = embed_item.delay(item.uid, request.user.id)
            task_id = task.id
            logger.info(
                "Auto-triggered embedding item_uid=%s task_id=%s",
                item.uid,
                task_id,
            )
        except Exception as exc:
            logger.warning("Failed to queue embedding task: %s", exc)

    return Response(
        {
            **ItemSerializer(item).data,
            "task_id": task_id,
        },
        status=status.HTTP_201_CREATED,
    )
|
||||
|
||||
|
||||
@api_view(["POST"])
@permission_classes([IsAuthenticated])
def item_reembed(request, uid):
    """Trigger re-embedding for an existing Item.

    Returns 404 when the item does not exist, 500 when the task cannot be
    queued, and otherwise a payload with ``detail``, ``item_uid`` and the
    queued ``task_id``.
    """
    from library.models import Item

    # Existence check only; the task re-loads the item by uid.
    try:
        Item.nodes.get(uid=uid)
    except Item.DoesNotExist:
        return Response(
            {"detail": "Item not found."}, status=status.HTTP_404_NOT_FOUND
        )

    try:
        from library.tasks import reembed_item

        task = reembed_item.delay(uid, request.user.id)
    except Exception:
        # Log the traceback server-side; do not echo internal error text
        # back to the client.
        logger.exception("Failed to queue reembed task for item_uid=%s", uid)
        return Response(
            {"detail": "Failed to queue task."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    return Response(
        {
            "detail": "Re-embedding queued.",
            "item_uid": uid,
            "task_id": task.id,
        }
    )
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def item_status(request, uid):
    """Report the embedding-pipeline state of one item.

    Responds with 404 when no item has the given ``uid``.
    """
    from library.models import Item

    try:
        item = Item.nodes.get(uid=uid)
    except Item.DoesNotExist:
        not_found = {"detail": "Item not found."}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)

    # Attributes copied verbatim from the node, in response-key order.
    reported = (
        "uid",
        "title",
        "embedding_status",
        "embedding_model_name",
        "chunk_count",
        "image_count",
        "error_message",
    )
    snapshot = {attr: getattr(item, attr) for attr in reported}
    return Response(snapshot)
|
||||
7
mnemosyne/library/apps.py
Normal file
7
mnemosyne/library/apps.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class LibraryConfig(AppConfig):
    """Django application configuration for the ``library`` app."""

    name = "library"
    verbose_name = "Library"
    default_auto_field = "django.db.models.BigAutoField"
|
||||
163
mnemosyne/library/content_types.py
Normal file
163
mnemosyne/library/content_types.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Content-type system configuration for Mnemosyne library types.
|
||||
|
||||
Each library type has a default configuration that governs chunking,
|
||||
embedding, re-ranking, and LLM context injection.
|
||||
"""
|
||||
|
||||
# Default configurations per library type.
|
||||
# These are loaded into Library nodes via the load_library_types management command.
|
||||
LIBRARY_TYPE_DEFAULTS = {
|
||||
"fiction": {
|
||||
"chunking_config": {
|
||||
"strategy": "chapter_aware",
|
||||
"chunk_size": 1024,
|
||||
"chunk_overlap": 128,
|
||||
"respect_boundaries": ["chapter", "scene", "paragraph"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this passage from a work of fiction for retrieval. "
|
||||
"Focus on narrative elements: characters, plot events, themes, "
|
||||
"setting, and emotional tone."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank passages from fiction based on narrative relevance to the query. "
|
||||
"Prioritize character actions, dialogue, plot developments, and thematic elements."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts are from fiction (novels, short stories, etc.). "
|
||||
"Treat this as creative/narrative content. Respect the literary context — "
|
||||
"characters, settings, and events are fictional. Cite specific passages "
|
||||
"when answering."
|
||||
),
|
||||
},
|
||||
"technical": {
|
||||
"chunking_config": {
|
||||
"strategy": "section_aware",
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 64,
|
||||
"respect_boundaries": ["section", "subsection", "code_block", "list"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this passage from technical documentation for retrieval. "
|
||||
"Focus on procedures, configurations, API references, code examples, "
|
||||
"and technical concepts."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank passages from technical documentation based on procedural relevance. "
|
||||
"Prioritize step-by-step instructions, code examples, and specific configurations."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts are from technical documentation (manuals, guides, "
|
||||
"reference material). Provide precise, actionable answers. Include code "
|
||||
"examples and exact configurations when available. Cite source sections."
|
||||
),
|
||||
},
|
||||
"music": {
|
||||
"chunking_config": {
|
||||
"strategy": "song_level",
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 32,
|
||||
"respect_boundaries": ["song", "verse", "chorus"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this music content (lyrics, liner notes, metadata) for retrieval. "
|
||||
"Focus on artist, album, genre, lyrical themes, and musical elements."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank music content based on relevance to the query. "
|
||||
"Consider artist, genre, lyrical themes, and musical characteristics."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts are song lyrics and music metadata. "
|
||||
"Consider the artistic and cultural context. Reference specific "
|
||||
"songs, albums, and artists when answering."
|
||||
),
|
||||
},
|
||||
"film": {
|
||||
"chunking_config": {
|
||||
"strategy": "scene_level",
|
||||
"chunk_size": 768,
|
||||
"chunk_overlap": 64,
|
||||
"respect_boundaries": ["scene", "act", "sequence"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this film content (scripts, synopses, reviews) for retrieval. "
|
||||
"Focus on scenes, characters, visual elements, dialogue, and narrative structure."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank film content based on cinematic relevance. "
|
||||
"Prioritize scene descriptions, character interactions, and visual elements."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts are from film-related content (scripts, synopses, "
|
||||
"reviews). Consider the cinematic context — visual storytelling, "
|
||||
"direction, and performance. Cite specific scenes and films."
|
||||
),
|
||||
},
|
||||
"art": {
|
||||
"chunking_config": {
|
||||
"strategy": "description_level",
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 32,
|
||||
"respect_boundaries": ["artwork", "description", "analysis"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this art content (descriptions, catalogs, analysis) for retrieval. "
|
||||
"Focus on visual elements, style, medium, artist, period, and artistic movements."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank art content based on visual and stylistic relevance. "
|
||||
"Prioritize descriptions of artwork, technique, composition, and artistic context."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts describe artworks and artistic content. "
|
||||
"Consider visual elements, artistic technique, historical context, "
|
||||
"and the artist's intent. Reference specific works and movements."
|
||||
),
|
||||
},
|
||||
"journal": {
|
||||
"chunking_config": {
|
||||
"strategy": "entry_level",
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 32,
|
||||
"respect_boundaries": ["entry", "date", "paragraph"],
|
||||
},
|
||||
"embedding_instruction": (
|
||||
"Represent this personal journal entry for retrieval. "
|
||||
"Focus on temporal context, personal reflections, mentioned people "
|
||||
"and places, and emotional content."
|
||||
),
|
||||
"reranker_instruction": (
|
||||
"Re-rank journal entries based on temporal and thematic relevance. "
|
||||
"Prioritize entries matching the time period, people, places, or topics in the query."
|
||||
),
|
||||
"llm_context_prompt": (
|
||||
"The following excerpts are from personal journal entries. "
|
||||
"This is private, reflective content. Respect the personal nature — "
|
||||
"answer with sensitivity. Note dates and temporal context when relevant."
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_library_type_config(library_type):
    """
    Get the default configuration for a library type.

    The result is an independent deep copy, so callers may freely modify it
    (for example the nested ``chunking_config`` dict) without altering the
    shared module-level defaults.

    Args:
        library_type: One of 'fiction', 'technical', 'music', 'film', 'art', 'journal'

    Returns:
        dict with keys: chunking_config, embedding_instruction,
        reranker_instruction, llm_context_prompt

    Raises:
        ValueError: If library_type is not recognized
    """
    import copy

    try:
        config = LIBRARY_TYPE_DEFAULTS[library_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable input, which is equally "not recognized".
        raise ValueError(
            f"Unknown library type '{library_type}'. "
            f"Valid types: {', '.join(LIBRARY_TYPE_DEFAULTS.keys())}"
        ) from None
    return copy.deepcopy(config)
|
||||
100
mnemosyne/library/forms.py
Normal file
100
mnemosyne/library/forms.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Django forms for Library admin views.
|
||||
|
||||
These forms are used by the custom admin views for Library, Collection,
|
||||
and Item CRUD. They are plain Django forms (not ModelForms) because
|
||||
neomodel StructuredNodes are not Django ORM models.
|
||||
"""
|
||||
|
||||
from django import forms
|
||||
|
||||
from .content_types import LIBRARY_TYPE_DEFAULTS
|
||||
|
||||
|
||||
# (value, label) pairs for the library-type select, one per configured type;
# the label is the type key with its first letter capitalised.
LIBRARY_TYPE_CHOICES = [(slug, slug.capitalize()) for slug in LIBRARY_TYPE_DEFAULTS]
|
||||
|
||||
|
||||
class LibraryForm(forms.Form):
    """Create/edit form for a Library node.

    Only ``name`` and ``library_type`` are required; the description and the
    three instruction/prompt text areas may be left blank.
    """

    name = forms.CharField(
        widget=forms.TextInput(attrs={"class": "input input-bordered w-full"}),
        max_length=200,
    )
    library_type = forms.ChoiceField(
        widget=forms.Select(attrs={"class": "select select-bordered w-full"}),
        choices=LIBRARY_TYPE_CHOICES,
    )

    # The four optional text areas below share the same DaisyUI styling.
    description = forms.CharField(
        widget=forms.Textarea(
            attrs={"class": "textarea textarea-bordered w-full", "rows": 3},
        ),
        required=False,
    )
    embedding_instruction = forms.CharField(
        widget=forms.Textarea(
            attrs={"class": "textarea textarea-bordered w-full", "rows": 3},
        ),
        required=False,
    )
    reranker_instruction = forms.CharField(
        widget=forms.Textarea(
            attrs={"class": "textarea textarea-bordered w-full", "rows": 3},
        ),
        required=False,
    )
    llm_context_prompt = forms.CharField(
        widget=forms.Textarea(
            attrs={"class": "textarea textarea-bordered w-full", "rows": 3},
        ),
        required=False,
    )
|
||||
|
||||
|
||||
class CollectionForm(forms.Form):
    """
    Plain Django form for creating or editing a Collection node.

    Only ``name`` is required; ``description`` may be left blank.
    """

    name = forms.CharField(
        widget=forms.TextInput(attrs={"class": "input input-bordered w-full"}),
        max_length=200,
    )
    description = forms.CharField(
        widget=forms.Textarea(
            attrs={"class": "textarea textarea-bordered w-full", "rows": 3}
        ),
        required=False,
    )
|
||||
|
||||
|
||||
class ItemForm(forms.Form):
    """
    Plain Django form for creating or editing an Item node.

    Only ``title`` is required. The optional ``file`` upload and the
    ``auto_embed`` checkbox (ticked by default) accompany the text fields.
    """

    title = forms.CharField(
        widget=forms.TextInput(attrs={"class": "input input-bordered w-full"}),
        max_length=500,
    )
    item_type = forms.CharField(
        widget=forms.TextInput(attrs={"class": "input input-bordered w-full"}),
        max_length=100,
        required=False,
    )
    file_type = forms.CharField(
        widget=forms.TextInput(attrs={"class": "input input-bordered w-full"}),
        max_length=50,
        required=False,
    )
    file = forms.FileField(
        help_text="Upload a document (PDF, EPUB, DOCX, PPTX, TXT, etc.)",
        widget=forms.ClearableFileInput(
            attrs={"class": "file-input file-input-bordered w-full"}
        ),
        required=False,
    )
    auto_embed = forms.BooleanField(
        help_text="Automatically start embedding after upload",
        widget=forms.CheckboxInput(attrs={"class": "checkbox checkbox-primary"}),
        initial=True,
        required=False,
    )
|
||||
0
mnemosyne/library/management/__init__.py
Normal file
0
mnemosyne/library/management/__init__.py
Normal file
0
mnemosyne/library/management/commands/__init__.py
Normal file
0
mnemosyne/library/management/commands/__init__.py
Normal file
43
mnemosyne/library/management/commands/embed_collection.py
Normal file
43
mnemosyne/library/management/commands/embed_collection.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Management command to embed all items in a Collection.
|
||||
|
||||
Usage:
|
||||
python manage.py embed_collection <collection_uid>
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Queue one batch embedding task covering every item of a Collection."""

    help = "Queue embedding tasks for all items in a Collection."

    def add_arguments(self, parser):
        """Register the required positional ``collection_uid`` argument."""
        parser.add_argument(
            "collection_uid", type=str, help="UID of the Collection to embed"
        )

    def handle(self, *args, **options):
        """
        Look up the Collection, report its size, and queue the batch task.

        :raises CommandError: If the Collection cannot be loaded. The
            underlying exception is attached as the explicit cause.
        """
        collection_uid = options["collection_uid"]

        try:
            from library.models import Collection

            col = Collection.nodes.get(uid=collection_uid)
        except Exception as exc:
            # Chain the cause so the real failure (missing node, connection
            # problem, import error) is the explicit cause of the error.
            raise CommandError(
                f"Collection not found: {collection_uid} ({exc})"
            ) from exc

        # Measure once; the same figure is shown before and after queueing.
        item_count = len(col.items.all())
        self.stdout.write(f"Collection: {col.name} ({item_count} items)")

        from library.tasks import embed_collection

        task = embed_collection.delay(collection_uid)
        self.stdout.write(
            self.style.SUCCESS(
                f"Batch task queued: {task.id} ({item_count} items)"
            )
        )
|
||||
68
mnemosyne/library/management/commands/embed_item.py
Normal file
68
mnemosyne/library/management/commands/embed_item.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""
|
||||
Management command to embed a single Item via the CLI.
|
||||
|
||||
Usage:
|
||||
python manage.py embed_item <item_uid>
|
||||
python manage.py embed_item <item_uid> --sync
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Embed a single Item, either inline (``--sync``) or as a queued task."""

    help = "Run the embedding pipeline for a single Item."

    def add_arguments(self, parser):
        """Register the positional ``item_uid`` and the ``--sync`` switch."""
        parser.add_argument("item_uid", type=str, help="UID of the Item to embed")
        parser.add_argument(
            "--sync",
            action="store_true",
            help="Run synchronously instead of queueing a Celery task",
        )

    def handle(self, *args, **options):
        """
        Verify the Item exists, then embed it synchronously or queue a task.

        :raises CommandError: If the Item cannot be loaded or the synchronous
            pipeline run fails. The underlying exception is attached as the
            explicit cause.
        """
        item_uid = options["item_uid"]

        # Verify the item exists before doing any work.
        try:
            from library.models import Item

            item = Item.nodes.get(uid=item_uid)
        except Exception as exc:
            raise CommandError(f"Item not found: {item_uid} ({exc})") from exc

        self.stdout.write(
            f"Item: {item.title} (type={item.file_type}, status={item.embedding_status})"
        )

        if options["sync"]:
            self._embed_synchronously(item_uid)
        else:
            self._queue_task(item_uid)

    def _embed_synchronously(self, item_uid):
        """Run the pipeline in-process, echoing progress and a final summary."""
        self.stdout.write("Running embedding pipeline synchronously...")
        from library.services.pipeline import EmbeddingPipeline

        pipeline = EmbeddingPipeline()

        def progress_cb(percent, message):
            # int() keeps the ``d`` format from raising on a float percent;
            # integer input is formatted exactly as before.
            self.stdout.write(f" [{int(percent):3d}%] {message}")

        try:
            result = pipeline.process_item(item_uid, progress_callback=progress_cb)
            self.stdout.write(
                self.style.SUCCESS(
                    f"\nCompleted: {result.get('chunks_created', 0)} chunks, "
                    f"{result.get('images_stored', 0)} images, "
                    f"{result.get('concepts_extracted', 0)} concepts"
                )
            )
        except Exception as exc:
            raise CommandError(f"Embedding failed: {exc}") from exc

    def _queue_task(self, item_uid):
        """Queue the ``embed_item`` task and print its id."""
        self.stdout.write("Queueing embedding task...")
        from library.tasks import embed_item

        task = embed_item.delay(item_uid)
        self.stdout.write(self.style.SUCCESS(f"Task queued: {task.id}"))
|
||||
132
mnemosyne/library/management/commands/embedding_status.py
Normal file
132
mnemosyne/library/management/commands/embedding_status.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Management command to display embedding pipeline status and statistics.
|
||||
|
||||
Usage:
|
||||
python manage.py embedding_status
|
||||
python manage.py embedding_status --library <uid>
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Print a summary of the embedding pipeline state.

    Sections are reported independently: a failure in one section is printed
    and the remaining sections still run.
    """

    help = "Display embedding pipeline status and statistics."

    def add_arguments(self, parser):
        """Register the optional ``--library`` UID filter."""
        parser.add_argument(
            "--library",
            type=str,
            default="",
            help="Filter by library UID",
        )

    def handle(self, *args, **options):
        """Print system models, item status counts, node counts and chunk coverage."""
        library_uid = options["library"]

        try:
            from neomodel import db
        except ImportError:
            self.stderr.write(self.style.ERROR("neomodel not available"))
            return

        self.stdout.write(
            self.style.HTTP_INFO("\n=== Mnemosyne Embedding Pipeline Status ===\n")
        )

        self._report_system_models()
        self.stdout.write("")

        self._report_item_statuses(db, library_uid)
        self.stdout.write("")

        self._report_node_counts(db)
        self._report_chunk_coverage(db)
        self.stdout.write("")

    def _report_system_models(self):
        """Print the configured system embedding and chat models."""
        try:
            from llm_manager.models import LLMModel

            embed_model = LLMModel.get_system_embedding_model()
            if embed_model:
                self.stdout.write(
                    f"System Embedding Model: {embed_model.api.name}: {embed_model.name} "
                    f"(dimensions={embed_model.vector_dimensions or '?'})"
                )
            else:
                self.stdout.write(
                    self.style.WARNING("System Embedding Model: NOT CONFIGURED")
                )

            chat_model = LLMModel.get_system_chat_model()
            if chat_model:
                self.stdout.write(
                    f"System Chat Model: {chat_model.api.name}: {chat_model.name}"
                )
            else:
                self.stdout.write(
                    self.style.WARNING(
                        "System Chat Model: NOT CONFIGURED (concept extraction disabled)"
                    )
                )
        except Exception as exc:
            self.stdout.write(self.style.ERROR(f"Could not query LLM models: {exc}"))

    def _report_item_statuses(self, db, library_uid):
        """Print the number of Items per embedding status.

        :param db: The neomodel database handle used to run Cypher.
        :param library_uid: When non-empty, count only Items reached through
            that Library's collections.
        """
        painters = {
            "completed": self.style.SUCCESS,
            "failed": self.style.ERROR,
            "processing": self.style.WARNING,
            "pending": self.style.NOTICE,
        }
        scoped_query = (
            "MATCH (l:Library {uid: $lib_uid})-[:CONTAINS]->(c:Collection)"
            "-[:CONTAINS]->(i:Item {embedding_status: $status}) "
            "RETURN count(i)"
        )
        global_query = "MATCH (i:Item {embedding_status: $status}) RETURN count(i)"

        try:
            self.stdout.write("Item Embedding Status:")
            for status in ("pending", "processing", "completed", "failed"):
                if library_uid:
                    results, _ = db.cypher_query(
                        scoped_query, {"lib_uid": library_uid, "status": status}
                    )
                else:
                    results, _ = db.cypher_query(global_query, {"status": status})

                count = results[0][0] if results else 0
                paint = painters.get(status, str)
                self.stdout.write(f" {status:12s}: {paint(str(count))}")
        except Exception as exc:
            self.stdout.write(self.style.ERROR(f"Could not query items: {exc}"))

    def _report_node_counts(self, db):
        """Print the total node count for each graph label."""
        labels = (
            "Library",
            "Collection",
            "Item",
            "Chunk",
            "Concept",
            "Image",
            "ImageEmbedding",
        )
        try:
            self.stdout.write("Graph Node Counts:")
            for label in labels:
                results, _ = db.cypher_query(f"MATCH (n:{label}) RETURN count(n)")
                count = results[0][0] if results else 0
                self.stdout.write(f" {label:20s}: {count}")
        except Exception as exc:
            self.stdout.write(self.style.ERROR(f"Could not query nodes: {exc}"))

    def _report_chunk_coverage(self, db):
        """Print how many Chunk nodes have an embedding, out of the total."""
        try:
            results, _ = db.cypher_query(
                "MATCH (c:Chunk) WHERE c.embedding IS NOT NULL RETURN count(c)"
            )
            embedded = results[0][0] if results else 0
            results, _ = db.cypher_query("MATCH (c:Chunk) RETURN count(c)")
            total = results[0][0] if results else 0
            self.stdout.write(f"\nChunks with embeddings: {embedded}/{total}")
        except Exception as exc:
            # Previously swallowed silently; report it like the other sections
            # so a failing query is not mistaken for a missing section.
            self.stdout.write(
                self.style.ERROR(f"Could not query chunk embeddings: {exc}")
            )
|
||||
94
mnemosyne/library/management/commands/load_library_types.py
Normal file
94
mnemosyne/library/management/commands/load_library_types.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Management command to load default library type configurations.
|
||||
|
||||
Idempotent — safe to re-run. Creates Library nodes with default content-type
|
||||
configurations if they don't already exist. Does NOT overwrite existing
|
||||
libraries that have been customized unless --force is passed.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from library.content_types import LIBRARY_TYPE_DEFAULTS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Seed one default Library node per entry in ``LIBRARY_TYPE_DEFAULTS``.

    A type that already has at least one Library is skipped, or — with
    ``--force`` — the first matching Library has its chunking, embedding,
    reranker and LLM-context settings reset to the defaults.
    """

    help = (
        "Load default library type configurations into Neo4j. "
        "Creates one Library node per type with default chunking, embedding, "
        "reranker, and LLM context settings. Safe to re-run."
    )

    def add_arguments(self, parser):
        """Register the ``--force`` switch."""
        parser.add_argument(
            "--force",
            action="store_true",
            help="Update existing libraries with default configurations (overwrites customizations)",
        )

    def handle(self, *args, **options):
        """Create, update or skip a Library per type and print a summary."""
        force = options["force"]

        try:
            from library.models import Library
        except Exception as e:
            self.stderr.write(
                self.style.ERROR(f"Cannot import library models: {e}")
            )
            return

        created_count = 0
        updated_count = 0
        skipped_count = 0

        for library_type, config in LIBRARY_TYPE_DEFAULTS.items():
            display_name = library_type.capitalize()
            default_name = f"Default {display_name} Library"

            # Fetch the matches once. Testing and indexing the lazy NodeSet
            # directly would issue a separate query for each operation.
            existing = Library.nodes.filter(library_type=library_type).all()

            if not existing:
                lib = Library(
                    name=default_name,
                    library_type=library_type,
                    description=f"Default {display_name.lower()} library",
                    chunking_config=config["chunking_config"],
                    embedding_instruction=config["embedding_instruction"],
                    reranker_instruction=config["reranker_instruction"],
                    llm_context_prompt=config["llm_context_prompt"],
                )
                lib.save()
                created_count += 1
                self.stdout.write(
                    self.style.SUCCESS(f"Created: {default_name} ({library_type})")
                )
                continue

            lib = existing[0]
            if force:
                # Only the first matching library is reset, as before.
                lib.chunking_config = config["chunking_config"]
                lib.embedding_instruction = config["embedding_instruction"]
                lib.reranker_instruction = config["reranker_instruction"]
                lib.llm_context_prompt = config["llm_context_prompt"]
                lib.save()
                updated_count += 1
                self.stdout.write(
                    self.style.WARNING(f"Updated: {lib.name} ({library_type})")
                )
            else:
                skipped_count += 1
                self.stdout.write(
                    self.style.NOTICE(
                        f"Skipped: {lib.name} ({library_type}) — already exists"
                    )
                )

        self.stdout.write(
            self.style.SUCCESS(
                f"\nDone. Created: {created_count}, "
                f"Updated: {updated_count}, Skipped: {skipped_count}"
            )
        )
|
||||
189
mnemosyne/library/management/commands/setup_neo4j_indexes.py
Normal file
189
mnemosyne/library/management/commands/setup_neo4j_indexes.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Management command to create Neo4j indexes for Mnemosyne content graph.
|
||||
|
||||
Creates:
|
||||
- Vector indexes (dynamic dimensions from system embedding model) for Chunk, Concept, and ImageEmbedding
|
||||
- Full-text indexes for text search on Chunk.text_preview, Concept.name, Item.title, and Library.name
|
||||
- Constraint indexes enforced by neomodel (unique properties)
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from neomodel import db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Fallback vector size for when no system embedding model is configured.
DEFAULT_VECTOR_DIMENSIONS = 4096

# Full-text indexes to create, one (index_name, label, properties) per entry.
FULLTEXT_INDEXES = [
    (
        "chunk_text_fulltext",
        "Chunk",
        ["text_preview"],
    ),
    (
        "concept_name_fulltext",
        "Concept",
        ["name"],
    ),
    (
        "item_title_fulltext",
        "Item",
        ["title"],
    ),
    (
        "library_name_fulltext",
        "Library",
        ["name"],
    ),
]
|
||||
|
||||
|
||||
def _get_vector_dimensions():
    """
    Get vector dimensions from the system embedding model.

    Falls back to DEFAULT_VECTOR_DIMENSIONS if no model is configured,
    the model has no vector_dimensions set, or the lookup raises.

    :returns: Tuple of (dimensions, source_description).
    """
    try:
        from llm_manager.models import LLMModel

        model = LLMModel.get_system_embedding_model()
        if model and model.vector_dimensions:
            return model.vector_dimensions, f"{model.api.name}: {model.name}"
    except Exception as exc:
        # Still best-effort, but no longer silent: a failed lookup must not
        # look like "nothing configured", since the caller sizes vector
        # indexes from this value.
        logger.warning(
            "Could not read the system embedding model; using default "
            "vector dimensions (%d)",
            DEFAULT_VECTOR_DIMENSIONS,
            exc_info=True,
        )
        return (
            DEFAULT_VECTOR_DIMENSIONS,
            f"default (system embedding model lookup failed: {exc})",
        )

    return DEFAULT_VECTOR_DIMENSIONS, "default (no system embedding model)"
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Create the vector, full-text and neomodel constraint indexes.

    Indexes whose name already exists are skipped. ``--drop`` first removes
    the indexes this command manages so that they are recreated.
    """

    help = (
        "Create Neo4j vector, full-text, and constraint indexes "
        "for the Mnemosyne content graph. Vector dimensions are read "
        "from the system embedding model."
    )

    def add_arguments(self, parser):
        """Register the ``--drop`` and ``--dimensions`` options."""
        parser.add_argument(
            "--drop",
            action="store_true",
            help="Drop existing indexes before recreating them",
        )
        parser.add_argument(
            "--dimensions",
            type=int,
            default=0,
            help="Override vector dimensions (default: read from system embedding model)",
        )

    def handle(self, *args, **options):
        """Resolve the vector size, optionally drop, then create all indexes."""
        drop = options["drop"]
        override_dims = options["dimensions"]

        # A positive --dimensions wins; otherwise use the helper's value.
        if override_dims > 0:
            dimensions = override_dims
            source = f"CLI override ({override_dims})"
        else:
            dimensions, source = _get_vector_dimensions()

        self.stdout.write(
            self.style.HTTP_INFO(
                f"Vector dimensions: {dimensions} (source: {source})"
            )
        )

        # Vector index definitions: (name, label, property, dims, similarity).
        vector_indexes = [
            ("chunk_embedding_index", "Chunk", "embedding", dimensions, "cosine"),
            ("concept_embedding_index", "Concept", "embedding", dimensions, "cosine"),
            ("image_embedding_index", "ImageEmbedding", "embedding", dimensions, "cosine"),
        ]

        existing_indexes = self._get_existing_indexes()

        if drop:
            self._drop_indexes(existing_indexes, vector_indexes)
            # Re-read so the freshly dropped names are no longer "existing".
            existing_indexes = self._get_existing_indexes()

        self._create_vector_indexes(vector_indexes, existing_indexes)
        self._create_fulltext_indexes(existing_indexes)
        self._install_constraints()

        self.stdout.write(self.style.SUCCESS("\nNeo4j index setup complete."))

    def _create_vector_indexes(self, vector_indexes, existing_indexes):
        """Create each vector index whose name is not already present."""
        for name, label, prop, dims, similarity in vector_indexes:
            if name in existing_indexes:
                self.stdout.write(
                    self.style.NOTICE(f"Vector index '{name}' already exists, skipping")
                )
                continue
            try:
                cypher = (
                    f"CREATE VECTOR INDEX {name} IF NOT EXISTS "
                    f"FOR (n:{label}) ON (n.{prop}) "
                    f"OPTIONS {{indexConfig: {{"
                    f"`vector.dimensions`: {dims}, "
                    f"`vector.similarity_function`: '{similarity}'"
                    f"}}}}"
                )
                db.cypher_query(cypher)
                self.stdout.write(
                    self.style.SUCCESS(
                        f"Created vector index: {name} ({label}.{prop}, {dims}d {similarity})"
                    )
                )
            except Exception as e:
                self.stderr.write(
                    self.style.ERROR(f"Failed to create vector index '{name}': {e}")
                )

    def _create_fulltext_indexes(self, existing_indexes):
        """Create each FULLTEXT_INDEXES entry whose name is not already present."""
        for name, label, properties in FULLTEXT_INDEXES:
            if name in existing_indexes:
                self.stdout.write(
                    self.style.NOTICE(
                        f"Full-text index '{name}' already exists, skipping"
                    )
                )
                continue
            try:
                props_str = ", ".join(f"n.{p}" for p in properties)
                cypher = (
                    f"CREATE FULLTEXT INDEX {name} IF NOT EXISTS "
                    f"FOR (n:{label}) ON EACH [{props_str}]"
                )
                db.cypher_query(cypher)
                self.stdout.write(
                    self.style.SUCCESS(
                        f"Created full-text index: {name} ({label}: {', '.join(properties)})"
                    )
                )
            except Exception as e:
                self.stderr.write(
                    self.style.ERROR(f"Failed to create full-text index '{name}': {e}")
                )

    def _install_constraints(self):
        """Install the constraints/indexes declared on the neomodel models."""
        try:
            from neomodel import install_all_labels

            install_all_labels()
            self.stdout.write(
                self.style.SUCCESS("Installed neomodel constraint indexes")
            )
        except Exception as e:
            self.stderr.write(
                self.style.ERROR(f"Failed to install neomodel labels: {e}")
            )

    def _get_existing_indexes(self):
        """Return the set of existing index names (empty if listing fails)."""
        try:
            results, _ = db.cypher_query("SHOW INDEXES YIELD name RETURN name")
            return {row[0] for row in results}
        except Exception as exc:
            # Still best-effort, but say so: with an empty set --drop removes
            # nothing and every index is treated as missing.
            self.stderr.write(
                self.style.WARNING(f"Could not list existing indexes: {exc}")
            )
            return set()

    def _drop_indexes(self, existing_indexes, vector_indexes):
        """Drop the managed vector and full-text indexes that currently exist."""
        managed_names = {name for name, *_ in vector_indexes} | {
            name for name, *_ in FULLTEXT_INDEXES
        }
        for name in managed_names & existing_indexes:
            try:
                db.cypher_query(f"DROP INDEX {name} IF EXISTS")
                self.stdout.write(self.style.WARNING(f"Dropped index: {name}"))
            except Exception as e:
                self.stderr.write(
                    self.style.ERROR(f"Failed to drop index '{name}': {e}")
                )
|
||||
96
mnemosyne/library/metrics.py
Normal file
96
mnemosyne/library/metrics.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Prometheus metrics for the Mnemosyne embedding pipeline.
|
||||
|
||||
Exposes counters, histograms, and gauges for monitoring document parsing,
|
||||
chunking, embedding, and pipeline orchestration.
|
||||
"""
|
||||
|
||||
from prometheus_client import Counter, Gauge, Histogram
|
||||
|
||||
# --- Document Parsing ---

# Parsed documents, labelled by file type and outcome status.
DOCUMENTS_PARSED_TOTAL = Counter(
    "mnemosyne_documents_parsed_total",
    "Total documents parsed",
    labelnames=("file_type", "status"),
)
# Seconds spent parsing one document, labelled by file type.
DOCUMENT_PARSE_DURATION = Histogram(
    "mnemosyne_document_parse_duration_seconds",
    "Time to parse a document",
    labelnames=("file_type",),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60, 120),
)
# Images extracted from documents, labelled by file type.
IMAGES_EXTRACTED_TOTAL = Counter(
    "mnemosyne_images_extracted_total",
    "Total images extracted from documents",
    labelnames=("file_type",),
)

# --- Chunking ---

# Chunks created, labelled by library type and chunking strategy.
CHUNKS_CREATED_TOTAL = Counter(
    "mnemosyne_chunks_created_total",
    "Total chunks created",
    labelnames=("library_type", "strategy"),
)
# Chunk sizes in tokens (unlabelled).
CHUNK_SIZE_TOKENS = Histogram(
    "mnemosyne_chunk_size_tokens",
    "Distribution of chunk sizes in tokens",
    buckets=(32, 64, 128, 256, 512, 768, 1024, 2048),
)

# --- Embedding ---

# Embeddings generated, labelled by model, API type and content type.
EMBEDDINGS_GENERATED_TOTAL = Counter(
    "mnemosyne_embeddings_generated_total",
    "Total embeddings generated",
    labelnames=("model_name", "api_type", "content_type"),
)
# Seconds per embedding batch request, labelled by model and API type.
EMBEDDING_BATCH_DURATION = Histogram(
    "mnemosyne_embedding_batch_duration_seconds",
    "Time per embedding batch request",
    labelnames=("model_name", "api_type"),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60),
)
# Embedding API errors, labelled by model, API type and error type.
EMBEDDING_API_ERRORS_TOTAL = Counter(
    "mnemosyne_embedding_api_errors_total",
    "Embedding API errors",
    labelnames=("model_name", "api_type", "error_type"),
)
# Tokens sent to embedding APIs, labelled by model.
EMBEDDING_TOKENS_TOTAL = Counter(
    "mnemosyne_embedding_tokens_total",
    "Total tokens sent to embedding APIs",
    labelnames=("model_name",),
)

# --- Pipeline ---

# Items processed by the pipeline, labelled by outcome status.
PIPELINE_ITEMS_TOTAL = Counter(
    "mnemosyne_pipeline_items_total",
    "Total items processed by embedding pipeline",
    labelnames=("status",),
)
# Seconds to take one item through the full pipeline (unlabelled).
PIPELINE_DURATION = Histogram(
    "mnemosyne_pipeline_item_duration_seconds",
    "Total time to process one item through the full pipeline",
    buckets=(1, 5, 10, 30, 60, 120, 300, 600),
)
# Items currently being processed.
PIPELINE_ITEMS_IN_PROGRESS = Gauge(
    "mnemosyne_pipeline_items_in_progress",
    "Items currently being processed",
)

# --- Concept Extraction ---

# Concepts extracted, labelled by concept type.
CONCEPTS_EXTRACTED_TOTAL = Counter(
    "mnemosyne_concepts_extracted_total",
    "Total concepts extracted",
    labelnames=("concept_type",),
)

# --- System State ---

# Items waiting in the embedding queue.
EMBEDDING_QUEUE_SIZE = Gauge(
    "mnemosyne_embedding_queue_size",
    "Items waiting in the embedding queue",
)
|
||||
0
mnemosyne/library/migrations/__init__.py
Normal file
0
mnemosyne/library/migrations/__init__.py
Normal file
251
mnemosyne/library/models.py
Normal file
251
mnemosyne/library/models.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""
|
||||
Neo4j graph models for the Mnemosyne content library.
|
||||
|
||||
All content data (libraries, collections, items, chunks, concepts, images)
|
||||
lives in Neo4j as a knowledge graph. These models use neomodel's StructuredNode
|
||||
OGM — they do NOT participate in Django's ORM or migrations.
|
||||
"""
|
||||
|
||||
from neomodel import (
|
||||
ArrayProperty,
|
||||
DateTimeProperty,
|
||||
FloatProperty,
|
||||
IntegerProperty,
|
||||
JSONProperty,
|
||||
RelationshipTo,
|
||||
StringProperty,
|
||||
StructuredNode,
|
||||
StructuredRel,
|
||||
UniqueIdProperty,
|
||||
)
|
||||
|
||||
|
||||
# --- Relationship models ---
|
||||
|
||||
|
||||
class ReferencesRel(StructuredRel):
    """Edge properties carried by Item -> Concept ``REFERENCES`` relationships."""

    # Numeric weight of the reference; 1.0 when not specified.
    weight = FloatProperty(default=1.0)
    # Free-text context for the reference; empty when not specified.
    context = StringProperty(default="")
|
||||
|
||||
|
||||
class RelatedToRel(StructuredRel):
    """Edge properties carried by Item -> Item ``RELATED_TO`` relationships."""

    # Free-text label for the kind of relation; empty when not specified.
    relationship_type = StringProperty(default="")
    # Numeric weight of the relation; 1.0 when not specified.
    weight = FloatProperty(default=1.0)
|
||||
|
||||
|
||||
class NearbyImageRel(StructuredRel):
    """Edge properties carried by Chunk -> Image ``HAS_NEARBY_IMAGE`` relationships."""

    # Expected values: same_page, inline, same_slide, same_chapter.
    proximity = StringProperty(default="same_page")
|
||||
|
||||
|
||||
# --- Node models ---
|
||||
|
||||
|
||||
class Library(StructuredNode):
    """
    Top-level container representing a content library.

    Each library has a type (fiction, technical, music, film, art, journal)
    that drives chunking strategy, embedding instructions, and LLM prompts.
    """

    uid = UniqueIdProperty()
    name = StringProperty(unique_index=True, required=True)
    library_type = StringProperty(
        required=True,
        choices={
            "fiction": "Fiction",
            "technical": "Technical",
            "music": "Music",
            "film": "Film",
            "art": "Art",
            "journal": "Journal",
        },
    )
    description = StringProperty(default="")

    # Content-type configuration.
    # ``default=dict`` gives every node its own empty dict; a literal ``{}``
    # would be one object shared by all nodes created without a value.
    chunking_config = JSONProperty(default=dict)
    embedding_instruction = StringProperty(default="")
    reranker_instruction = StringProperty(default="")
    llm_context_prompt = StringProperty(default="")

    created_at = DateTimeProperty(default_now=True)

    # Relationships
    collections = RelationshipTo("Collection", "CONTAINS")

    def __str__(self):
        """Return ``"<name> (<library_type>)"``."""
        return f"{self.name} ({self.library_type})"
|
||||
|
||||
|
||||
class Collection(StructuredNode):
    """
    A grouping of items within a library.

    Examples: a book series, an album discography, a project folder.
    """

    uid = UniqueIdProperty()
    name = StringProperty(required=True)
    description = StringProperty(default="")
    # ``default=dict`` gives every node its own empty dict; a literal ``{}``
    # would be one object shared by all nodes created without a value.
    metadata = JSONProperty(default=dict)

    created_at = DateTimeProperty(default_now=True)

    # Relationships
    items = RelationshipTo("Item", "CONTAINS")
    library = RelationshipTo("Library", "BELONGS_TO")

    def __str__(self):
        """Return the collection name."""
        return self.name
|
||||
|
||||
|
||||
class Item(StructuredNode):
    """
    An individual piece of content: a document, song, image set, journal entry, etc.

    Items store their original file in S3 (via s3_key) and are chunked
    for embedding and retrieval.
    """

    uid = UniqueIdProperty()
    title = StringProperty(required=True)
    item_type = StringProperty(default="")
    s3_key = StringProperty(default="")
    content_hash = StringProperty(index=True)
    file_type = StringProperty(default="")
    file_size = IntegerProperty(default=0)
    # ``default=dict`` gives every node its own empty dict; a literal ``{}``
    # would be one object shared by all nodes created without a value.
    metadata = JSONProperty(default=dict)

    created_at = DateTimeProperty(default_now=True)
    # NOTE(review): only defaulted at creation here; nothing in this class
    # refreshes it on save — confirm callers update it explicitly.
    updated_at = DateTimeProperty(default_now=True)

    # Embedding pipeline fields (Phase 2)
    embedding_status = StringProperty(
        default="pending",
        choices={
            "pending": "Pending",
            "processing": "Processing",
            "completed": "Completed",
            "failed": "Failed",
        },
    )
    embedding_model_name = StringProperty(default="")
    chunk_count = IntegerProperty(default=0)
    image_count = IntegerProperty(default=0)
    error_message = StringProperty(default="")

    # Relationships
    chunks = RelationshipTo("Chunk", "HAS_CHUNK")
    images = RelationshipTo("Image", "HAS_IMAGE")
    concepts = RelationshipTo("Concept", "REFERENCES", model=ReferencesRel)
    related_items = RelationshipTo("Item", "RELATED_TO", model=RelatedToRel)

    def __str__(self):
        """Return the item title."""
        return self.title
|
||||
|
||||
|
||||
class Chunk(StructuredNode):
    """
    One text chunk cut from an Item, used for embedding and retrieval.

    The chunk text itself is kept in S3 under ``chunk_s3_key``;
    ``text_preview`` carries the first 500 characters for Neo4j full-text
    indexing.
    """

    uid = UniqueIdProperty()
    chunk_index = IntegerProperty(required=True)
    chunk_s3_key = StringProperty(required=True)
    chunk_size = IntegerProperty(default=0)
    # First 500 chars, kept on the node for the full-text index.
    text_preview = StringProperty(default="")
    # Embedding vector (original note: 4096 dimensions).
    embedding = ArrayProperty(FloatProperty())

    created_at = DateTimeProperty(default_now=True)

    # Relationships
    mentions = RelationshipTo("Concept", "MENTIONS")
    nearby_images = RelationshipTo("Image", "HAS_NEARBY_IMAGE", model=NearbyImageRel)

    def __str__(self):
        """Return ``"Chunk <chunk_index> (<uid>)"``."""
        return f"Chunk {self.chunk_index} ({self.uid})"
|
||||
|
||||
|
||||
class Concept(StructuredNode):
    """
    A named entity or topic extracted from content.

    Concepts are the shared nodes of the knowledge graph: items and chunks
    are linked to one another through the concepts they reference.
    """

    uid = UniqueIdProperty()
    name = StringProperty(unique_index=True, required=True)
    concept_type = StringProperty(
        default="",
        choices={
            "person": "Person",
            "place": "Place",
            "topic": "Topic",
            "technique": "Technique",
            "theme": "Theme",
        },
    )
    # Embedding vector (original note: 4096 dimensions).
    embedding = ArrayProperty(FloatProperty())

    # Relationships
    related_concepts = RelationshipTo("Concept", "RELATED_TO")

    def __str__(self):
        """Return the concept name."""
        return self.name
|
||||
|
||||
|
||||
class Image(StructuredNode):
    """
    An image associated with an Item (cover art, diagram, photo, etc.).

    The image file is stored in S3; embeddings enable multimodal search.
    """

    uid = UniqueIdProperty()
    s3_key = StringProperty(required=True)
    image_type = StringProperty(
        default="",
        choices={
            "cover": "Cover",
            "diagram": "Diagram",
            "artwork": "Artwork",
            "still": "Still",
            "photo": "Photo",
        },
    )
    description = StringProperty(default="")
    # ``default=dict`` gives every node its own empty dict; a literal ``{}``
    # would be one object shared by all nodes created without a value.
    metadata = JSONProperty(default=dict)

    created_at = DateTimeProperty(default_now=True)

    # Relationships
    embeddings = RelationshipTo("ImageEmbedding", "HAS_EMBEDDING")

    def __str__(self):
        """Return ``"Image <image_type> (<uid>)"``."""
        return f"Image {self.image_type} ({self.uid})"
|
||||
|
||||
|
||||
class ImageEmbedding(StructuredNode):
    """
    A multimodal embedding vector attached to an Image node.

    Per the original note, vectors are generated by Qwen3-VL so that text
    and images share one vector space.
    """

    uid = UniqueIdProperty()
    # Multimodal embedding vector (original note: 4096 dimensions).
    embedding = ArrayProperty(FloatProperty())
    created_at = DateTimeProperty(default_now=True)

    def __str__(self):
        """Return ``"ImageEmbedding (<uid>)"``."""
        return f"ImageEmbedding ({self.uid})"
|
||||
11
mnemosyne/library/services/__init__.py
Normal file
11
mnemosyne/library/services/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Library services for the Mnemosyne embedding pipeline.
|
||||
|
||||
Services:
|
||||
- parsers: Universal document parsing via PyMuPDF
|
||||
- text_utils: Text sanitization for embedding APIs
|
||||
- chunker: Content-type-aware chunking
|
||||
- embedding_client: Multi-backend embedding API client
|
||||
- pipeline: Orchestration of parse → chunk → embed → graph
|
||||
- concepts: LLM-based concept extraction
|
||||
"""
|
||||
250
mnemosyne/library/services/chunker.py
Normal file
250
mnemosyne/library/services/chunker.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
Content-type-aware chunking service.
|
||||
|
||||
Uses semantic-text-splitter with HuggingFace tokenizers to produce
|
||||
chunks that respect document structure boundaries per library type.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from library.metrics import CHUNKS_CREATED_TOTAL, CHUNK_SIZE_TOKENS
|
||||
|
||||
from .parsers import ParseResult, TextBlock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default tokenizer when no model-specific tokenizer is available.
# ContentTypeChunker uses it when constructed without a tokenizer name and
# as the fallback when the requested tokenizer cannot be loaded.
DEFAULT_TOKENIZER = "bert-base-uncased"

# Boundary markers used to detect structural elements in text.
# Maps a structural element name to a list of regular-expression source
# strings. Every pattern starts with the inline ``(?m)`` flag, so ``^`` and
# ``$`` match at line boundaries rather than only at the start/end of the
# whole text.
# NOTE(review): ContentTypeChunker in this module does not read this table;
# confirm whether another module consumes it or whether it is reserved for
# strategy-specific splitting.
_BOUNDARY_PATTERNS = {
    "chapter": [
        r"(?m)^chapter\s+\d+",
        r"(?m)^CHAPTER\s+\d+",
        r"(?m)^Chapter\s+\w+",
    ],
    "scene": [r"(?m)^\*\s*\*\s*\*", r"(?m)^---+$", r"(?m)^###"],
    "section": [
        r"(?m)^#{1,3}\s+",
        r"(?m)^\d+\.\d*\s+\w",
        r"(?m)^Section\s+\d+",
    ],
    "subsection": [r"(?m)^#{4,6}\s+", r"(?m)^\d+\.\d+\.\d+"],
    "entry": [
        r"(?m)^\d{4}-\d{2}-\d{2}",
        r"(?m)^(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d",
    ],
    "song": [r"(?m)^Track\s+\d+", r"(?m)^\[.*?\]$"],
    "verse": [r"(?m)^\[Verse", r"(?m)^\[Chorus"],
}
|
||||
|
||||
|
||||
class ChunkResult:
    """Container for the output of one chunking run."""

    def __init__(
        self,
        chunks: list[str],
        chunk_page_map: dict[int, int],
        strategy: str,
    ):
        """
        Store the chunk texts together with their page mapping.

        :param chunks: List of chunk text strings.
        :param chunk_page_map: Mapping of chunk_index -> source page number.
        :param strategy: The chunking strategy used.
        """
        self.strategy = strategy
        self.chunk_page_map = chunk_page_map
        self.chunks = chunks

    def __len__(self) -> int:
        """Return how many chunks this result holds."""
        chunk_count = len(self.chunks)
        return chunk_count
|
||||
|
||||
|
||||
class ContentTypeChunker:
    """
    Content-type-aware document chunker.

    Reads ``strategy``, ``chunk_size`` and ``chunk_overlap`` from the
    library's chunking configuration and splits the combined document text
    with semantic-text-splitter, so chunk sizes are measured in tokens.
    The ``strategy`` value is recorded in logs, metrics and the returned
    result; the split itself is always done by the token-aware splitter.
    """

    def __init__(self, tokenizer_name: Optional[str] = None):
        """
        :param tokenizer_name: HuggingFace tokenizer name for token counting.
            ``DEFAULT_TOKENIZER`` is used when empty or ``None``.
        """
        self._tokenizer_name = tokenizer_name or DEFAULT_TOKENIZER
        # One splitter per (chunk_size, chunk_overlap); building a splitter
        # loads a tokenizer, so instances are reused across calls.
        self._splitter_cache: dict[tuple[int, int], object] = {}

    def chunk(
        self,
        parse_result: "ParseResult",
        chunking_config: dict,
        library_type: str = "",
    ) -> "ChunkResult":
        """
        Chunk parsed document text using the library's chunking config.

        :param parse_result: ParseResult from the document parser.
        :param chunking_config: Library chunking configuration dict. Reads
            ``strategy`` (default ``"section_aware"``), ``chunk_size``
            (default 512) and ``chunk_overlap`` (default 64).
        :param library_type: Library type for metrics labeling.
        :returns: ChunkResult with chunk texts and page mapping. The result
            is empty when the document contains no non-whitespace text.
        :raises Exception: Whatever the splitter raises is logged and
            re-raised unchanged.
        """
        strategy = chunking_config.get("strategy", "section_aware")
        chunk_size = chunking_config.get("chunk_size", 512)
        chunk_overlap = chunking_config.get("chunk_overlap", 64)

        # Combine all text blocks into a single document text,
        # tracking page boundaries for chunk-page mapping
        full_text, page_offsets = self._combine_text_blocks(parse_result.text_blocks)

        if not full_text.strip():
            logger.warning("No text to chunk strategy=%s", strategy)
            return ChunkResult(chunks=[], chunk_page_map={}, strategy=strategy)

        logger.info(
            "Chunking text strategy=%s chunk_size=%d overlap=%d total_chars=%d",
            strategy,
            chunk_size,
            chunk_overlap,
            len(full_text),
        )

        # Get or create the text splitter for this size/overlap
        splitter = self._get_splitter(chunk_size, chunk_overlap)

        # Split into chunks
        try:
            chunks = splitter.chunks(full_text)
        except Exception as exc:
            logger.error("Chunking failed strategy=%s: %s", strategy, exc)
            raise

        # Build chunk -> page mapping
        chunk_page_map = self._map_chunks_to_pages(chunks, full_text, page_offsets)

        # Record metrics
        CHUNKS_CREATED_TOTAL.labels(
            library_type=library_type,
            strategy=strategy,
        ).inc(len(chunks))

        for chunk_text in chunks:
            # Whitespace-separated word count, used as a cheap stand-in for
            # the real token count.
            CHUNK_SIZE_TOKENS.observe(len(chunk_text.split()))

        logger.info(
            "Chunked document strategy=%s chunks=%d avg_size=%d",
            strategy,
            len(chunks),
            # Average size is in characters; max() guards the empty case.
            sum(len(c) for c in chunks) // max(len(chunks), 1),
        )

        return ChunkResult(
            chunks=chunks,
            chunk_page_map=chunk_page_map,
            strategy=strategy,
        )

    def _get_splitter(self, chunk_size: int, chunk_overlap: int):
        """
        Get or create a semantic text splitter for the given parameters.

        The configured tokenizer is tried first. If it cannot be loaded and
        it is not already ``DEFAULT_TOKENIZER``, the default tokenizer is
        used instead. Errors from building the splitter itself (for example
        an invalid size/overlap combination) propagate directly instead of
        being reported as a tokenizer failure.

        :param chunk_size: Maximum chunk size in tokens.
        :param chunk_overlap: Overlap between chunks in tokens.
        :returns: TextSplitter instance.
        """
        cache_key = (chunk_size, chunk_overlap)
        try:
            return self._splitter_cache[cache_key]
        except KeyError:
            pass

        # Imported lazily so the heavy dependencies are only loaded when a
        # splitter actually has to be built.
        from semantic_text_splitter import TextSplitter
        from tokenizers import Tokenizer

        tokenizer_name = self._tokenizer_name
        try:
            tokenizer = Tokenizer.from_pretrained(tokenizer_name)
        except Exception as exc:
            if tokenizer_name == DEFAULT_TOKENIZER:
                # Nothing left to fall back to.
                raise
            logger.warning(
                "Failed to load tokenizer %s: %s, falling back to %s",
                tokenizer_name,
                exc,
                DEFAULT_TOKENIZER,
            )
            tokenizer_name = DEFAULT_TOKENIZER
            tokenizer = Tokenizer.from_pretrained(tokenizer_name)

        splitter = TextSplitter.from_huggingface_tokenizer(
            tokenizer,
            capacity=chunk_size,
            overlap=chunk_overlap,
        )
        logger.debug(
            "Created text splitter tokenizer=%s capacity=%d overlap=%d",
            tokenizer_name,
            chunk_size,
            chunk_overlap,
        )

        self._splitter_cache[cache_key] = splitter
        return splitter

    def _combine_text_blocks(
        self, text_blocks: "list[TextBlock]"
    ) -> tuple[str, list[tuple[int, int]]]:
        """
        Combine text blocks into a single string, tracking page offsets.

        :param text_blocks: List of TextBlock from parser.
        :returns: Tuple of (combined_text, page_offsets) where page_offsets
            is a list of (char_offset, page_number), one entry per block,
            giving the character position at which that block starts in
            the combined text.
        """
        separator = "\n\n"
        parts: list[str] = []
        page_offsets: list[tuple[int, int]] = []
        current_offset = 0

        for block in text_blocks:
            page_offsets.append((current_offset, block.page))
            parts.append(block.text)
            # Account for the paragraph separator inserted by join().
            current_offset += len(block.text) + len(separator)

        return separator.join(parts), page_offsets

    def _map_chunks_to_pages(
        self,
        chunks: list[str],
        full_text: str,
        page_offsets: list[tuple[int, int]],
    ) -> dict[int, int]:
        """
        Map each chunk index to its source page number.

        Each chunk is located by searching for its first 100 characters,
        starting at the position of the previous chunk. A chunk that cannot
        be found is attributed to the previous chunk's position.

        :param chunks: List of chunk strings, in document order.
        :param full_text: Combined document text.
        :param page_offsets: List of (char_offset, page_number).
        :returns: Dict mapping chunk_index -> page_number. Page 0 is used
            when no offset precedes the chunk position.
        """
        chunk_page_map: dict[int, int] = {}
        search_start = 0
        # Chunk positions never move backwards, so the page offsets are
        # consumed once with a forward cursor instead of being rescanned
        # from the beginning for every chunk.
        cursor = 0
        page = 0

        for chunk_idx, chunk_text in enumerate(chunks):
            # Find where this chunk starts in the full text
            pos = full_text.find(chunk_text[:100], search_start)
            if pos == -1:
                pos = search_start

            # Advance to the last page offset at or before this position
            while cursor < len(page_offsets) and page_offsets[cursor][0] <= pos:
                page = page_offsets[cursor][1]
                cursor += 1

            chunk_page_map[chunk_idx] = page
            search_start = max(search_start, pos)

        return chunk_page_map
|
||||
267
mnemosyne/library/services/concepts.py
Normal file
267
mnemosyne/library/services/concepts.py
Normal file
@@ -0,0 +1,267 @@
|
||||
"""
|
||||
LLM-based concept extraction for the knowledge graph.
|
||||
|
||||
Uses the system chat model to extract named entities (people, places,
|
||||
topics, techniques, themes) from document chunks, then creates Concept
|
||||
nodes and MENTIONS/REFERENCES relationships in Neo4j.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from library.metrics import CONCEPTS_EXTRACTED_TOTAL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Prompt for concept extraction.
# Template sent to the chat model; ConceptExtractor fills the ``{text}``
# placeholder with ``str.format``, so any other literal brace added to the
# template would have to be doubled.
CONCEPT_EXTRACTION_PROMPT = """Extract named entities and key concepts from the following text.

Return a JSON array of objects, each with:
- "name": the entity/concept name (lowercase, canonical form)
- "type": one of "person", "place", "topic", "technique", "theme"

Only extract significant, specific concepts — not generic words.
Return at most 20 concepts. Return ONLY the JSON array, no other text.

Text:
{text}"""
|
||||
|
||||
|
||||
class ConceptExtractor:
    """
    Extracts concepts from text using the system chat model.

    Creates or updates Concept nodes in Neo4j and connects them
    to Chunk and Item nodes via MENTIONS and REFERENCES relationships.
    """

    # Longest text (in characters) sent to the chat model per request.
    _MAX_TEXT_CHARS = 3000

    def __init__(self, chat_model, user=None):
        """
        :param chat_model: LLMModel instance for chat/completion.
        :param user: Optional Django user for usage tracking.
        """
        self.chat_model = chat_model
        self.user = user

    def extract_for_item(
        self,
        item,
        chunk_nodes: list,
        chunk_texts: list[str],
    ) -> int:
        """
        Extract concepts from all chunks of an item.

        A sample of at most 10 evenly spaced chunks is sent to the chat
        model. Every usable concept is linked to the chunk it came from and
        then to the item itself. Graph failures are logged and skipped so
        one bad concept does not abort the whole item.

        :param item: Item node.
        :param chunk_nodes: List of Chunk nodes, parallel to ``chunk_texts``.
        :param chunk_texts: List of chunk text strings.
        :returns: Total number of unique concepts extracted.
        """
        all_concepts: dict[str, str] = {}  # name -> type
        # Concept nodes resolved so far, so each one is fetched or created
        # at most once per item.
        resolved_nodes: dict[str, object] = {}

        # Sample chunks for extraction (don't process every chunk for large docs)
        sample_indices = self._select_sample_indices(len(chunk_texts), max_samples=10)

        for idx in sample_indices:
            chunk_node = chunk_nodes[idx]

            for concept_data in self._extract_from_text(chunk_texts[idx]):
                normalized = self._normalize_concept(concept_data)
                if normalized is None:
                    continue
                name, concept_type = normalized

                all_concepts[name] = concept_type

                # Connect chunk -> concept via MENTIONS
                concept_node = self._resolve_concept(name, concept_type, resolved_nodes)
                if concept_node is None:
                    continue
                try:
                    chunk_node.mentions.connect(concept_node)
                except Exception as exc:
                    # Best effort: keep going, but leave a trace.
                    logger.debug(
                        "Could not connect chunk to concept '%s': %s", name, exc
                    )

        # Connect item -> all concepts via REFERENCES
        for name, concept_type in all_concepts.items():
            concept_node = self._resolve_concept(name, concept_type, resolved_nodes)
            if concept_node is not None:
                try:
                    item.concepts.connect(concept_node, {"weight": 1.0})
                except Exception as exc:
                    logger.debug(
                        "Could not connect item to concept '%s': %s", name, exc
                    )

            # Counted once per unique concept extracted for this item.
            CONCEPTS_EXTRACTED_TOTAL.labels(concept_type=concept_type).inc()

        logger.info(
            "Extracted %d concepts for item_uid=%s",
            len(all_concepts),
            item.uid,
        )
        return len(all_concepts)

    @staticmethod
    def _normalize_concept(concept_data) -> Optional[tuple[str, str]]:
        """
        Validate one concept dict returned by the chat model.

        :param concept_data: Candidate concept, expected to be a dict with
            ``name`` and optionally ``type``.
        :returns: ``(name, concept_type)`` with the name stripped and
            lowercased, or ``None`` when the name is missing, not a string,
            or shorter than two characters. A missing, empty or non-string
            type becomes ``"topic"``.
        """
        if not isinstance(concept_data, dict):
            return None

        raw_name = concept_data.get("name", "")
        if not isinstance(raw_name, str):
            return None
        name = raw_name.strip().lower()
        if len(name) < 2:
            return None

        concept_type = concept_data.get("type", "topic")
        if not isinstance(concept_type, str) or not concept_type.strip():
            concept_type = "topic"
        return name, concept_type

    def _resolve_concept(self, name: str, concept_type: str, cache: dict):
        """
        Return the Concept node for ``name``, using ``cache`` when possible.

        Only successful lookups are cached, so a failed get/create is
        retried the next time the concept is needed.

        :param name: Normalized concept name.
        :param concept_type: Concept type used if the node must be created.
        :param cache: Per-item mapping of name -> Concept node.
        :returns: Concept node, or None on failure.
        """
        node = cache.get(name)
        if node is None:
            node = self._get_or_create_concept(name, concept_type)
            if node is not None:
                cache[name] = node
        return node

    def _extract_from_text(self, text: str) -> list[dict]:
        """
        Call the chat model to extract concepts from text.

        Any failure (HTTP error, unexpected response shape, ...) is logged
        as a warning and reported as "no concepts".

        :param text: Text to analyze.
        :returns: List of concept dicts with 'name' and 'type' keys.
        """
        # Truncate very long text to avoid token limits
        if len(text) > self._MAX_TEXT_CHARS:
            text = text[: self._MAX_TEXT_CHARS]

        prompt = CONCEPT_EXTRACTION_PROMPT.format(text=text)

        try:
            response_text = self._call_chat_model(prompt)
            concepts = self._parse_concept_response(response_text)
            logger.debug(
                "Extracted %d concepts from text chunk (len=%d)",
                len(concepts),
                len(text),
            )
            return concepts
        except Exception as exc:
            logger.warning("Concept extraction failed: %s", exc)
            return []

    def _call_chat_model(self, prompt: str) -> str:
        """
        Make a chat completion request to the system chat model.

        Uses the Bedrock Converse endpoint when the API type is
        ``bedrock`` and the OpenAI-compatible ``/chat/completions``
        endpoint otherwise.

        :param prompt: User prompt text.
        :returns: Response text from the model.
        :raises requests.HTTPError: If the API answers with an error status.
        :raises ValueError: If the response has neither an ``output`` nor a
            ``choices`` key.
        """
        import requests

        api = self.chat_model.api
        base_url = api.base_url.rstrip("/")

        if api.api_type == "bedrock":
            # Bedrock Converse endpoint
            url = f"{base_url}/model/{self.chat_model.name}/converse"
            headers = {
                "Authorization": f"Bearer {api.api_key}",
                "Content-Type": "application/json",
            }
            body = {
                "messages": [{"role": "user", "content": [{"text": prompt}]}],
            }
        else:
            # OpenAI-compatible
            url = f"{base_url}/chat/completions"
            headers = {"Content-Type": "application/json"}
            if api.api_key:
                headers["Authorization"] = f"Bearer {api.api_key}"
            body = {
                "model": self.chat_model.name,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "max_tokens": 1000,
            }

        resp = requests.post(
            url, json=body, headers=headers, timeout=api.timeout_seconds or 60
        )
        resp.raise_for_status()
        data = resp.json()

        # Parse response based on format
        if "output" in data:
            # Bedrock Converse format
            return data["output"]["message"]["content"][0]["text"]
        if "choices" in data:
            # OpenAI format
            return data["choices"][0]["message"]["content"]

        raise ValueError(f"Unexpected chat response format: {list(data.keys())}")

    def _parse_concept_response(self, response_text: str) -> list[dict]:
        """
        Parse the LLM's concept extraction response into structured data.

        Accepts a bare JSON array, an array wrapped in a markdown code
        fence, or an array embedded in surrounding prose. Entries that are
        not dicts with a ``name`` key are dropped.

        :param response_text: Raw response text (expected JSON array).
        :returns: List of concept dicts; empty when nothing can be parsed.
        """
        import re

        text = response_text.strip() if isinstance(response_text, str) else ""

        # Handle markdown code blocks: drop the opening fence line, and the
        # closing fence only when it really is on a line of its own.
        if text.startswith("```"):
            lines = text.split("\n")
            if len(lines) > 1:
                body = lines[1:]
                if body and body[-1].strip().startswith("```"):
                    body = body[:-1]
                text = "\n".join(body)

        def candidates():
            # First the whole text, then the outermost [...] span inside it.
            yield text
            match = re.search(r"\[.*\]", text, re.DOTALL)
            if match:
                yield match.group()

        for candidate in candidates():
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, list):
                return [
                    c for c in parsed
                    if isinstance(c, dict) and "name" in c
                ]

        logger.debug("Could not parse concept response: %s", text[:200])
        return []

    def _get_or_create_concept(self, name: str, concept_type: str):
        """
        Get or create a Concept node by name.

        :param name: Concept name (lowercase).
        :param concept_type: Concept type (person, place, topic, etc.).
            Only used when a new node is created.
        :returns: Concept node, or None on failure.
        """
        from library.models import Concept

        try:
            # Single query: first matching node, or None when there is none.
            existing = Concept.nodes.first_or_none(name=name)
            if existing is not None:
                return existing

            # Create new
            concept = Concept(name=name, concept_type=concept_type)
            concept.save()
            return concept
        except Exception as exc:
            logger.debug("Failed to get/create concept '%s': %s", name, exc)
            return None

    def _select_sample_indices(
        self, total: int, max_samples: int = 10
    ) -> list[int]:
        """
        Select evenly-spaced sample indices for concept extraction.

        :param total: Total number of chunks.
        :param max_samples: Maximum samples to take.
        :returns: List of chunk indices to process; every index when
            ``total <= max_samples``, empty when ``max_samples`` is not
            positive.
        """
        if total <= max_samples:
            return list(range(total))
        if max_samples <= 0:
            # Avoid dividing by zero when no samples are requested.
            return []

        step = total / max_samples
        return [int(i * step) for i in range(max_samples)]
|
||||
396
mnemosyne/library/services/embedding_client.py
Normal file
396
mnemosyne/library/services/embedding_client.py
Normal file
@@ -0,0 +1,396 @@
|
||||
"""
|
||||
Multi-backend embedding client.
|
||||
|
||||
Dispatches embedding requests to OpenAI-compatible APIs (OpenAI, vLLM,
|
||||
llama-cpp, Ollama) or Amazon Bedrock via direct HTTP with Bearer token auth.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from library.metrics import (
|
||||
EMBEDDING_API_ERRORS_TOTAL,
|
||||
EMBEDDING_BATCH_DURATION,
|
||||
EMBEDDING_TOKENS_TOTAL,
|
||||
EMBEDDINGS_GENERATED_TOTAL,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingClient:
    """
    Client for generating text and image embeddings via multiple backends.

    Dispatches based on ``LLMApi.api_type``:

    * ``bedrock`` — Amazon Bedrock Runtime ``POST /model/{id}/invoke``
      with Bearer token auth
    * anything else (``openai``, ``vllm``, ``llama-cpp``, ``ollama``) —
      OpenAI-compatible ``POST /embeddings``
    """

    # File extensions whose MIME subtype differs from the extension itself.
    _MIME_SUBTYPES = {"jpg": "jpeg", "tif": "tiff", "svg": "svg+xml"}

    def __init__(self, embedding_model, user=None):
        """
        :param embedding_model: ``LLMModel`` instance for embeddings.
        :param user: Optional Django user for usage tracking.
        """
        self.model = embedding_model
        self.api = embedding_model.api
        self.user = user
        self.base_url = self.api.base_url.rstrip("/")
        self.model_name = self.model.name
        self.api_type = self.api.api_type
        # Per-request timeout in seconds; 120 when the API does not set one.
        self.timeout = self.api.timeout_seconds or 120

        logger.info(
            "EmbeddingClient initialized api=%s model=%s api_type=%s base_url=%s",
            self.api.name,
            self.model_name,
            self.api_type,
            self.base_url,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def embed_text(self, text: str) -> list[float]:
        """
        Generate an embedding vector for a single text string.

        :param text: Text to embed.
        :returns: Embedding vector as list of floats.
        """
        if self.api_type == "bedrock":
            return self._embed_bedrock_single(text)
        return self._embed_openai_single(text)

    def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for multiple texts.

        :param texts: List of text strings.
        :returns: List of embedding vectors, one per input text. An empty
            input yields an empty list without contacting the API.
        """
        if not texts:
            # Both backends agree on this: nothing to embed, nothing to send.
            return []
        if self.api_type == "bedrock":
            return self._embed_bedrock_batch(texts)
        return self._embed_openai_batch(texts)

    def embed_image(self, image_data: bytes, image_ext: str = "png") -> Optional[list[float]]:
        """
        Generate a multimodal embedding for an image.

        Requires a model with ``supports_multimodal=True``.

        :param image_data: Raw image bytes.
        :param image_ext: Image format extension (case and a leading dot
            are ignored).
        :returns: Embedding vector, or None if not supported or if the
            request fails.
        """
        if not self.model.supports_multimodal:
            logger.debug(
                "Model %s does not support multimodal, skipping image embedding",
                self.model_name,
            )
            return None

        b64 = base64.b64encode(image_data).decode("utf-8")
        mime_type = self._image_mime_type(image_ext)

        if self.api_type == "bedrock":
            return self._embed_bedrock_image(b64, mime_type)
        return self._embed_openai_image(b64, mime_type)

    @classmethod
    def _image_mime_type(cls, image_ext: str) -> str:
        """
        Map an image file extension to its MIME type.

        :param image_ext: Extension such as ``"png"``, ``"JPG"`` or ``".tif"``.
        :returns: MIME type string, e.g. ``"image/jpeg"``.
        """
        ext = (image_ext or "png").lower().lstrip(".")
        return f"image/{cls._MIME_SUBTYPES.get(ext, ext)}"

    # ------------------------------------------------------------------
    # Shared request / metrics helpers
    # ------------------------------------------------------------------

    def _post_json(self, url: str, payload: dict, headers: dict, timeout, backend: str):
        """
        POST a JSON payload and return the decoded JSON response.

        The call is timed with ``EMBEDDING_BATCH_DURATION``; request
        failures are counted, logged and re-raised.

        :param url: Endpoint URL.
        :param payload: JSON-serializable request body.
        :param headers: HTTP headers.
        :param timeout: Timeout in seconds passed to ``requests``.
        :param backend: Backend label used in log messages.
        :returns: Parsed JSON response.
        :raises requests.RequestException: On transport or HTTP errors.
        """
        with EMBEDDING_BATCH_DURATION.labels(
            model_name=self.model_name, api_type=self.api_type
        ).time():
            try:
                resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
                if resp.status_code != 200:
                    logger.error(
                        "%s embedding failed status=%d body=%s",
                        backend,
                        resp.status_code,
                        resp.text[:500],
                    )
                resp.raise_for_status()
                return resp.json()
            except requests.RequestException as exc:
                self._count_api_error(exc)
                logger.error("%s embedding request failed: %s", backend, exc)
                raise

    def _count_api_error(self, exc: Exception) -> None:
        """Increment the API error counter, labeled with the exception type."""
        EMBEDDING_API_ERRORS_TOTAL.labels(
            model_name=self.model_name,
            api_type=self.api_type,
            error_type=type(exc).__name__,
        ).inc()

    def _count_generated(self, content_type: str, amount: int = 1) -> None:
        """Increment the generated-embeddings counter for ``content_type``."""
        EMBEDDINGS_GENERATED_TOTAL.labels(
            model_name=self.model_name,
            api_type=self.api_type,
            content_type=content_type,
        ).inc(amount)

    # ------------------------------------------------------------------
    # OpenAI-compatible backend
    # ------------------------------------------------------------------

    def _embed_openai_single(self, text: str) -> list[float]:
        """Embed a single text via OpenAI-compatible /embeddings endpoint."""
        result = self._embed_openai_batch([text])
        return result[0]

    def _embed_openai_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Embed a batch of texts via OpenAI-compatible /embeddings endpoint.

        :param texts: Non-empty list of texts.
        :returns: One embedding vector per text, in input order.
        :raises requests.RequestException: On transport or HTTP errors.
        :raises ValueError: If the response cannot be parsed or the number
            of vectors does not match the number of texts.
        """
        url = f"{self.base_url}/embeddings"
        payload = {"input": texts, "model": self.model_name}

        logger.debug(
            "OpenAI embedding request texts=%d model=%s",
            len(texts),
            self.model_name,
        )

        data = self._post_json(
            url,
            payload,
            self._openai_headers(),
            # Allow more time for large batches: one timeout unit per 10 texts.
            timeout=self.timeout * max(1, len(texts) // 10),
            backend="OpenAI",
        )

        embeddings = self._parse_openai_response(data)
        if len(embeddings) != len(texts):
            # A short or long answer would silently pair vectors with the
            # wrong texts further down the pipeline.
            raise ValueError(
                f"Embedding count mismatch: sent {len(texts)} texts, "
                f"received {len(embeddings)} vectors"
            )

        token_count = self._openai_token_count(data, texts)

        # Metrics
        self._count_generated("text", len(embeddings))
        EMBEDDING_TOKENS_TOTAL.labels(model_name=self.model_name).inc(token_count)

        self._log_usage(len(texts), token_count)

        logger.debug(
            "OpenAI embedding response texts=%d dimensions=%d",
            len(embeddings),
            len(embeddings[0]) if embeddings else 0,
        )
        return embeddings

    @staticmethod
    def _openai_token_count(data, texts: list[str]) -> int:
        """
        Determine how many input tokens a batch consumed.

        Prefers the count reported in the response's ``usage`` object and
        falls back to a whitespace word count when none is reported.

        :param data: Parsed JSON response.
        :param texts: Texts that were sent.
        :returns: Token count (reported or approximated).
        """
        usage = data.get("usage") if isinstance(data, dict) else None
        if isinstance(usage, dict):
            reported = usage.get("prompt_tokens") or usage.get("total_tokens")
            if isinstance(reported, int) and reported > 0:
                return reported
        return sum(len(t.split()) for t in texts)

    def _embed_openai_image(self, b64_image: str, mime_type: str) -> Optional[list[float]]:
        """
        Embed an image via OpenAI-compatible multimodal endpoint.

        The image is sent as a base64 ``data:`` URL. Failures are counted
        and logged; they do not raise.

        :param b64_image: Base64-encoded image bytes.
        :param mime_type: MIME type used in the data URL.
        :returns: Embedding vector, or None on failure or empty response.
        """
        url = f"{self.base_url}/embeddings"
        payload = {
            "input": [{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64_image}"}}],
            "model": self.model_name,
        }
        headers = self._openai_headers()

        try:
            resp = requests.post(url, json=payload, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            data = resp.json()
            embeddings = self._parse_openai_response(data)
            if embeddings:
                self._count_generated("image")
                return embeddings[0]
        except Exception as exc:
            self._count_api_error(exc)
            logger.warning("Image embedding failed: %s", exc)

        return None

    def _openai_headers(self) -> dict[str, str]:
        """Build headers for OpenAI-compatible requests."""
        headers = {"Content-Type": "application/json"}
        if self.api.api_key:
            headers["Authorization"] = f"Bearer {self.api.api_key}"
        return headers

    def _parse_openai_response(self, data) -> list[list[float]]:
        """
        Parse embedding response from various OpenAI-compatible formats.

        Handles:
        - OpenAI standard: ``{"data": [{"embedding": [...], "index": 0}]}``
        - Direct list of dicts: ``[{"embedding": [...]}]``
        - Direct list of vectors: ``[[0.1, 0.2, ...]]``
        - Single vector: ``{"embedding": [...]}``
        - Dict with embeddings key: ``{"embeddings": [[...]]}``

        :param data: Parsed JSON response.
        :returns: List of embedding vectors.
        :raises ValueError: If the format is not recognized.
        """
        if isinstance(data, list):
            if data and isinstance(data[0], dict) and "embedding" in data[0]:
                return [item["embedding"] for item in data]
            return data

        if isinstance(data, dict):
            if "data" in data:
                # Restore input order in case the server returned the items
                # out of order.
                return [
                    item["embedding"]
                    for item in sorted(data["data"], key=lambda x: x.get("index", 0))
                ]
            if "embedding" in data:
                return [data["embedding"]]
            if "embeddings" in data:
                return data["embeddings"]

        raise ValueError(f"Unexpected embedding response format: {type(data)}")

    # ------------------------------------------------------------------
    # Amazon Bedrock backend
    # ------------------------------------------------------------------

    def _bedrock_headers(self) -> dict[str, str]:
        """Build headers for Bedrock Runtime requests (Bearer token auth)."""
        return {
            "Authorization": f"Bearer {self.api.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _embed_bedrock_single(self, text: str) -> list[float]:
        """
        Embed a single text via Bedrock Runtime InvokeModel.

        :param text: Text to embed.
        :returns: Embedding vector.
        :raises requests.RequestException: On transport or HTTP errors.
        :raises ValueError: If the response has no ``embedding`` value.
        """
        url = f"{self.base_url}/model/{self.model_name}/invoke"
        body = {"inputText": text, "normalize": True}

        # Include dimensions if the model supports configurable output
        if self.model.vector_dimensions:
            body["dimensions"] = self.model.vector_dimensions

        logger.debug(
            "Bedrock embedding request model=%s text_len=%d",
            self.model_name,
            len(text),
        )

        data = self._post_json(
            url, body, self._bedrock_headers(), timeout=self.timeout, backend="Bedrock"
        )

        embedding = data.get("embedding")
        if not embedding:
            raise ValueError(f"Bedrock response missing 'embedding' key: {list(data.keys())}")

        # Use the reported token count when present, else approximate.
        token_count = data.get("inputTextTokenCount", len(text.split()))

        self._count_generated("text")
        EMBEDDING_TOKENS_TOTAL.labels(model_name=self.model_name).inc(token_count)
        self._log_usage(1, token_count)

        logger.debug(
            "Bedrock embedding response dimensions=%d tokens=%d",
            len(embedding),
            token_count,
        )
        return embedding

    def _embed_bedrock_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Embed multiple texts via Bedrock (client-side loop).

        Bedrock InvokeModel accepts one input at a time, so we loop.

        :param texts: List of text strings.
        :returns: List of embedding vectors.
        """
        embeddings = []
        for i, text in enumerate(texts):
            embeddings.append(self._embed_bedrock_single(text))
            if (i + 1) % 10 == 0:
                logger.debug(
                    "Bedrock batch progress %d/%d", i + 1, len(texts)
                )
        return embeddings

    def _embed_bedrock_image(self, b64_image: str, mime_type: str) -> Optional[list[float]]:
        """
        Embed an image via Bedrock multimodal endpoint.

        Failures are counted and logged; they do not raise.

        :param b64_image: Base64-encoded image bytes.
        :param mime_type: MIME type of the image (not sent to Bedrock).
        :returns: Embedding vector, or None on failure or empty response.
        """
        url = f"{self.base_url}/model/{self.model_name}/invoke"
        body = {
            "inputImage": b64_image,
            "normalize": True,
        }
        if self.model.vector_dimensions:
            body["dimensions"] = self.model.vector_dimensions

        try:
            resp = requests.post(
                url, json=body, headers=self._bedrock_headers(), timeout=self.timeout
            )
            resp.raise_for_status()
            data = resp.json()
            embedding = data.get("embedding")
            if embedding:
                self._count_generated("image")
                return embedding
        except Exception as exc:
            self._count_api_error(exc)
            logger.warning("Bedrock image embedding failed: %s", exc)

        return None

    # ------------------------------------------------------------------
    # Usage tracking
    # ------------------------------------------------------------------

    def _log_usage(self, text_count: int, token_count: int):
        """
        Log embedding usage to LLMUsage model.

        Best effort: a failure to record usage is logged as a warning and
        never interrupts embedding.

        :param text_count: Number of texts embedded (currently not stored).
        :param token_count: Token count (reported or approximate).
        """
        try:
            from llm_manager.models import LLMUsage

            # A model without a configured price is recorded at zero cost
            # instead of losing the usage row.
            cost_per_1k = float(self.model.input_cost_per_1k or 0)

            LLMUsage.objects.create(
                model=self.model,
                user=self.user,
                input_tokens=token_count,
                output_tokens=0,
                cached_tokens=0,
                total_cost=(token_count / 1000) * cost_per_1k,
                purpose="embeddings",
            )
        except Exception as exc:
            logger.warning("Failed to log embedding usage: %s", exc)
|
||||
360
mnemosyne/library/services/parsers.py
Normal file
360
mnemosyne/library/services/parsers.py
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Universal document parsing service using PyMuPDF.
|
||||
|
||||
Handles text extraction and image extraction for all supported formats:
|
||||
PDF, EPUB, DOCX, PPTX, XLSX, XPS, MOBI, FB2, CBZ, TXT, HTML, and images.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import fitz # PyMuPDF
|
||||
|
||||
from library.metrics import (
|
||||
DOCUMENT_PARSE_DURATION,
|
||||
DOCUMENTS_PARSED_TOTAL,
|
||||
IMAGES_EXTRACTED_TOTAL,
|
||||
)
|
||||
|
||||
from .text_utils import remove_excessive_whitespace, sanitize_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# File extensions supported by PyMuPDF.
# Lower-case, without the leading dot; DocumentParser.parse normalizes the
# incoming file type the same way before comparing.
PYMUPDF_EXTENSIONS = {
    "pdf", "epub", "xps", "mobi", "fb2", "cbz", "svg",
    "docx", "pptx", "xlsx", "hwpx",
}

# Plain text extensions — read directly, no PyMuPDF needed
PLAINTEXT_EXTENSIONS = {"txt", "md", "csv", "tsv", "log", "json", "yaml", "yml", "xml"}

# Image extensions — store as Image nodes directly.
# NOTE(review): "svg" is also listed in PYMUPDF_EXTENSIONS. DocumentParser.parse
# tests IMAGE_EXTENSIONS before PYMUPDF_EXTENSIONS, so SVG files take the
# image path — confirm that is intended.
IMAGE_EXTENSIONS = {"jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "svg"}

# Minimum image dimensions to extract (skip tiny icons/bullets).
# Presumably pixels — confirm against the extraction code.
MIN_IMAGE_WIDTH = 50
MIN_IMAGE_HEIGHT = 50
|
||||
|
||||
|
||||
@dataclass
class TextBlock:
    """
    A block of extracted text with page/section context.

    The chunker joins the ``text`` of consecutive blocks and uses ``page``
    to map chunks back to their source page.
    """

    # Extracted text of the block.
    text: str
    # Source page number (numbering base not visible here — confirm with the parser).
    page: int
    # Free-form extra context for the block; a fresh dict per instance.
    metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class ExtractedImage:
    """Raw bytes and basic facts about one image pulled out of a document."""

    # Encoded image bytes exactly as extracted / read from disk.
    data: bytes
    # File extension of the encoded image, without a dot (e.g. "png").
    ext: str
    # Pixel dimensions.
    width: int
    height: int
    # Zero-based page index the image was found on.
    source_page: int
    # Running index of the image within the whole document.
    source_index: int
|
||||
|
||||
|
||||
@dataclass
class ParseResult:
    """Everything a parse run produced: text, images, metadata and file type."""

    # Cleaned text, one entry per page/section that yielded text.
    text_blocks: list[TextBlock] = field(default_factory=list)
    # Images that passed the parser's filters.
    images: list[ExtractedImage] = field(default_factory=list)
    # Document-level metadata gathered by the parser (e.g. "page_count").
    metadata: dict = field(default_factory=dict)
    # Normalized extension of the parsed file, without a dot.
    file_type: str = ""
|
||||
|
||||
|
||||
class DocumentParser:
|
||||
"""
|
||||
Universal document parser using PyMuPDF.
|
||||
|
||||
Extracts text and images from all supported document formats through
|
||||
a single unified interface.
|
||||
"""
|
||||
|
||||
def parse(self, file_path: str, file_type: str) -> ParseResult:
    """
    Parse a document and extract text blocks and images.

    The extension is normalized (lower-cased, leading dots removed) and
    the file is dispatched to the plain-text reader, PyMuPDF, or the
    standalone-image handler.

    :param file_path: Path to the document file.
    :param file_type: File extension (with or without dot), e.g. 'pdf', 'epub'.
    :returns: ParseResult with text blocks, images, and metadata.
    :raises ValueError: If the file type is not supported.
    """
    file_type = file_type.lower().lstrip(".")

    logger.info(
        "Parsing document file_type=%s path=%s",
        file_type,
        os.path.basename(file_path),
    )

    if file_type in PLAINTEXT_EXTENSIONS:
        return self._parse_plaintext(file_path, file_type)

    # PyMuPDF must be tried before the image handler: "svg" is listed in
    # both PYMUPDF_EXTENSIONS and IMAGE_EXTENSIONS, and the image handler
    # relies on Pillow, which has no SVG decoder and would always fail.
    # HTML is not in PYMUPDF_EXTENSIONS but is opened by PyMuPDF as well.
    if file_type in PYMUPDF_EXTENSIONS or file_type in ("html", "htm"):
        return self._parse_with_pymupdf(file_path, file_type)

    if file_type in IMAGE_EXTENSIONS:
        return self._parse_image_file(file_path, file_type)

    # Include html/htm so the message lists everything actually accepted.
    supported = sorted(
        PYMUPDF_EXTENSIONS | PLAINTEXT_EXTENSIONS | IMAGE_EXTENSIONS | {"html", "htm"}
    )
    raise ValueError(
        f"Unsupported file type '{file_type}'. "
        f"Supported: {supported}"
    )
|
||||
|
||||
def parse_bytes(self, data: bytes, file_type: str, filename: str = "") -> ParseResult:
    """
    Parse document from bytes (e.g. from S3 download).

    The bytes are spooled to a named temporary file (with a suffix
    matching the extension) so that path-based parsers can open it; the
    file is removed again before returning, whether parsing succeeded
    or not.

    :param data: Raw file bytes.
    :param file_type: File extension (with or without dot).
    :param filename: Optional original filename, used for logging only.
    :returns: ParseResult.
    :raises ValueError: If the file type is not supported.
    """
    suffix = f".{file_type.lower().lstrip('.')}"

    # delete=False because the parsers reopen the file by path after this
    # handle is closed; cleanup is therefore done manually in `finally`.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    tmp_path = tmp.name

    try:
        # The write is inside the try block so a failed write (e.g. disk
        # full) still removes the temporary file.
        with tmp:
            tmp.write(data)

        logger.debug(
            "Parsing from bytes file_type=%s size=%d filename=%s",
            file_type,
            len(data),
            filename,
        )
        return self.parse(tmp_path, file_type)
    finally:
        os.unlink(tmp_path)
|
||||
|
||||
def _parse_with_pymupdf(self, file_path: str, file_type: str) -> ParseResult:
    """
    Parse a document using PyMuPDF for text and image extraction.

    Each page is processed independently: a failure while extracting
    text or images from one page is logged and the remaining pages are
    still processed. Images smaller than MIN_IMAGE_WIDTH x
    MIN_IMAGE_HEIGHT are skipped.

    :param file_path: Path to the document.
    :param file_type: Normalized file extension.
    :returns: ParseResult.
    :raises Exception: Re-raises whatever PyMuPDF raised if the document
        cannot be opened or iterated; an "error" metric is recorded first.
    """
    with DOCUMENT_PARSE_DURATION.labels(file_type=file_type).time():
        try:
            doc = fitz.open(file_path)
        except Exception as exc:
            DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="error").inc()
            logger.error("Failed to open document file_type=%s: %s", file_type, exc)
            raise

        text_blocks: list[TextBlock] = []
        images: list[ExtractedImage] = []
        image_global_index = 0

        # try/finally guarantees the document handle is released even when
        # something outside the per-page handlers fails (page loading,
        # metadata access).
        try:
            for page_num in range(len(doc)):
                page = doc[page_num]

                # --- Text extraction ---
                try:
                    text = page.get_text("text")
                    if text and text.strip():
                        cleaned = sanitize_text(text, log_changes=False)
                        cleaned = remove_excessive_whitespace(cleaned)
                        if cleaned.strip():
                            text_blocks.append(
                                TextBlock(text=cleaned, page=page_num)
                            )
                            logger.debug(
                                "Extracted text page=%d chars=%d",
                                page_num,
                                len(cleaned),
                            )
                except Exception as exc:
                    logger.warning(
                        "Text extraction failed page=%d: %s, continuing",
                        page_num,
                        exc,
                    )

                # --- Image extraction ---
                try:
                    for img_info in page.get_images(full=True):
                        xref = img_info[0]
                        try:
                            img_data = doc.extract_image(xref)
                            if not img_data or not img_data.get("image"):
                                continue

                            width = img_data.get("width", 0)
                            height = img_data.get("height", 0)

                            # Skip tiny images (icons, bullets, etc.)
                            if width < MIN_IMAGE_WIDTH or height < MIN_IMAGE_HEIGHT:
                                logger.debug(
                                    "Skipping small image page=%d xref=%d size=%dx%d",
                                    page_num,
                                    xref,
                                    width,
                                    height,
                                )
                                continue

                            images.append(
                                ExtractedImage(
                                    data=img_data["image"],
                                    ext=img_data.get("ext", "png"),
                                    width=width,
                                    height=height,
                                    source_page=page_num,
                                    source_index=image_global_index,
                                )
                            )
                            image_global_index += 1

                            logger.debug(
                                "Extracted image page=%d format=%s size=%dx%d bytes=%d",
                                page_num,
                                img_data.get("ext", "?"),
                                width,
                                height,
                                len(img_data["image"]),
                            )
                        except Exception as exc:
                            logger.warning(
                                "Image extraction failed page=%d xref=%d: %s",
                                page_num,
                                xref,
                                exc,
                            )
                except Exception as exc:
                    logger.warning(
                        "Image listing failed page=%d: %s, continuing",
                        page_num,
                        exc,
                    )

            # Collect document metadata
            meta = doc.metadata or {}
            result_meta = {
                "page_count": len(doc),
                "title": meta.get("title", ""),
                "author": meta.get("author", ""),
                "subject": meta.get("subject", ""),
                "creator": meta.get("creator", ""),
            }
        except Exception as exc:
            # Mirror the open-failure path so the error counter also
            # reflects documents that broke part-way through.
            DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="error").inc()
            logger.error("Failed to parse document file_type=%s: %s", file_type, exc)
            raise
        finally:
            doc.close()

        DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="success").inc()
        IMAGES_EXTRACTED_TOTAL.labels(file_type=file_type).inc(len(images))

        logger.info(
            "Parsed document file_type=%s pages=%d text_blocks=%d images=%d",
            file_type,
            result_meta["page_count"],
            len(text_blocks),
            len(images),
        )

        return ParseResult(
            text_blocks=text_blocks,
            images=images,
            metadata=result_meta,
            file_type=file_type,
        )
|
||||
|
||||
def _parse_plaintext(self, file_path: str, file_type: str) -> ParseResult:
    """
    Read a plain-text file and wrap its cleaned content in a ParseResult.

    The file is decoded as UTF-8 with undecodable bytes replaced, then
    sanitized and whitespace-normalized. The result has at most one
    text block (page 0) and never any images.

    :param file_path: Path to the text file.
    :param file_type: Normalized file extension.
    :returns: ParseResult.
    """
    with DOCUMENT_PARSE_DURATION.labels(file_type=file_type).time():
        try:
            with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
                raw_text = handle.read()
        except Exception as exc:
            DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="error").inc()
            logger.error("Failed to read text file file_type=%s: %s", file_type, exc)
            raise

        normalized = remove_excessive_whitespace(
            sanitize_text(raw_text, log_changes=True)
        )

        # Whitespace-only content produces no text block at all.
        blocks = [TextBlock(text=normalized, page=0)] if normalized.strip() else []

        DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="success").inc()

        logger.info(
            "Parsed plaintext file_type=%s chars=%d",
            file_type,
            len(normalized),
        )

        return ParseResult(
            text_blocks=blocks,
            images=[],
            metadata={"page_count": 1},
            file_type=file_type,
        )
|
||||
|
||||
def _parse_image_file(self, file_path: str, file_type: str) -> ParseResult:
    """
    Handle a standalone image file — store as a single ExtractedImage.

    The raw file bytes are kept unchanged; Pillow is used only to read
    the pixel dimensions.

    :param file_path: Path to the image file.
    :param file_type: Normalized file extension.
    :returns: ParseResult with one image and no text.
    :raises Exception: Re-raises any error from reading the file or from
        Pillow; an "error" metric is recorded first.
    """
    with DOCUMENT_PARSE_DURATION.labels(file_type=file_type).time():
        try:
            from PIL import Image as PILImage

            with open(file_path, "rb") as f:
                data = f.read()

            # Context manager so the image handle is closed even if
            # reading the size fails.
            with PILImage.open(file_path) as img:
                width, height = img.size
        except Exception as exc:
            DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="error").inc()
            logger.error("Failed to read image file_type=%s: %s", file_type, exc)
            raise

        DOCUMENTS_PARSED_TOTAL.labels(file_type=file_type, status="success").inc()
        IMAGES_EXTRACTED_TOTAL.labels(file_type=file_type).inc(1)

        logger.info(
            "Parsed image file file_type=%s size=%dx%d bytes=%d",
            file_type,
            width,
            height,
            len(data),
        )

        return ParseResult(
            text_blocks=[],
            images=[
                ExtractedImage(
                    data=data,
                    ext=file_type,
                    width=width,
                    height=height,
                    source_page=0,
                    source_index=0,
                )
            ],
            metadata={"page_count": 0, "width": width, "height": height},
            file_type=file_type,
        )
|
||||
581
mnemosyne/library/services/pipeline.py
Normal file
581
mnemosyne/library/services/pipeline.py
Normal file
@@ -0,0 +1,581 @@
|
||||
"""
|
||||
Embedding pipeline orchestrator.
|
||||
|
||||
Coordinates the full ingestion flow:
|
||||
parse → chunk → embed → store → graph construction.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
from library.metrics import (
|
||||
PIPELINE_DURATION,
|
||||
PIPELINE_ITEMS_IN_PROGRESS,
|
||||
PIPELINE_ITEMS_TOTAL,
|
||||
)
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Storage key templates; placeholders are filled with str.format().
ORIGINAL_S3_KEY = "items/{item_uid}/original.{ext}"
CHUNK_S3_KEY = "chunks/{item_uid}/chunk_{index}.txt"
IMAGE_S3_KEY = "images/{item_uid}/{index}.{ext}"

# Number of texts per embedding request; overridable through the Django
# setting of the same name, falling back to 8.
EMBEDDING_BATCH_SIZE = getattr(settings, "EMBEDDING_BATCH_SIZE", 8)
|
||||
|
||||
|
||||
class EmbeddingPipeline:
|
||||
"""
|
||||
Orchestrates the complete embedding pipeline for a single Item.
|
||||
|
||||
Stages:
|
||||
1. Parse document (text + images)
|
||||
2. Chunk text (content-type-aware)
|
||||
3. Store chunks in S3 + Neo4j
|
||||
4. Embed text chunks
|
||||
5. Store images in S3 + Neo4j
|
||||
6. Embed images (multimodal, if available)
|
||||
7. Extract concepts (if system chat model available)
|
||||
"""
|
||||
|
||||
def __init__(self, user=None):
|
||||
"""
|
||||
:param user: Optional Django user for usage tracking.
|
||||
"""
|
||||
self.user = user
|
||||
|
||||
def process_item(
    self,
    item_uid: str,
    progress_callback: Optional[callable] = None,
) -> dict:
    """
    Run the full embedding pipeline for an Item.

    The item's ``embedding_status`` is set to "processing" while the
    pipeline runs and to "completed" or "failed" afterwards. On failure
    the (truncated) error text is stored on the item and the exception
    is re-raised.

    :param item_uid: UID of the Item node to process.
    :param progress_callback: Optional callback(percent, message).
    :returns: Dict with processing results.
    :raises ValueError: If item not found or no embedding model configured.
    """
    from library.models import Item

    start_time = time.time()
    PIPELINE_ITEMS_IN_PROGRESS.inc()

    # One outer try/finally so the in-progress gauge is decremented exactly
    # once on every exit path, including failures while looking the item up
    # or while marking it as "processing".
    try:
        try:
            item = Item.nodes.get(uid=item_uid)
        except Exception as exc:
            PIPELINE_ITEMS_TOTAL.labels(status="failed").inc()
            # Keep the original lookup error attached as the cause.
            raise ValueError(f"Item not found: {item_uid}") from exc

        logger.info(
            "Pipeline starting item_uid=%s title='%s' file_type=%s",
            item_uid,
            item.title,
            item.file_type,
        )

        # Mark as processing
        item.embedding_status = "processing"
        item.error_message = ""
        item.save()

        try:
            result = self._run_pipeline(item, progress_callback)

            # Mark as completed
            item.embedding_status = "completed"
            item.chunk_count = result.get("chunks_created", 0)
            item.image_count = result.get("images_stored", 0)
            item.embedding_model_name = result.get("model_name", "")
            item.error_message = ""
            item.save()

            elapsed = time.time() - start_time
            PIPELINE_ITEMS_TOTAL.labels(status="completed").inc()
            PIPELINE_DURATION.observe(elapsed)

            logger.info(
                "Pipeline completed item_uid=%s chunks=%d images=%d concepts=%d elapsed=%.2fs",
                item_uid,
                result.get("chunks_created", 0),
                result.get("images_stored", 0),
                result.get("concepts_extracted", 0),
                elapsed,
            )

            if progress_callback:
                progress_callback(100, "Completed")

            return result

        except Exception as exc:
            item.embedding_status = "failed"
            # Stored message is capped at 500 characters.
            item.error_message = str(exc)[:500]
            item.save()

            PIPELINE_ITEMS_TOTAL.labels(status="failed").inc()
            logger.error(
                "Pipeline failed item_uid=%s: %s",
                item_uid,
                exc,
                exc_info=True,
            )
            raise

    finally:
        PIPELINE_ITEMS_IN_PROGRESS.dec()
|
||||
|
||||
def _run_pipeline(self, item, progress_callback) -> dict:
    """
    Run every pipeline stage for one item, in order.

    Stages: parse, chunk, store chunks, embed chunks, store images,
    associate images with chunks, embed images (only when the model
    supports multimodal input), extract concepts (only when a system
    chat model exists), and finally record the content hash.

    :param item: Item node instance.
    :param progress_callback: Optional callback(percent, message).
    :returns: Results dict with counters and the embedding model name.
    :raises ValueError: If no system embedding model is configured.
    """
    from llm_manager.models import LLMModel

    from .chunker import ContentTypeChunker
    from .concepts import ConceptExtractor
    from .embedding_client import EmbeddingClient
    from .parsers import DocumentParser

    def report(percent, message):
        # Progress reporting is optional; do nothing without a callback.
        if progress_callback:
            progress_callback(percent, message)

    summary = {
        "chunks_created": 0,
        "chunks_embedded": 0,
        "images_stored": 0,
        "images_embedded": 0,
        "concepts_extracted": 0,
        "model_name": "",
    }

    # --- Library context (falls back to neutral defaults) ---
    library = self._get_item_library(item)
    if library:
        chunking_config = library.chunking_config
    else:
        chunking_config = {}
    if library:
        embedding_instruction = library.embedding_instruction
    else:
        embedding_instruction = ""
    if library:
        library_type = library.library_type
    else:
        library_type = ""

    # --- System embedding model is mandatory ---
    embedding_model = LLMModel.get_system_embedding_model()
    if not embedding_model:
        raise ValueError(
            "No system embedding model configured. "
            "Set one via Django admin > LLM Models > Set as System Embedding Model."
        )

    summary["model_name"] = embedding_model.name
    embed_client = EmbeddingClient(embedding_model, user=self.user)

    if embedding_model.vector_dimensions:
        self._check_dimension_compatibility(embedding_model.vector_dimensions)

    report(5, "Parsing document")

    # --- Stage 1: parse the original file ---
    parser = DocumentParser()
    file_data = self._read_item_from_s3(item)
    if not file_data:
        logger.warning("No file data for item_uid=%s, skipping", item.uid)
        return summary

    parse_result = parser.parse_bytes(
        file_data,
        item.file_type,
        filename=item.title,
    )

    report(20, "Chunking text")

    # --- Stage 2: chunk ---
    chunk_result = ContentTypeChunker().chunk(
        parse_result, chunking_config, library_type
    )

    report(30, "Storing chunks")

    # --- Stage 3: persist chunks (S3 + Neo4j) ---
    chunk_nodes = self._store_chunks(item, chunk_result)
    summary["chunks_created"] = len(chunk_nodes)

    report(40, "Embedding text chunks")

    # --- Stage 4: embed chunk text ---
    if chunk_result.chunks:
        self._embed_chunks(
            item,
            chunk_nodes,
            chunk_result.chunks,
            embed_client,
            embedding_instruction,
            progress_callback,
        )
        summary["chunks_embedded"] = len(chunk_nodes)

    report(70, "Storing images")

    # --- Stage 5: persist images and link them to same-page chunks ---
    image_nodes = self._store_images(item, parse_result.images)
    summary["images_stored"] = len(image_nodes)

    self._associate_images_with_chunks(
        chunk_nodes, image_nodes, chunk_result, parse_result
    )

    report(80, "Embedding images")

    # --- Stage 6: image embeddings, multimodal models only ---
    if image_nodes and embedding_model.supports_multimodal:
        summary["images_embedded"] = self._embed_images(image_nodes, embed_client)

    report(90, "Extracting concepts")

    # --- Stage 7: concept extraction, needs a chat model and chunks ---
    chat_model = LLMModel.get_system_chat_model()
    if chat_model and chunk_result.chunks:
        extractor = ConceptExtractor(chat_model, user=self.user)
        summary["concepts_extracted"] = extractor.extract_for_item(
            item, chunk_nodes, chunk_result.chunks
        )

    # Record the content hash to prevent redundant re-processing.
    # file_data is known to be non-empty here (empty data returned early).
    item.content_hash = hashlib.sha256(file_data).hexdigest()
    item.save()

    return summary
|
||||
|
||||
def _get_item_library(self, item):
    """
    Walk the graph to find the Library containing this Item.

    Follows Library -[:CONTAINS]-> Collection -[:CONTAINS]-> Item and
    loads the first matching Library node. Any failure (import, query,
    lookup) is logged as a warning and treated as "no library".

    :param item: Item node.
    :returns: Library node, or None.
    """
    try:
        from neomodel import db

        from library.models import Library

        # Only the uid is needed; the node itself is loaded via neomodel.
        rows, _ = db.cypher_query(
            "MATCH (l:Library)-[:CONTAINS]->(c:Collection)-[:CONTAINS]->(i:Item {uid: $uid}) "
            "RETURN l.uid LIMIT 1",
            {"uid": item.uid},
        )
        if rows:
            return Library.nodes.get(uid=rows[0][0])
    except Exception as exc:
        logger.warning("Could not resolve library for item_uid=%s: %s", item.uid, exc)

    return None
|
||||
|
||||
def _read_item_from_s3(self, item) -> Optional[bytes]:
    """
    Fetch the item's original file from the default storage backend.

    :param item: Item node; its ``s3_key`` names the stored file.
    :returns: The file's bytes, or None when the item has no ``s3_key``.
    :raises Exception: Re-raises any storage error after logging it.
    """
    storage_key = item.s3_key
    if not storage_key:
        logger.warning("Item has no s3_key item_uid=%s", item.uid)
        return None

    try:
        with default_storage.open(item.s3_key, "rb") as stream:
            payload = stream.read()
        logger.debug(
            "Read item from S3 key=%s size=%d", item.s3_key, len(payload)
        )
        return payload
    except Exception as exc:
        logger.error(
            "Failed to read from S3 key=%s: %s", item.s3_key, exc
        )
        raise
|
||||
|
||||
def _store_chunks(self, item, chunk_result) -> list:
    """
    Store chunk text in S3 and create Chunk nodes in Neo4j.

    Existing chunks of the item are removed first (stored object and
    node). Removing the stored object is best effort: a failure is
    logged and the node is deleted anyway.

    :param item: Item node.
    :param chunk_result: ChunkResult from chunker; ``chunks`` is iterated
        as a sequence of strings.
    :returns: List of Chunk node instances, in chunk order.
    :raises Exception: Re-raises any storage error while saving a chunk.
    """
    from library.models import Chunk

    # Delete existing chunks for this item
    for old_chunk in item.chunks.all():
        try:
            default_storage.delete(old_chunk.chunk_s3_key)
        except Exception as exc:
            # Best effort, but leave a trace instead of failing silently.
            logger.warning(
                "Failed to delete old chunk from S3 key=%s: %s",
                old_chunk.chunk_s3_key,
                exc,
            )
        old_chunk.delete()

    chunk_nodes = []
    for idx, chunk_text in enumerate(chunk_result.chunks):
        s3_key = CHUNK_S3_KEY.format(item_uid=item.uid, index=idx)

        # Store chunk text in S3
        try:
            # save() returns the name actually used, which can differ from
            # the requested one when the backend avoids overwriting.
            stored_key = default_storage.save(
                s3_key, ContentFile(chunk_text.encode("utf-8"))
            )
        except Exception as exc:
            logger.error("Failed to store chunk %d to S3: %s", idx, exc)
            raise

        # Create Chunk node pointing at where the text really lives.
        chunk_node = Chunk(
            chunk_index=idx,
            chunk_s3_key=stored_key or s3_key,
            chunk_size=len(chunk_text),
            text_preview=chunk_text[:500],
        )
        chunk_node.save()
        item.chunks.connect(chunk_node)
        chunk_nodes.append(chunk_node)

    logger.info(
        "Stored %d chunks for item_uid=%s", len(chunk_nodes), item.uid
    )
    return chunk_nodes
|
||||
|
||||
def _embed_chunks(
    self,
    item,
    chunk_nodes: list,
    chunk_texts: list[str],
    embed_client,
    embedding_instruction: str,
    progress_callback: Optional[callable],
):
    """
    Generate embeddings for chunks and update Chunk nodes.

    Texts are sent to the client in batches of EMBEDDING_BATCH_SIZE.
    Progress is reported on a 40-70 percent scale.

    :param item: Item node (used for logging).
    :param chunk_nodes: List of Chunk nodes, parallel to ``chunk_texts``.
    :param chunk_texts: List of chunk text strings.
    :param embed_client: EmbeddingClient instance.
    :param embedding_instruction: Instruction prefix for embedding; when
        non-empty it is prepended to every text, separated by a blank line.
    :param progress_callback: Optional progress callback.
    :raises ValueError: If the client returns a different number of
        embeddings than texts in a batch.
    """
    # Prepend embedding instruction if configured
    if embedding_instruction:
        texts_to_embed = [
            f"{embedding_instruction}\n\n{text}" for text in chunk_texts
        ]
    else:
        texts_to_embed = chunk_texts

    batch_size = EMBEDDING_BATCH_SIZE
    total = len(texts_to_embed)
    total_batches = (total + batch_size - 1) // batch_size

    for batch_idx in range(0, total, batch_size):
        batch_texts = texts_to_embed[batch_idx : batch_idx + batch_size]
        batch_nodes = chunk_nodes[batch_idx : batch_idx + batch_size]
        batch_num = batch_idx // batch_size + 1

        logger.debug(
            "Embedding batch %d/%d size=%d item_uid=%s",
            batch_num,
            total_batches,
            len(batch_texts),
            item.uid,
        )

        embeddings = list(embed_client.embed_texts(batch_texts))

        # zip() would silently drop the tail on a short response and leave
        # chunks without an embedding; fail loudly instead.
        if len(embeddings) != len(batch_texts):
            raise ValueError(
                f"Embedding count mismatch in batch {batch_num}: "
                f"expected {len(batch_texts)}, got {len(embeddings)}"
            )

        for node, embedding in zip(batch_nodes, embeddings):
            node.embedding = embedding
            node.save()

        if progress_callback:
            done = batch_idx + len(batch_texts)
            pct = 40 + (30 * done / total)
            progress_callback(
                int(pct),
                f"Embedded {done}/{total} chunks",
            )

    logger.info(
        "Embedded %d chunks for item_uid=%s", len(chunk_nodes), item.uid
    )
|
||||
|
||||
def _store_images(self, item, extracted_images) -> list:
    """
    Store extracted images in S3 and create Image nodes in Neo4j.

    Existing images of the item are removed first (stored object and
    node); removing the stored object is best effort. An image whose
    upload fails is logged and skipped, so the returned list can be
    shorter than ``extracted_images``.

    :param item: Item node.
    :param extracted_images: List of ExtractedImage from parser.
    :returns: List of Image node instances for the images that were stored.
    """
    from library.models import Image

    # Delete existing images for this item
    for old_image in item.images.all():
        try:
            default_storage.delete(old_image.s3_key)
        except Exception as exc:
            # Best effort, but leave a trace instead of failing silently.
            logger.warning(
                "Failed to delete old image from S3 key=%s: %s",
                old_image.s3_key,
                exc,
            )
        old_image.delete()

    image_nodes = []
    for img in extracted_images:
        s3_key = IMAGE_S3_KEY.format(
            item_uid=item.uid,
            index=img.source_index,
            ext=img.ext,
        )

        try:
            # save() returns the name actually used, which can differ from
            # the requested one when the backend avoids overwriting.
            stored_key = default_storage.save(s3_key, ContentFile(img.data))
        except Exception as exc:
            logger.warning(
                "Failed to store image %d to S3: %s", img.source_index, exc
            )
            continue

        image_node = Image(
            s3_key=stored_key or s3_key,
            image_type="diagram",  # Default; could be refined by content analysis
            metadata={
                "width": img.width,
                "height": img.height,
                "source_page": img.source_page,
                "content_type": f"image/{img.ext}",
            },
        )
        image_node.save()
        item.images.connect(image_node)
        image_nodes.append(image_node)

    if image_nodes:
        logger.info(
            "Stored %d images for item_uid=%s", len(image_nodes), item.uid
        )

    return image_nodes
|
||||
|
||||
def _associate_images_with_chunks(
    self, chunk_nodes, image_nodes, chunk_result, parse_result
):
    """
    Create HAS_NEARBY_IMAGE relationships between chunks and images.

    Associates images with chunks from the same page/section.

    The page of each image is taken from the ``source_page`` entry of
    the Image node's own metadata. Pairing ``image_nodes`` positionally
    with ``parse_result.images`` is only used as a fallback, and only
    when both lists have the same length: the node list can be shorter
    than the extracted list when an image failed to upload, and a
    positional pairing would then attach images to the wrong pages.

    :param chunk_nodes: List of Chunk nodes.
    :param image_nodes: List of Image nodes.
    :param chunk_result: ChunkResult with page mapping
        (``chunk_page_map``: chunk index -> page).
    :param parse_result: ParseResult with image source pages.
    """
    if not chunk_nodes or not image_nodes:
        return

    extracted = list(parse_result.images)
    if len(extracted) == len(image_nodes):
        fallback_pages = [ext_img.source_page for ext_img in extracted]
    else:
        # Lists are out of step; positional pairing cannot be trusted.
        fallback_pages = [None] * len(image_nodes)

    # Build page -> images mapping
    page_images: dict[int, list] = {}
    for img_node, fallback_page in zip(image_nodes, fallback_pages):
        node_meta = getattr(img_node, "metadata", None)
        if not isinstance(node_meta, dict):
            node_meta = {}
        source_page = node_meta.get("source_page", fallback_page)
        if source_page is None:
            continue
        page_images.setdefault(source_page, []).append(img_node)

    # Connect chunks to images on the same page
    connected = 0
    for chunk_idx, chunk_node in enumerate(chunk_nodes):
        # -1 never matches a real page, so unmapped chunks get no images.
        page = chunk_result.chunk_page_map.get(chunk_idx, -1)
        for img_node in page_images.get(page, []):
            chunk_node.nearby_images.connect(
                img_node, {"proximity": "same_page"}
            )
            connected += 1

    if connected:
        logger.debug(
            "Created %d chunk-image associations", connected
        )
|
||||
|
||||
def _embed_images(self, image_nodes: list, embed_client) -> int:
    """
    Generate multimodal embeddings for Image nodes.

    Each image is read back from storage, embedded, and linked to a new
    ImageEmbedding node. A failure for one image is logged and does not
    stop the others.

    :param image_nodes: List of Image nodes.
    :param embed_client: EmbeddingClient with multimodal support.
    :returns: Number of images successfully embedded.
    """
    from library.models import ImageEmbedding

    embedded_count = 0
    for img_node in image_nodes:
        try:
            # Context manager so the storage file handle is always closed.
            with default_storage.open(img_node.s3_key, "rb") as fh:
                img_data = fh.read()

            # Extension is whatever follows the last dot of the key.
            ext = img_node.s3_key.rsplit(".", 1)[-1] if "." in img_node.s3_key else "png"

            embedding = embed_client.embed_image(img_data, ext)
            if embedding:
                emb_node = ImageEmbedding(embedding=embedding)
                emb_node.save()
                img_node.embeddings.connect(emb_node)
                embedded_count += 1
        except Exception as exc:
            logger.warning(
                "Image embedding failed s3_key=%s: %s",
                img_node.s3_key,
                exc,
            )

    if embedded_count:
        logger.info("Embedded %d images", embedded_count)

    return embedded_count
|
||||
|
||||
def _check_dimension_compatibility(self, model_dimensions: int):
    """
    Record the system embedding model's vector size in the debug log.

    No comparison is performed here; this method only logs the value.

    :param model_dimensions: Embedding dimensions reported by the model.
    """
    # Enforcement is expected to happen in setup_neo4j_indexes, not here.
    logger.debug("System embedding model dimensions=%d", model_dimensions)
|
||||
|
||||
def reprocess_item(self, item_uid: str, progress_callback=None) -> dict:
    """
    Re-embed an item from scratch.

    Clears the item's content hash and then runs ``process_item``; the
    chunk and image storage steps of that run remove the item's existing
    chunks and images before writing the new ones.

    :param item_uid: UID of the Item to re-embed.
    :param progress_callback: Optional progress callback.
    :returns: Processing results dict.
    :raises ValueError: If the item cannot be loaded.
    """
    from library.models import Item

    try:
        item = Item.nodes.get(uid=item_uid)
    except Exception as exc:
        # Keep the original lookup error attached as the cause.
        raise ValueError(f"Item not found: {item_uid}") from exc

    # Clear content hash to force re-processing
    item.content_hash = ""
    item.save()

    logger.info("Re-processing item_uid=%s title='%s'", item_uid, item.title)
    return self.process_item(item_uid, progress_callback)
|
||||
165
mnemosyne/library/services/text_utils.py
Normal file
165
mnemosyne/library/services/text_utils.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Text sanitization utilities for the embedding pipeline.
|
||||
|
||||
Ported from Spelunker's text_utils.py — ensures text can be safely
|
||||
processed by embedding APIs and LLMs.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Common PDF ligatures
|
||||
_LIGATURE_MAP = {
|
||||
"\ufb01": "fi",
|
||||
"\ufb02": "fl",
|
||||
"\ufb00": "ff",
|
||||
"\ufb03": "ffi",
|
||||
"\ufb04": "ffl",
|
||||
"\ufb05": "ft",
|
||||
"\ufb06": "st",
|
||||
}
|
||||
|
||||
# Common special characters from PDF extraction
|
||||
_SPECIAL_CHAR_MAP = {
|
||||
"\u2018": "'", # left single quotation
|
||||
"\u2019": "'", # right single quotation
|
||||
"\u201c": '"', # left double quotation
|
||||
"\u201d": '"', # right double quotation
|
||||
"\u2013": "-", # en dash
|
||||
"\u2014": "-", # em dash
|
||||
"\u2026": "...", # horizontal ellipsis
|
||||
"\u00a0": " ", # non-breaking space
|
||||
}
|
||||
|
||||
# Zero-width characters
|
||||
_ZERO_WIDTH_CHARS = [
|
||||
"\u200b", # zero-width space
|
||||
"\u200c", # zero-width non-joiner
|
||||
"\u200d", # zero-width joiner
|
||||
"\ufeff", # zero-width no-break space (BOM)
|
||||
]
|
||||
|
||||
# Control characters pattern (exclude newline, tab, carriage return)
|
||||
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x9f]")
|
||||
|
||||
|
||||
def sanitize_text(text: str, log_changes: bool = True) -> str:
|
||||
"""
|
||||
Sanitize text for embedding APIs by removing problematic characters.
|
||||
|
||||
Addresses common issues that cause "invalid tokens" errors:
|
||||
null bytes, control characters, zero-width characters, invalid UTF-8,
|
||||
and non-normalized Unicode.
|
||||
|
||||
:param text: Text to sanitize.
|
||||
:param log_changes: Whether to log sanitization actions.
|
||||
:returns: Sanitized text safe for tokenization.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
original_length = len(text)
|
||||
changes: list[str] = []
|
||||
|
||||
# 1. Remove null bytes
|
||||
if "\x00" in text:
|
||||
text = text.replace("\x00", "")
|
||||
changes.append("removed null bytes")
|
||||
|
||||
# 2. Remove control characters
|
||||
if _CONTROL_CHAR_RE.search(text):
|
||||
text = _CONTROL_CHAR_RE.sub("", text)
|
||||
changes.append("removed control characters")
|
||||
|
||||
# 3. Remove zero-width characters
|
||||
for char in _ZERO_WIDTH_CHARS:
|
||||
if char in text:
|
||||
text = text.replace(char, "")
|
||||
changes.append("removed zero-width characters")
|
||||
break
|
||||
|
||||
# 4. Normalize Unicode to NFC form
|
||||
normalized = unicodedata.normalize("NFC", text)
|
||||
if normalized != text:
|
||||
text = normalized
|
||||
changes.append("normalized Unicode to NFC")
|
||||
|
||||
# 5. Replace invalid UTF-8 sequences
|
||||
try:
|
||||
text = text.encode("utf-8", errors="replace").decode("utf-8")
|
||||
if "\ufffd" in text:
|
||||
changes.append("replaced invalid UTF-8 sequences")
|
||||
except Exception as exc:
|
||||
logger.warning("Error during UTF-8 validation: %s", exc)
|
||||
|
||||
# 6. Clean PDF artifacts (ligatures, special chars)
|
||||
cleaned = clean_pdf_artifacts(text)
|
||||
if cleaned != text:
|
||||
text = cleaned
|
||||
changes.append("cleaned PDF artifacts")
|
||||
|
||||
if log_changes and changes:
|
||||
chars_removed = original_length - len(text)
|
||||
logger.info(
|
||||
"Text sanitization: %s original_length=%d final_length=%d chars_removed=%d",
|
||||
", ".join(changes),
|
||||
original_length,
|
||||
len(text),
|
||||
chars_removed,
|
||||
)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def clean_pdf_artifacts(text: str) -> str:
    """
    Replace PDF extraction artifacts with their plain-text equivalents.

    Every key of ``_LIGATURE_MAP`` is substituted first, followed by every
    key of ``_SPECIAL_CHAR_MAP``, each in the mapping's own iteration order.

    :param text: Text to clean.
    :returns: Cleaned text.
    """
    cleaned = text
    # Ligatures are handled before special characters, as two ordered passes.
    for mapping in (_LIGATURE_MAP, _SPECIAL_CHAR_MAP):
        for artifact, plain in mapping.items():
            cleaned = cleaned.replace(artifact, plain)
    return cleaned
|
||||
|
||||
|
||||
def remove_excessive_whitespace(text: str) -> str:
    """
    Remove excessive whitespace while preserving paragraph structure.

    Runs of spaces become a single space, every line is stripped of leading
    and trailing whitespace, and any run of blank lines is reduced to one
    blank line (i.e. at most two consecutive newlines remain).

    :param text: Text to clean.
    :returns: Text with normalized whitespace.
    """
    # Collapse runs of spaces inside lines.
    text = re.sub(r" +", " ", text)

    # Strip each line BEFORE collapsing blank lines: a line holding only
    # whitespace must become truly empty first, otherwise "a\n \n \nb"
    # would slip past the newline collapse and keep three newlines.
    text = "\n".join(line.strip() for line in text.split("\n"))

    # Reduce three or more newlines to a single paragraph break.
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip()
|
||||
|
||||
|
||||
def truncate_text(text: str, max_chars: int, suffix: str = "...") -> str:
    """
    Truncate text to a maximum length preserving word boundaries.

    The returned string never exceeds ``max_chars`` characters, suffix
    included. When the text must be cut, the cut is made at the last space
    that still leaves room for the suffix; if there is no such space the
    text is cut mid-word.

    :param text: Text to truncate.
    :param max_chars: Maximum number of characters in the result.
    :param suffix: Suffix to add if truncated.
    :returns: Truncated text.
    """
    if len(text) <= max_chars:
        return text

    if max_chars <= 0:
        return ""

    # Number of characters of the original text that fit next to the suffix.
    budget = max_chars - len(suffix)
    if budget < 0:
        # Not even the suffix fits; a negative slice index would otherwise
        # keep almost the whole text and blow past max_chars.
        return suffix[:max_chars]

    # Search up to and including index ``budget``: a space sitting exactly
    # at the cut position is a valid word boundary.
    truncate_at = text.rfind(" ", 0, budget + 1)
    if truncate_at == -1:
        truncate_at = budget

    # rstrip() avoids output such as "word ..." when spaces are doubled.
    return text[:truncate_at].rstrip() + suffix
|
||||
282
mnemosyne/library/tasks.py
Normal file
282
mnemosyne/library/tasks.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Celery tasks for the embedding pipeline.
|
||||
|
||||
All tasks pass UIDs (not model instances) per Red Panda Standards.
|
||||
Tasks are idempotent, include retry logic, and track progress
|
||||
via Memcached: library:task:{task_id}:progress.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from celery import shared_task
|
||||
from django.core.cache import cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache key pattern for task progress
|
||||
PROGRESS_KEY = "library:task:{task_id}:progress"
|
||||
|
||||
|
||||
def _update_progress(task, percent: int, message: str):
    """
    Update task progress in Memcached and Celery state.

    Progress reporting is best-effort: a failure in either backend is logged
    at debug level and never propagates to the calling task. The two writes
    are independent, so a failing Celery state update does not prevent the
    cache entry from being written.

    :param task: Celery task instance (self).
    :param percent: Progress percentage (0-100).
    :param message: Human-readable status message.
    """
    try:
        task.update_state(state="PROGRESS", meta={"percent": percent, "message": message})
    except Exception as exc:
        logger.debug("Could not update Celery progress state: %s", exc)

    try:
        cache.set(
            PROGRESS_KEY.format(task_id=task.request.id),
            {"percent": percent, "message": message},
            timeout=3600,  # progress entries expire after one hour
        )
    except Exception as exc:
        logger.debug("Could not write task progress to cache: %s", exc)
|
||||
|
||||
|
||||
@shared_task(
    name="library.tasks.embed_item",
    bind=True,
    queue="embedding",
    max_retries=3,
    default_retry_delay=60,
    acks_late=True,
)
def embed_item(self, item_uid: str, user_id: int = None):
    """
    Run the full embedding pipeline for a single Item.

    Any exception is logged and the task is retried until ``max_retries``
    is reached; after that a failure dict is returned instead of raising.

    :param item_uid: UID of the Item node to process.
    :param user_id: Optional user ID for usage tracking.
    :returns: Dict with ``success``, ``item_uid`` and either the pipeline
        result fields or an ``error`` message.
    """
    logger.info("Task embed_item starting item_uid=%s task_id=%s", item_uid, self.request.id)

    def report_progress(percent, message):
        # Forward pipeline progress to the Celery state and the cache.
        _update_progress(self, percent, message)

    try:
        # Imported lazily, inside the task body.
        from library.services.pipeline import EmbeddingPipeline

        pipeline = EmbeddingPipeline(user=_resolve_user(user_id))
        outcome = pipeline.process_item(item_uid, progress_callback=report_progress)

        logger.info(
            "Task embed_item completed item_uid=%s chunks=%d images=%d",
            item_uid,
            outcome.get("chunks_created", 0),
            outcome.get("images_stored", 0),
        )
        return {"success": True, "item_uid": item_uid, **outcome}

    except Exception as error:
        logger.error(
            "Task embed_item failed item_uid=%s: %s",
            item_uid,
            error,
            exc_info=True,
        )
        # Out of retries: report the failure as a normal return value.
        if self.request.retries >= self.max_retries:
            return {"success": False, "item_uid": item_uid, "error": str(error)}
        # Otherwise schedule another attempt.
        raise self.retry(exc=error)
|
||||
|
||||
|
||||
@shared_task(
    name="library.tasks.reembed_item",
    bind=True,
    queue="embedding",
    max_retries=3,
    default_retry_delay=60,
    acks_late=True,
)
def reembed_item(self, item_uid: str, user_id: int = None):
    """
    Delete existing embeddings and re-process an Item.

    The work is delegated to ``EmbeddingPipeline.reprocess_item``. Any
    exception is logged and the task is retried until ``max_retries`` is
    reached; after that a failure dict is returned instead of raising.

    :param item_uid: UID of the Item node to re-embed.
    :param user_id: Optional user ID for usage tracking.
    :returns: Dict with ``success``, ``item_uid`` and either the pipeline
        result fields or an ``error`` message.
    """
    logger.info("Task reembed_item starting item_uid=%s", item_uid)

    def report_progress(percent, message):
        # Forward pipeline progress to the Celery state and the cache.
        _update_progress(self, percent, message)

    try:
        # Imported lazily, inside the task body.
        from library.services.pipeline import EmbeddingPipeline

        pipeline = EmbeddingPipeline(user=_resolve_user(user_id))
        outcome = pipeline.reprocess_item(item_uid, progress_callback=report_progress)

        logger.info("Task reembed_item completed item_uid=%s", item_uid)
        return {"success": True, "item_uid": item_uid, **outcome}

    except Exception as error:
        logger.error("Task reembed_item failed item_uid=%s: %s", item_uid, error, exc_info=True)
        # Out of retries: report the failure as a normal return value.
        if self.request.retries >= self.max_retries:
            return {"success": False, "item_uid": item_uid, "error": str(error)}
        # Otherwise schedule another attempt.
        raise self.retry(exc=error)
|
||||
|
||||
|
||||
@shared_task(
    name="library.tasks.embed_collection",
    bind=True,
    queue="batch",
    acks_late=True,
)
def embed_collection(self, collection_uid: str, user_id: int = None):
    """
    Embed all items in a collection.

    Each item that is not already embedded is handed to ``embed_item`` as a
    separate task; this task only queues work and reports how many items
    were queued, skipped, or failed to queue.

    :param collection_uid: UID of the Collection node.
    :param user_id: Optional user ID for usage tracking.
    :returns: Dict with summary results (``total``, ``successful``,
        ``failed``, ``skipped``), or ``success=False`` with an ``error``.
    """
    logger.info("Task embed_collection starting collection_uid=%s", collection_uid)

    try:
        # Imported lazily, inside the task body.
        from library.models import Collection

        col = Collection.nodes.get(uid=collection_uid)
        items = col.items.all()
        total = len(items)

        results = {"total": total, "successful": 0, "failed": 0, "skipped": 0}

        for position, item in enumerate(items, start=1):
            # Skip already-completed items.
            # NOTE(review): only the presence of content_hash is checked here,
            # not whether the content actually changed — confirm intent.
            if item.embedding_status == "completed" and item.content_hash:
                results["skipped"] += 1
                logger.debug("Skipping already-embedded item_uid=%s", item.uid)
            else:
                try:
                    embed_item.delay(item.uid, user_id)
                    results["successful"] += 1
                except Exception as exc:
                    results["failed"] += 1
                    logger.error(
                        "Failed to queue embed for item_uid=%s: %s", item.uid, exc
                    )

            # Report progress for every item, skipped ones included, so the
            # percentage still reaches 100 when the last items are skipped.
            _update_progress(
                self,
                int(position / total * 100),
                f"Queued {position}/{total} items",
            )

        logger.info(
            "Task embed_collection completed collection_uid=%s queued=%d skipped=%d failed=%d",
            collection_uid,
            results["successful"],
            results["skipped"],
            results["failed"],
        )
        return {"success": True, "collection_uid": collection_uid, **results}

    except Exception as exc:
        logger.error(
            "Task embed_collection failed collection_uid=%s: %s",
            collection_uid,
            exc,
            exc_info=True,
        )
        return {"success": False, "collection_uid": collection_uid, "error": str(exc)}
|
||||
|
||||
|
||||
@shared_task(
    name="library.tasks.embed_library",
    bind=True,
    queue="batch",
    acks_late=True,
)
def embed_library(self, library_uid: str, user_id: int = None):
    """
    Embed all items across all collections in a library.

    One ``embed_collection`` task is queued per collection. A collection
    that cannot be queued is logged and counted in ``collections_failed``
    without aborting the remaining collections.

    :param library_uid: UID of the Library node.
    :param user_id: Optional user ID for usage tracking.
    :returns: Dict with summary results. ``items_queued`` is the number of
        items in the successfully queued collections; it is an upper bound,
        since ``embed_collection`` may skip items that are already embedded.
    """
    logger.info("Task embed_library starting library_uid=%s", library_uid)

    try:
        # Imported lazily, inside the task body.
        from library.models import Library

        lib = Library.nodes.get(uid=library_uid)
        collections = lib.collections.all()

        results = {
            "total_collections": len(collections),
            "items_queued": 0,
            "collections_failed": 0,
        }

        for col in collections:
            try:
                embed_collection.delay(col.uid, user_id)
            except Exception as exc:
                # Same per-entry isolation as embed_collection and
                # batch_embed_items: one broker error must not discard
                # the collections that were already queued.
                results["collections_failed"] += 1
                logger.error(
                    "Failed to queue embed for collection_uid=%s: %s", col.uid, exc
                )
                continue
            results["items_queued"] += len(col.items.all())

        logger.info(
            "Task embed_library completed library_uid=%s collections=%d items=%d",
            library_uid,
            results["total_collections"],
            results["items_queued"],
        )
        return {"success": True, "library_uid": library_uid, **results}

    except Exception as exc:
        logger.error(
            "Task embed_library failed library_uid=%s: %s",
            library_uid,
            exc,
            exc_info=True,
        )
        return {"success": False, "library_uid": library_uid, "error": str(exc)}
|
||||
|
||||
|
||||
@shared_task(
    name="library.tasks.batch_embed_items",
    bind=True,
    queue="batch",
    acks_late=True,
)
def batch_embed_items(self, item_uids: list[str], user_id: int = None):
    """
    Queue one ``embed_item`` task for each UID in the given list.

    A UID that cannot be queued is logged and left out of the ``queued``
    count; the remaining UIDs are still attempted.

    :param item_uids: List of Item UIDs.
    :param user_id: Optional user ID for usage tracking.
    :returns: Dict with queuing results (``success``, ``queued``, ``total``).
    """
    total = len(item_uids)
    logger.info("Task batch_embed_items starting count=%d", total)

    queued = 0
    for item_uid in item_uids:
        try:
            embed_item.delay(item_uid, user_id)
        except Exception as error:
            logger.error("Failed to queue item_uid=%s: %s", item_uid, error)
        else:
            queued += 1

    logger.info("Task batch_embed_items completed queued=%d/%d", queued, total)
    return {"success": True, "queued": queued, "total": total}
|
||||
|
||||
|
||||
def _resolve_user(user_id: int = None):
    """
    Resolve a user ID to a User instance.

    Resolution is best-effort: if the lookup fails for any reason the
    failure is logged and ``None`` is returned, so the caller proceeds
    without a user.

    :param user_id: Optional user ID. Any falsy value (``None``, ``0``) is
        treated as "no user".
    :returns: User instance, or None.
    """
    if not user_id:
        return None

    try:
        # Imported lazily, inside the function body.
        from django.contrib.auth import get_user_model

        User = get_user_model()
        return User.objects.get(pk=user_id)
    except Exception as exc:
        # Still swallow the error, but leave a trace: a silently missing
        # user is otherwise indistinguishable from "no user supplied".
        logger.warning(
            "Could not resolve user_id=%s; continuing without a user: %s",
            user_id,
            exc,
        )
        return None
|
||||
@@ -0,0 +1,19 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Delete {{ collection.name }} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="max-w-lg">
|
||||
<h1 class="text-3xl font-bold mb-4 text-error">Delete Collection</h1>
|
||||
<div class="alert alert-warning mb-6">
|
||||
<span>Are you sure you want to delete <strong>{{ collection.name }}</strong>? This action cannot be undone.</span>
|
||||
</div>
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
<div class="flex gap-2">
|
||||
<button type="submit" class="btn btn-error">Delete</button>
|
||||
<a href="{% url 'library:collection-detail' uid=collection.uid %}" class="btn btn-ghost">Cancel</a>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
71
mnemosyne/library/templates/library/collection_detail.html
Normal file
71
mnemosyne/library/templates/library/collection_detail.html
Normal file
@@ -0,0 +1,71 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}{{ collection.name }} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-4">
|
||||
{% if library %}
|
||||
<a href="{% url 'library:library-detail' uid=library.uid %}" class="btn btn-ghost btn-sm">← {{ library.name }}</a>
|
||||
{% else %}
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="flex justify-between items-start mb-6">
|
||||
<div>
|
||||
<h1 class="text-3xl font-bold">{{ collection.name }}</h1>
|
||||
{% if library %}<p class="opacity-60 mt-1">In: {{ library.name }}</p>{% endif %}
|
||||
{% if collection.description %}
|
||||
<p class="mt-3 opacity-80">{{ collection.description }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="flex gap-2">
|
||||
<a href="{% url 'library:collection-edit' uid=collection.uid %}" class="btn btn-sm btn-outline">Edit</a>
|
||||
<a href="{% url 'library:collection-delete' uid=collection.uid %}" class="btn btn-sm btn-error btn-outline">Delete</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Items -->
|
||||
<div class="flex justify-between items-center mb-4">
|
||||
<h2 class="text-xl font-bold">Items</h2>
|
||||
<a href="{% url 'library:item-create' collection_uid=collection.uid %}" class="btn btn-sm btn-primary">
|
||||
+ New Item
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{% if items %}
|
||||
<div class="overflow-x-auto">
|
||||
<table class="table table-zebra w-full">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Title</th>
|
||||
<th>Type</th>
|
||||
<th>File Type</th>
|
||||
<th>Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for item in items %}
|
||||
<tr>
|
||||
<td>
|
||||
<a href="{% url 'library:item-detail' uid=item.uid %}" class="link link-hover font-medium">
|
||||
{{ item.title }}
|
||||
</a>
|
||||
</td>
|
||||
<td>{{ item.item_type|default:"-" }}</td>
|
||||
<td>{{ item.file_type|default:"-" }}</td>
|
||||
<td>
|
||||
<a href="{% url 'library:item-detail' uid=item.uid %}" class="btn btn-xs btn-ghost">View</a>
|
||||
<a href="{% url 'library:item-edit' uid=item.uid %}" class="btn btn-xs btn-ghost">Edit</a>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="text-center py-8 opacity-60">
|
||||
<p>No items in this collection yet.</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
43
mnemosyne/library/templates/library/collection_form.html
Normal file
43
mnemosyne/library/templates/library/collection_form.html
Normal file
@@ -0,0 +1,43 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}{% if editing %}Edit Collection{% else %}New Collection{% endif %} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-4">
|
||||
{% if library %}
|
||||
<a href="{% url 'library:library-detail' uid=library.uid %}" class="btn btn-ghost btn-sm">← {{ library.name }}</a>
|
||||
{% else %}
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<h1 class="text-3xl font-bold mb-6">
|
||||
{% if editing %}Edit Collection: {{ collection.name }}{% else %}New Collection{% endif %}
|
||||
</h1>
|
||||
{% if library %}<p class="opacity-60 mb-4">In library: {{ library.name }}</p>{% endif %}
|
||||
|
||||
<form method="post" class="max-w-2xl">
|
||||
{% csrf_token %}
|
||||
<div class="space-y-4">
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">Name</span></label>
|
||||
{{ form.name }}
|
||||
{% if form.name.errors %}<p class="text-error text-sm mt-1">{{ form.name.errors.0 }}</p>{% endif %}
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">Description</span></label>
|
||||
{{ form.description }}
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-2 mt-6">
|
||||
<button type="submit" class="btn btn-primary">
|
||||
{% if editing %}Save Changes{% else %}Create Collection{% endif %}
|
||||
</button>
|
||||
{% if library %}
|
||||
<a href="{% url 'library:library-detail' uid=library.uid %}" class="btn btn-ghost">Cancel</a>
|
||||
{% else %}
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost">Cancel</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
{% endblock %}
|
||||
163
mnemosyne/library/templates/library/embedding_dashboard.html
Normal file
163
mnemosyne/library/templates/library/embedding_dashboard.html
Normal file
@@ -0,0 +1,163 @@
|
||||
{% extends "themis/base.html" %}
|
||||
{% load humanize %}
|
||||
|
||||
{% block title %}Embedding Pipeline — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-4">
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
|
||||
</div>
|
||||
|
||||
<h1 class="text-3xl font-bold mb-6">Embedding Pipeline Dashboard</h1>
|
||||
|
||||
<!-- System Models -->
|
||||
<div class="card bg-base-200 mb-6">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">System Models</h2>
|
||||
<div class="overflow-x-auto">
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th class="w-48">Embedding Model</th>
|
||||
<td>
|
||||
{% if system_embedding_model %}
|
||||
<span class="font-semibold">{{ system_embedding_model.api.name }}: {{ system_embedding_model.name }}</span>
|
||||
{% if system_embedding_model.vector_dimensions %}
|
||||
<span class="badge badge-info badge-sm ml-2">{{ system_embedding_model.vector_dimensions }}d</span>
|
||||
{% endif %}
|
||||
{% if system_embedding_model.supports_multimodal %}
|
||||
<span class="badge badge-accent badge-sm ml-1">Multimodal</span>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="badge badge-error">NOT CONFIGURED</span>
|
||||
<span class="text-sm opacity-60">Set via <a href="/admin/llm_manager/llmmodel/" class="link link-primary">Admin → LLM Models</a> → Action: "Set as System Embedding Model"</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Chat Model</th>
|
||||
<td>
|
||||
{% if system_chat_model %}
|
||||
<span class="font-semibold">{{ system_chat_model.api.name }}: {{ system_chat_model.name }}</span>
|
||||
{% else %}
|
||||
<span class="text-sm opacity-60">Not configured — concept extraction disabled</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Reranker Model</th>
|
||||
<td>
|
||||
{% if system_reranker_model %}
|
||||
<span class="font-semibold">{{ system_reranker_model.api.name }}: {{ system_reranker_model.name }}</span>
|
||||
{% else %}
|
||||
<span class="text-sm opacity-60">Not configured — Phase 3</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if not neo4j_available %}
|
||||
<div class="alert alert-warning mb-6">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-2.5L13.732 4c-.77-.833-1.962-.833-2.732 0L4.082 16.5c-.77.833.192 2.5 1.732 2.5z" /></svg>
|
||||
<span>Neo4j is not available. Item counts and graph statistics cannot be loaded.</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Embedding Status -->
|
||||
{% if neo4j_available %}
|
||||
<div class="grid grid-cols-2 md:grid-cols-5 gap-4 mb-6">
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">Total Items</div>
|
||||
<div class="stat-value text-lg">{{ total_items }}</div>
|
||||
</div>
|
||||
{% for status, count in status_counts.items %}
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">
|
||||
{% if status == "completed" %}✓ Completed
|
||||
{% elif status == "processing" %}⟳ Processing
|
||||
{% elif status == "failed" %}✗ Failed
|
||||
{% elif status == "pending" %}◦ Pending
|
||||
{% else %}{{ status }}{% endif %}
|
||||
</div>
|
||||
<div class="stat-value text-lg
|
||||
{% if status == 'completed' %} text-success
|
||||
{% elif status == 'processing' %} text-warning
|
||||
{% elif status == 'failed' %} text-error
|
||||
{% endif %}">
|
||||
{{ count }}
|
||||
</div>
|
||||
{% if total_items > 0 %}
|
||||
<div class="stat-desc">{% widthratio count total_items 100 %}% of items</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Actions -->
|
||||
{% if status_counts.pending and status_counts.pending > 0 %}
|
||||
<div class="card bg-base-200 mb-6">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">Actions</h2>
|
||||
<form method="post" action="{% url 'library:embed-all-pending' %}">
|
||||
{% csrf_token %}
|
||||
<button type="submit" class="btn btn-primary"
|
||||
onclick="return confirm('Queue embedding for {{ status_counts.pending }} pending items?')">
|
||||
Embed All Pending Items ({{ status_counts.pending }})
|
||||
</button>
|
||||
<p class="text-sm opacity-60 mt-2">
|
||||
This will queue Celery tasks for all pending items that have uploaded files.
|
||||
</p>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Knowledge Graph Nodes -->
|
||||
{% if neo4j_available %}
|
||||
<div class="card bg-base-200 mb-6">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">Knowledge Graph</h2>
|
||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3">
|
||||
{% for label, count in node_counts.items %}
|
||||
<div class="stat bg-base-100 rounded-lg p-3">
|
||||
<div class="stat-title text-xs">{{ label }}</div>
|
||||
<div class="stat-value text-base">{{ count|intcomma }}</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if total_chunks > 0 %}
|
||||
<div class="mt-4">
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="font-medium">Chunks with embeddings:</span>
|
||||
<span>{{ embedded_chunks|intcomma }} / {{ total_chunks|intcomma }}</span>
|
||||
<progress class="progress progress-primary w-48"
|
||||
value="{{ embedded_chunks }}"
|
||||
max="{{ total_chunks }}"></progress>
|
||||
<span class="text-sm opacity-60">{% widthratio embedded_chunks total_chunks 100 %}%</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Quick Links -->
|
||||
<div class="card bg-base-200">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title">Quick Links</h2>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-outline btn-sm">Libraries</a>
|
||||
<a href="/llm/" class="btn btn-outline btn-sm">LLM Manager</a>
|
||||
<a href="/admin/llm_manager/llmmodel/" class="btn btn-outline btn-sm">Admin: LLM Models</a>
|
||||
<a href="/admin/llm_manager/llmusage/" class="btn btn-outline btn-sm">Admin: Usage</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
19
mnemosyne/library/templates/library/item_confirm_delete.html
Normal file
19
mnemosyne/library/templates/library/item_confirm_delete.html
Normal file
@@ -0,0 +1,19 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Delete {{ item.title }} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="max-w-lg">
|
||||
<h1 class="text-3xl font-bold mb-4 text-error">Delete Item</h1>
|
||||
<div class="alert alert-warning mb-6">
|
||||
<span>Are you sure you want to delete <strong>{{ item.title }}</strong>? This action cannot be undone.</span>
|
||||
</div>
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
<div class="flex gap-2">
|
||||
<button type="submit" class="btn btn-error">Delete</button>
|
||||
<a href="{% url 'library:item-detail' uid=item.uid %}" class="btn btn-ghost">Cancel</a>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
138
mnemosyne/library/templates/library/item_detail.html
Normal file
138
mnemosyne/library/templates/library/item_detail.html
Normal file
@@ -0,0 +1,138 @@
|
||||
{% extends "themis/base.html" %}
|
||||
{% load humanize %}
|
||||
|
||||
{% block title %}{{ item.title }} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-4">
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
|
||||
</div>
|
||||
|
||||
<div class="flex justify-between items-start mb-6">
|
||||
<div>
|
||||
<h1 class="text-3xl font-bold">{{ item.title }}</h1>
|
||||
{% if item.item_type %}<div class="badge badge-outline mt-2">{{ item.item_type }}</div>{% endif %}
|
||||
{% if item.file_type %}<div class="badge badge-ghost mt-2 ml-1">{{ item.file_type }}</div>{% endif %}
|
||||
</div>
|
||||
<div class="flex gap-2">
|
||||
<a href="{% url 'library:item-edit' uid=item.uid %}" class="btn btn-sm btn-outline">Edit</a>
|
||||
<form method="post" action="{% url 'library:item-reembed' uid=item.uid %}" class="inline">
|
||||
{% csrf_token %}
|
||||
<button type="submit" class="btn btn-sm btn-outline btn-secondary" title="Re-embed this item">
|
||||
↻ Re-embed
|
||||
</button>
|
||||
</form>
|
||||
<a href="{% url 'library:item-delete' uid=item.uid %}" class="btn btn-sm btn-error btn-outline">Delete</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Embedding Status -->
|
||||
<div class="mb-6">
|
||||
<div class="flex items-center gap-3">
|
||||
<span class="font-medium">Embedding Status:</span>
|
||||
{% if item.embedding_status == "completed" %}
|
||||
<span class="badge badge-success">Completed</span>
|
||||
{% elif item.embedding_status == "processing" %}
|
||||
<span class="badge badge-warning">Processing</span>
|
||||
{% elif item.embedding_status == "failed" %}
|
||||
<span class="badge badge-error">Failed</span>
|
||||
{% else %}
|
||||
<span class="badge badge-ghost">Pending</span>
|
||||
{% endif %}
|
||||
|
||||
{% if item.embedding_model_name %}
|
||||
<span class="text-sm opacity-60">Model: {{ item.embedding_model_name }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% if item.error_message %}
|
||||
<div class="alert alert-error mt-2">
|
||||
<span>{{ item.error_message }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- Item Metadata -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-4 gap-4 mb-6">
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">File Size</div>
|
||||
<div class="stat-value text-lg">{{ item.file_size|default:0|intcomma }} bytes</div>
|
||||
</div>
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">Chunks</div>
|
||||
<div class="stat-value text-lg">{{ item.chunk_count|default:0 }}</div>
|
||||
</div>
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">Images</div>
|
||||
<div class="stat-value text-lg">{{ item.image_count|default:0 }}</div>
|
||||
</div>
|
||||
<div class="stat bg-base-200 rounded-lg">
|
||||
<div class="stat-title">Concepts</div>
|
||||
<div class="stat-value text-lg">{{ concepts|length }}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Concepts -->
|
||||
{% if concepts %}
|
||||
<div class="mb-6">
|
||||
<h2 class="text-xl font-bold mb-3">Referenced Concepts</h2>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{% for concept in concepts %}
|
||||
<div class="badge badge-lg badge-primary badge-outline">
|
||||
{{ concept.name }}
|
||||
{% if concept.concept_type %}
|
||||
<span class="ml-1 opacity-60 text-xs">({{ concept.concept_type }})</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Images -->
|
||||
{% if images %}
|
||||
<div class="mb-6">
|
||||
<h2 class="text-xl font-bold mb-3">Images ({{ images|length }})</h2>
|
||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3">
|
||||
{% for img in images %}
|
||||
<div class="card bg-base-200">
|
||||
<div class="card-body p-3">
|
||||
<span class="badge badge-sm">{{ img.image_type|default:"image" }}</span>
|
||||
{% if img.description %}
|
||||
<p class="text-xs opacity-60 mt-1">{{ img.description|truncatewords:10 }}</p>
|
||||
{% endif %}
|
||||
<p class="text-xs opacity-40 mt-1">{{ img.s3_key }}</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Chunks Preview -->
|
||||
{% if chunks %}
|
||||
<div class="mb-6">
|
||||
<h2 class="text-xl font-bold mb-3">Chunks ({{ chunks|length }})</h2>
|
||||
<div class="space-y-2">
|
||||
{% for chunk in chunks|slice:":10" %}
|
||||
<div class="collapse collapse-arrow bg-base-200">
|
||||
<input type="checkbox" />
|
||||
<div class="collapse-title font-medium">
|
||||
Chunk {{ chunk.chunk_index }} <span class="text-sm opacity-60">({{ chunk.chunk_size }} chars)</span>
|
||||
{% if chunk.embedding %}
|
||||
<span class="badge badge-success badge-xs ml-2">embedded</span>
|
||||
{% else %}
|
||||
<span class="badge badge-ghost badge-xs ml-2">no vector</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="collapse-content">
|
||||
<p class="text-sm whitespace-pre-wrap">{{ chunk.text_preview }}</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% if chunks|length > 10 %}
|
||||
<p class="text-sm opacity-60">… and {{ chunks|length|add:"-10" }} more chunks</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
67
mnemosyne/library/templates/library/item_form.html
Normal file
67
mnemosyne/library/templates/library/item_form.html
Normal file
@@ -0,0 +1,67 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}{% if editing %}Edit Item{% else %}New Item{% endif %} — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-4">
|
||||
{% if collection %}
|
||||
<a href="{% url 'library:collection-detail' uid=collection.uid %}" class="btn btn-ghost btn-sm">← {{ collection.name }}</a>
|
||||
{% else %}
|
||||
<a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<h1 class="text-3xl font-bold mb-6">
|
||||
{% if editing %}Edit Item: {{ item.title }}{% else %}New Item{% endif %}
|
||||
</h1>
|
||||
|
||||
<form method="post" enctype="multipart/form-data" class="max-w-2xl">
|
||||
{% csrf_token %}
|
||||
<div class="space-y-4">
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">Title</span></label>
|
||||
{{ form.title }}
|
||||
{% if form.title.errors %}<p class="text-error text-sm mt-1">{{ form.title.errors.0 }}</p>{% endif %}
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">Item Type</span></label>
|
||||
{{ form.item_type }}
|
||||
</div>
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">File Type</span></label>
|
||||
{{ form.file_type }}
|
||||
<label class="label"><span class="label-text-alt">Auto-detected from uploaded file if left blank</span></label>
|
||||
</div>
|
||||
|
||||
{% if not editing %}
|
||||
<!-- File upload (only on create) -->
|
||||
<div class="form-control">
|
||||
<label class="label"><span class="label-text font-medium">Document File</span></label>
|
||||
{{ form.file }}
|
||||
<label class="label">
|
||||
<span class="label-text-alt">{{ form.file.help_text }}</span>
|
||||
</label>
|
||||
{% if form.file.errors %}<p class="text-error text-sm mt-1">{{ form.file.errors.0 }}</p>{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="form-control">
|
||||
<label class="label cursor-pointer justify-start gap-3">
|
||||
{{ form.auto_embed }}
|
||||
<span class="label-text">Auto-embed after upload</span>
|
||||
</label>
|
||||
<label class="label">
|
||||
<span class="label-text-alt">{{ form.auto_embed.help_text }}</span>
|
||||
</label>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="flex gap-2 mt-6">
|
||||
<button type="submit" class="btn btn-primary">
|
||||
{% if editing %}Save Changes{% else %}Create Item{% endif %}
|
||||
</button>
|
||||
{% if collection %}
|
||||
<a href="{% url 'library:collection-detail' uid=collection.uid %}" class="btn btn-ghost">Cancel</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,23 @@
|
||||
{% extends "themis/base.html" %}

{% block title %}Delete {{ library.name }} — {{ themis_app_name }}{% endblock %}

{% block content %}
  {# Back link to the detail page of the library being deleted. #}
  <div class="mb-4">
    <a href="{% url 'library:library-detail' uid=library.uid %}" class="btn btn-ghost btn-sm">← {{ library.name }}</a>
  </div>

  <div class="max-w-lg">
    <h1 class="text-3xl font-bold mb-4 text-error">Delete Library</h1>

    <div class="alert alert-warning mb-6">
      <span>Are you sure you want to delete <strong>{{ library.name }}</strong>? This action cannot be undone.</span>
    </div>

    {# No action attribute: the confirmation is posted back to the current URL. #}
    <form method="post">
      {% csrf_token %}
      <div class="flex gap-2">
        <button type="submit" class="btn btn-error">Delete</button>
        <a href="{% url 'library:library-detail' uid=library.uid %}" class="btn btn-ghost">Cancel</a>
      </div>
    </form>
  </div>
{% endblock %}
|
||||
93
mnemosyne/library/templates/library/library_detail.html
Normal file
93
mnemosyne/library/templates/library/library_detail.html
Normal file
@@ -0,0 +1,93 @@
|
||||
{% extends "themis/base.html" %}

{% block title %}{{ library.name }} — {{ themis_app_name }}{% endblock %}

{% block content %}
  <div class="mb-4">
    <a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
  </div>

  {# Header: name, type badge, optional description, and edit/delete actions. #}
  <div class="flex justify-between items-start mb-6">
    <div>
      <h1 class="text-3xl font-bold">{{ library.name }}</h1>
      <div class="badge badge-primary mt-2">{{ library.library_type }}</div>
      {% if library.description %}
        <p class="mt-3 opacity-80">{{ library.description }}</p>
      {% endif %}
    </div>
    <div class="flex gap-2">
      <a href="{% url 'library:library-edit' uid=library.uid %}" class="btn btn-sm btn-outline">Edit</a>
      <a href="{% url 'library:library-delete' uid=library.uid %}" class="btn btn-sm btn-error btn-outline">Delete</a>
    </div>
  </div>

  <!-- Content-Type Configuration -->
  <div class="collapse collapse-arrow bg-base-200 mb-6">
    {# The checkbox drives the collapse; it needs an accessible name of its own. #}
    <input type="checkbox" aria-label="Toggle content-type configuration" />
    <div class="collapse-title font-medium">Content-Type Configuration</div>
    <div class="collapse-content">
      <div class="grid grid-cols-1 gap-4">
        {% if library.embedding_instruction %}
          <div>
            <h4 class="font-semibold text-sm opacity-60">Embedding Instruction</h4>
            <p class="text-sm mt-1">{{ library.embedding_instruction }}</p>
          </div>
        {% endif %}
        {% if library.reranker_instruction %}
          <div>
            <h4 class="font-semibold text-sm opacity-60">Reranker Instruction</h4>
            <p class="text-sm mt-1">{{ library.reranker_instruction }}</p>
          </div>
        {% endif %}
        {% if library.llm_context_prompt %}
          <div>
            <h4 class="font-semibold text-sm opacity-60">LLM Context Prompt</h4>
            <p class="text-sm mt-1">{{ library.llm_context_prompt }}</p>
          </div>
        {% endif %}
        {# Without this the panel opens to an empty box when nothing is set. #}
        {% if not library.embedding_instruction and not library.reranker_instruction and not library.llm_context_prompt %}
          <p class="text-sm opacity-60">No content-type configuration has been set for this library.</p>
        {% endif %}
      </div>
    </div>
  </div>

  <!-- Collections -->
  <div class="flex justify-between items-center mb-4">
    <h2 class="text-xl font-bold">Collections</h2>
    <a href="{% url 'library:collection-create' library_uid=library.uid %}" class="btn btn-sm btn-primary">
      + New Collection
    </a>
  </div>

  {% if collections %}
    <div class="overflow-x-auto">
      <table class="table table-zebra w-full">
        <thead>
          <tr>
            <th>Name</th>
            <th>Description</th>
            <th>Actions</th>
          </tr>
        </thead>
        <tbody>
          {% for col in collections %}
            <tr>
              <td>
                <a href="{% url 'library:collection-detail' uid=col.uid %}" class="link link-hover font-medium">
                  {{ col.name }}
                </a>
              </td>
              {# default:"" keeps a missing description from being rendered as the text "None". #}
              <td class="opacity-70">{{ col.description|default:""|truncatewords:15 }}</td>
              <td>
                <a href="{% url 'library:collection-detail' uid=col.uid %}" class="btn btn-xs btn-ghost">View</a>
                <a href="{% url 'library:collection-edit' uid=col.uid %}" class="btn btn-xs btn-ghost">Edit</a>
              </td>
            </tr>
          {% endfor %}
        </tbody>
      </table>
    </div>
  {% else %}
    <div class="text-center py-8 opacity-60">
      <p>No collections in this library yet.</p>
    </div>
  {% endif %}
{% endblock %}
|
||||
59
mnemosyne/library/templates/library/library_form.html
Normal file
59
mnemosyne/library/templates/library/library_form.html
Normal file
@@ -0,0 +1,59 @@
|
||||
{% extends "themis/base.html" %}

{% block title %}{% if editing %}Edit Library{% else %}New Library{% endif %} — {{ themis_app_name }}{% endblock %}

{% block content %}
  <div class="mb-4">
    <a href="{% url 'library:library-list' %}" class="btn btn-ghost btn-sm">← Libraries</a>
  </div>

  <h1 class="text-3xl font-bold mb-6">
    {% if editing %}Edit Library: {{ library.name }}{% else %}New Library{% endif %}
  </h1>

  {# Shared create/edit form; "editing" switches the headings and button text. #}
  <form method="post" class="max-w-2xl">
    {% csrf_token %}

    {# Form-level validation errors are not tied to any field, so show them first. #}
    {% if form.non_field_errors %}
      <div class="alert alert-error mb-4">
        <ul>
          {% for error in form.non_field_errors %}
            <li>{{ error }}</li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}

    <div class="space-y-4">
      {# Every field renders its first error so a rejected submit is never silent. #}
      <div class="form-control">
        <label class="label" for="{{ form.name.id_for_label }}"><span class="label-text font-medium">Name</span></label>
        {{ form.name }}
        {% if form.name.errors %}<p class="text-error text-sm mt-1">{{ form.name.errors.0 }}</p>{% endif %}
      </div>

      <div class="form-control">
        <label class="label" for="{{ form.library_type.id_for_label }}"><span class="label-text font-medium">Library Type</span></label>
        {{ form.library_type }}
        {% if form.library_type.errors %}<p class="text-error text-sm mt-1">{{ form.library_type.errors.0 }}</p>{% endif %}
      </div>

      <div class="form-control">
        <label class="label" for="{{ form.description.id_for_label }}"><span class="label-text font-medium">Description</span></label>
        {{ form.description }}
        {% if form.description.errors %}<p class="text-error text-sm mt-1">{{ form.description.errors.0 }}</p>{% endif %}
      </div>

      <div class="divider">Content-Type Configuration</div>

      <div class="form-control">
        <label class="label" for="{{ form.embedding_instruction.id_for_label }}"><span class="label-text font-medium">Embedding Instruction</span></label>
        {{ form.embedding_instruction }}
        <label class="label"><span class="label-text-alt opacity-60">Leave blank to use default for the selected library type</span></label>
        {% if form.embedding_instruction.errors %}<p class="text-error text-sm mt-1">{{ form.embedding_instruction.errors.0 }}</p>{% endif %}
      </div>

      <div class="form-control">
        <label class="label" for="{{ form.reranker_instruction.id_for_label }}"><span class="label-text font-medium">Reranker Instruction</span></label>
        {{ form.reranker_instruction }}
        {% if form.reranker_instruction.errors %}<p class="text-error text-sm mt-1">{{ form.reranker_instruction.errors.0 }}</p>{% endif %}
      </div>

      <div class="form-control">
        <label class="label" for="{{ form.llm_context_prompt.id_for_label }}"><span class="label-text font-medium">LLM Context Prompt</span></label>
        {{ form.llm_context_prompt }}
        {% if form.llm_context_prompt.errors %}<p class="text-error text-sm mt-1">{{ form.llm_context_prompt.errors.0 }}</p>{% endif %}
      </div>
    </div>

    <div class="flex gap-2 mt-6">
      <button type="submit" class="btn btn-primary">
        {% if editing %}Save Changes{% else %}Create Library{% endif %}
      </button>
      <a href="{% url 'library:library-list' %}" class="btn btn-ghost">Cancel</a>
    </div>
  </form>
{% endblock %}
|
||||
54
mnemosyne/library/templates/library/library_list.html
Normal file
54
mnemosyne/library/templates/library/library_list.html
Normal file
@@ -0,0 +1,54 @@
|
||||
{% extends "themis/base.html" %}

{% block title %}Libraries — {{ themis_app_name }}{% endblock %}

{% block content %}
  {# Page header with shortcuts to the embedding dashboard and library creation. #}
  <div class="flex justify-between items-center mb-6">
    <h1 class="text-3xl font-bold">Libraries</h1>
    <div class="flex gap-2">
      <a href="{% url 'library:embedding-dashboard' %}" class="btn btn-outline btn-secondary">
        Embedding Pipeline
      </a>
      <a href="{% url 'library:library-create' %}" class="btn btn-primary">
        + New Library
      </a>
    </div>
  </div>

  {% if error %}
    <div class="alert alert-warning mb-4">
      <span>{{ error }}</span>
    </div>
  {% endif %}

  {# One card per library; the empty-state text is suppressed when an error is shown. #}
  {% if libraries %}
    <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
      {% for lib in libraries %}
        <div class="card bg-base-200 shadow-md">
          <div class="card-body">
            <h2 class="card-title">
              <a href="{% url 'library:library-detail' uid=lib.uid %}" class="link link-hover">
                {{ lib.name }}
              </a>
            </h2>
            <div class="badge badge-outline">{{ lib.library_type }}</div>
            {% if lib.description %}
              <p class="text-sm opacity-70 mt-2">{{ lib.description|truncatewords:20 }}</p>
            {% endif %}
            <div class="card-actions justify-end mt-3">
              <a href="{% url 'library:library-detail' uid=lib.uid %}" class="btn btn-sm btn-ghost">View</a>
              <a href="{% url 'library:library-edit' uid=lib.uid %}" class="btn btn-sm btn-ghost">Edit</a>
            </div>
          </div>
        </div>
      {% endfor %}
    </div>
  {% elif not error %}
    <div class="text-center py-12 opacity-60">
      <p class="text-lg">No libraries yet.</p>
      <p class="mt-2">Create your first library to get started.</p>
    </div>
  {% endif %}
{% endblock %}
|
||||
0
mnemosyne/library/tests/__init__.py
Normal file
0
mnemosyne/library/tests/__init__.py
Normal file
108
mnemosyne/library/tests/test_chunker.py
Normal file
108
mnemosyne/library/tests/test_chunker.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""
|
||||
Tests for the content-type-aware chunking service.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.chunker import ChunkResult, ContentTypeChunker
|
||||
from library.services.parsers import ParseResult, TextBlock
|
||||
|
||||
|
||||
class ChunkResultTests(TestCase):
    """Check that ``len()`` of a ChunkResult equals its number of chunks."""

    def test_len(self):
        three_chunks = ChunkResult(
            chunks=["a", "b", "c"],
            chunk_page_map={},
            strategy="test",
        )
        self.assertEqual(len(three_chunks), 3)

    def test_empty(self):
        no_chunks = ChunkResult(chunks=[], chunk_page_map={}, strategy="test")
        self.assertEqual(len(no_chunks), 0)
|
||||
|
||||
|
||||
class ContentTypeChunkerTests(TestCase):
    """Exercise ``ContentTypeChunker.chunk`` with ``_get_splitter`` patched out."""

    def _make_parse_result(self, text: str, pages: int = 1) -> ParseResult:
        """Build a ParseResult whose text is spread across ``pages`` blocks.

        A single page yields one block on page 0. Otherwise the text is cut
        into equal-width slices, with the final slice absorbing the remainder.
        """
        if pages == 1:
            blocks = [TextBlock(text=text, page=0)]
        else:
            width = len(text) // pages
            final_page = pages - 1
            blocks = [
                TextBlock(
                    text=text[
                        page_no * width:
                        len(text) if page_no == final_page else (page_no + 1) * width
                    ],
                    page=page_no,
                )
                for page_no in range(pages)
            ]
        return ParseResult(text_blocks=blocks, images=[], metadata={}, file_type="txt")

    @patch("library.services.chunker.ContentTypeChunker._get_splitter")
    def test_chunk_dispatches_strategy(self, get_splitter):
        """Chunker uses the strategy from config."""
        get_splitter.return_value.chunks.return_value = ["chunk1", "chunk2"]
        parsed = self._make_parse_result("Some text to chunk into pieces")
        settings = {"strategy": "chapter_aware", "chunk_size": 512, "chunk_overlap": 64}

        outcome = ContentTypeChunker().chunk(parsed, settings, library_type="fiction")

        self.assertIsInstance(outcome, ChunkResult)
        self.assertEqual(outcome.strategy, "chapter_aware")
        self.assertEqual(len(outcome.chunks), 2)
        get_splitter.assert_called_once_with(512, 64)

    @patch("library.services.chunker.ContentTypeChunker._get_splitter")
    def test_empty_text_returns_empty(self, get_splitter):
        """Empty text produces no chunks."""
        blank = ParseResult(text_blocks=[], images=[], metadata={}, file_type="txt")
        settings = {"strategy": "section_aware", "chunk_size": 512, "chunk_overlap": 64}

        outcome = ContentTypeChunker().chunk(blank, settings)

        self.assertEqual(len(outcome), 0)
        get_splitter.assert_not_called()

    @patch("library.services.chunker.ContentTypeChunker._get_splitter")
    def test_default_config_values(self, get_splitter):
        """Missing config keys use defaults."""
        get_splitter.return_value.chunks.return_value = ["chunk"]
        parsed = self._make_parse_result("Text")

        outcome = ContentTypeChunker().chunk(parsed, {})

        # Default: strategy=section_aware, chunk_size=512, overlap=64
        self.assertEqual(outcome.strategy, "section_aware")
        get_splitter.assert_called_once_with(512, 64)

    @patch("library.services.chunker.ContentTypeChunker._get_splitter")
    def test_page_mapping(self, get_splitter):
        """Chunks are mapped to their source pages."""
        get_splitter.return_value.chunks.return_value = ["Page 0 text", "Page 1 text"]
        page_blocks = [
            TextBlock(text="Page 0 text content", page=0),
            TextBlock(text="Page 1 text content", page=1),
        ]
        parsed = ParseResult(
            text_blocks=page_blocks,
            images=[],
            metadata={},
            file_type="pdf",
        )
        settings = {"strategy": "section_aware", "chunk_size": 512, "chunk_overlap": 64}

        outcome = ContentTypeChunker().chunk(parsed, settings)

        self.assertIn(0, outcome.chunk_page_map)
|
||||
77
mnemosyne/library/tests/test_concepts.py
Normal file
77
mnemosyne/library/tests/test_concepts.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""
|
||||
Tests for the concept extraction service.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.concepts import ConceptExtractor
|
||||
|
||||
|
||||
class ConceptExtractionParsingTests(TestCase):
    """Check how ``_parse_concept_response`` handles different response texts."""

    def setUp(self):
        model = MagicMock()
        model.configure_mock(
            **{
                "api.api_type": "openai",
                "api.base_url": "http://localhost:8080/v1",
                "api.api_key": "test",
                "api.timeout_seconds": 30,
            }
        )
        # ``name`` is reserved by the Mock constructor, so assign it directly.
        model.name = "test-chat"
        self.mock_model = model
        self.extractor = ConceptExtractor(model)

    def _parse(self, raw):
        """Run the extractor's response parser on *raw*."""
        return self.extractor._parse_concept_response(raw)

    def test_parse_valid_json_array(self):
        concepts = self._parse(
            '[{"name": "python", "type": "topic"}, {"name": "django", "type": "technique"}]'
        )
        self.assertEqual(len(concepts), 2)
        self.assertEqual(concepts[0]["name"], "python")
        self.assertEqual(concepts[1]["type"], "technique")

    def test_parse_json_in_markdown_code_block(self):
        concepts = self._parse('```json\n[{"name": "python", "type": "topic"}]\n```')
        self.assertEqual(len(concepts), 1)

    def test_parse_json_embedded_in_text(self):
        concepts = self._parse(
            'Here are the concepts: [{"name": "neo4j", "type": "technique"}] found in the text.'
        )
        self.assertEqual(len(concepts), 1)

    def test_parse_invalid_json_returns_empty(self):
        concepts = self._parse("This is not JSON at all.")
        self.assertEqual(concepts, [])

    def test_parse_filters_invalid_entries(self):
        concepts = self._parse(
            '[{"name": "valid", "type": "topic"}, {"invalid": "entry"}, "string"]'
        )
        self.assertEqual(len(concepts), 1)
        self.assertEqual(concepts[0]["name"], "valid")
|
||||
|
||||
|
||||
class SampleIndexSelectionTests(TestCase):
    """Check the indices chosen by ``_select_sample_indices``."""

    def setUp(self):
        self.extractor = ConceptExtractor(MagicMock())

    def test_small_total_returns_all(self):
        picked = self.extractor._select_sample_indices(5, max_samples=10)
        self.assertEqual(picked, [0, 1, 2, 3, 4])

    def test_equal_total_returns_all(self):
        picked = self.extractor._select_sample_indices(10, max_samples=10)
        self.assertEqual(picked, list(range(10)))

    def test_large_total_returns_max_samples(self):
        picked = self.extractor._select_sample_indices(100, max_samples=10)
        self.assertEqual(len(picked), 10)
        # Should be evenly spaced
        first, last = picked[0], picked[-1]
        self.assertEqual(first, 0)
        self.assertEqual(last, 90)

    def test_returns_integers(self):
        picked = self.extractor._select_sample_indices(50, max_samples=7)
        for position in picked:
            self.assertIsInstance(position, int)
|
||||
165
mnemosyne/library/tests/test_embedding_client.py
Normal file
165
mnemosyne/library/tests/test_embedding_client.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Tests for the multi-backend embedding client.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.embedding_client import EmbeddingClient
|
||||
|
||||
|
||||
class MockLLMModel:
    """Stand-in for an LLMModel, exposing the attributes these tests set up."""

    def __init__(self, api_type="openai", supports_multimodal=False, vector_dimensions=None):
        # The nested API stub carries the connection details.
        self.api = MockLLMApi(api_type=api_type)
        self.name = "test-embedding-model"
        self.input_cost_per_1k = "0.0001"
        self.vector_dimensions = vector_dimensions
        self.supports_multimodal = supports_multimodal
|
||||
|
||||
|
||||
class MockLLMApi:
    """Stand-in for an LLMApi with fixed connection settings."""

    def __init__(self, api_type="openai"):
        # Only the API type varies between tests; the rest are constants.
        self.api_type = api_type
        self.name = "Test API"
        self.base_url = "http://localhost:8080/v1"
        self.api_key = "test-key"
        self.timeout_seconds = 60
|
||||
|
||||
|
||||
class EmbeddingClientInitTests(TestCase):
    """Check the attributes EmbeddingClient exposes after construction."""

    def test_init_openai(self):
        embedder = EmbeddingClient(MockLLMModel(api_type="openai"))
        self.assertEqual(embedder.api_type, "openai")
        self.assertEqual(embedder.model_name, "test-embedding-model")

    def test_init_bedrock(self):
        bedrock_model = MockLLMModel(api_type="bedrock")
        bedrock_model.api.base_url = "https://bedrock-runtime.us-east-1.amazonaws.com"
        embedder = EmbeddingClient(bedrock_model)
        self.assertEqual(embedder.api_type, "bedrock")

    def test_init_with_user(self):
        owner = MagicMock()
        embedder = EmbeddingClient(MockLLMModel(), user=owner)
        self.assertEqual(embedder.user, owner)
|
||||
|
||||
|
||||
class OpenAIResponseParsingTests(TestCase):
    """Feed ``_parse_openai_response`` each payload shape the tests cover."""

    def setUp(self):
        self.model = MockLLMModel()
        self.client = EmbeddingClient(self.model)

    def _parse(self, payload):
        """Run the client's response parser on *payload*."""
        return self.client._parse_openai_response(payload)

    def test_parse_standard_openai_format(self):
        vectors = self._parse({"data": [{"embedding": [0.1, 0.2, 0.3], "index": 0}]})
        self.assertEqual(len(vectors), 1)
        self.assertEqual(vectors[0], [0.1, 0.2, 0.3])

    def test_parse_multi_embedding_openai(self):
        out_of_order = [
            {"embedding": [0.1, 0.2], "index": 1},
            {"embedding": [0.3, 0.4], "index": 0},
        ]
        vectors = self._parse({"data": out_of_order})
        self.assertEqual(len(vectors), 2)
        # Should be sorted by index
        self.assertEqual(vectors[0], [0.3, 0.4])
        self.assertEqual(vectors[1], [0.1, 0.2])

    def test_parse_list_of_dicts(self):
        vectors = self._parse([{"embedding": [0.1, 0.2]}, {"embedding": [0.3, 0.4]}])
        self.assertEqual(len(vectors), 2)

    def test_parse_dict_with_embedding_key(self):
        vectors = self._parse({"embedding": [0.1, 0.2, 0.3]})
        self.assertEqual(len(vectors), 1)

    def test_parse_dict_with_embeddings_key(self):
        vectors = self._parse({"embeddings": [[0.1, 0.2], [0.3, 0.4]]})
        self.assertEqual(len(vectors), 2)

    def test_unexpected_format_raises(self):
        unknown_shape = {"unexpected": "data"}
        with self.assertRaises(ValueError):
            self.client._parse_openai_response(unknown_shape)
|
||||
|
||||
|
||||
class EmbeddingClientDispatchTests(TestCase):
    """Check request routing per API type, with ``requests.post`` patched."""

    @staticmethod
    def _ok_response(payload):
        """Return a mock HTTP 200 response whose ``json()`` yields *payload*."""
        return MagicMock(status_code=200, **{"json.return_value": payload})

    @patch("library.services.embedding_client.requests.post")
    def test_embed_text_openai_dispatch(self, mock_post):
        mock_post.return_value = self._ok_response(
            {"data": [{"embedding": [0.1, 0.2, 0.3], "index": 0}]}
        )
        embedder = EmbeddingClient(MockLLMModel(api_type="openai"))

        vector = embedder.embed_text("test text")

        self.assertEqual(vector, [0.1, 0.2, 0.3])
        mock_post.assert_called_once()
        # The URL is the first positional argument passed to requests.post.
        positional, _ = mock_post.call_args
        self.assertIn("/embeddings", positional[0])

    @patch("library.services.embedding_client.requests.post")
    def test_embed_text_bedrock_dispatch(self, mock_post):
        mock_post.return_value = self._ok_response(
            {
                "embedding": [0.4, 0.5, 0.6],
                "inputTextTokenCount": 5,
            }
        )
        bedrock_model = MockLLMModel(api_type="bedrock", vector_dimensions=1024)
        bedrock_model.api.base_url = "https://bedrock-runtime.us-east-1.amazonaws.com"
        embedder = EmbeddingClient(bedrock_model)

        vector = embedder.embed_text("test text")

        self.assertEqual(vector, [0.4, 0.5, 0.6])
        positional, _ = mock_post.call_args
        requested_url = positional[0]
        self.assertIn("/model/", requested_url)
        self.assertIn("/invoke", requested_url)

    def test_embed_image_not_multimodal_returns_none(self):
        embedder = EmbeddingClient(MockLLMModel(supports_multimodal=False))
        self.assertIsNone(embedder.embed_image(b"fake image data", "png"))

    @patch("library.services.embedding_client.requests.post")
    def test_embed_texts_batch(self, mock_post):
        mock_post.return_value = self._ok_response(
            {
                "data": [
                    {"embedding": [0.1, 0.2], "index": 0},
                    {"embedding": [0.3, 0.4], "index": 1},
                ]
            }
        )
        embedder = EmbeddingClient(MockLLMModel())

        vectors = embedder.embed_texts(["text1", "text2"])

        self.assertEqual(len(vectors), 2)
|
||||
129
mnemosyne/library/tests/test_parsers.py
Normal file
129
mnemosyne/library/tests/test_parsers.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Tests for the document parser service.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.parsers import (
|
||||
IMAGE_EXTENSIONS,
|
||||
PLAINTEXT_EXTENSIONS,
|
||||
PYMUPDF_EXTENSIONS,
|
||||
DocumentParser,
|
||||
ParseResult,
|
||||
)
|
||||
|
||||
|
||||
class DocumentParserPlaintextTests(TestCase):
    """Parse small text fixtures through DocumentParser."""

    def setUp(self):
        self.parser = DocumentParser()

    @staticmethod
    def _write_fixture(content, suffix):
        """Write *content* to a named temp file and return its path.

        The file is created with ``delete=False``, so the caller must remove it.
        """
        with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False) as handle:
            handle.write(content)
            handle.flush()
            return handle.name

    def test_parse_txt_file(self):
        fixture = self._write_fixture("Hello World\n\nThis is a test document.", ".txt")
        try:
            parsed = self.parser.parse(fixture, "txt")
            self.assertIsInstance(parsed, ParseResult)
            self.assertEqual(parsed.file_type, "txt")
            self.assertEqual(len(parsed.text_blocks), 1)
            self.assertIn("Hello World", parsed.text_blocks[0].text)
            self.assertEqual(len(parsed.images), 0)
        finally:
            os.unlink(fixture)

    def test_parse_md_file(self):
        fixture = self._write_fixture("# Heading\n\nSome markdown content.", ".md")
        try:
            parsed = self.parser.parse(fixture, "md")
            self.assertEqual(parsed.file_type, "md")
            self.assertIn("Heading", parsed.text_blocks[0].text)
        finally:
            os.unlink(fixture)

    def test_parse_empty_file(self):
        fixture = self._write_fixture("", ".txt")
        try:
            parsed = self.parser.parse(fixture, "txt")
            self.assertEqual(len(parsed.text_blocks), 0)
        finally:
            os.unlink(fixture)

    def test_parse_bytes(self):
        parsed = self.parser.parse_bytes(b"Hello from bytes", "txt", filename="test.txt")
        self.assertEqual(len(parsed.text_blocks), 1)
        self.assertIn("Hello from bytes", parsed.text_blocks[0].text)
|
||||
|
||||
|
||||
class DocumentParserValidationTests(TestCase):
    """Check how DocumentParser treats the ``file_type`` argument."""

    def setUp(self):
        self.parser = DocumentParser()

    def test_unsupported_format_raises(self):
        # Default (binary) mode, hence the bytes payload.
        with tempfile.NamedTemporaryFile(suffix=".xyz", delete=False) as handle:
            handle.write(b"data")
            fixture = handle.name

        try:
            with self.assertRaises(ValueError) as raised:
                self.parser.parse(fixture, "xyz")
            message = str(raised.exception)
            self.assertIn("Unsupported file type", message)
        finally:
            os.unlink(fixture)

    def test_file_type_normalization(self):
        """File type should be normalized (lowercase, no dot)."""
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as handle:
            handle.write("test")
            fixture = handle.name

        try:
            parsed = self.parser.parse(fixture, ".TXT")
            self.assertEqual(parsed.file_type, "txt")
        finally:
            os.unlink(fixture)
|
||||
|
||||
|
||||
class SupportedExtensionsTests(TestCase):
    """Membership checks on the parser's extension constants."""

    # --- PYMUPDF_EXTENSIONS ---

    def test_pymupdf_includes_pdf(self):
        extension = "pdf"
        self.assertIn(extension, PYMUPDF_EXTENSIONS)

    def test_pymupdf_includes_epub(self):
        extension = "epub"
        self.assertIn(extension, PYMUPDF_EXTENSIONS)

    def test_pymupdf_includes_docx(self):
        extension = "docx"
        self.assertIn(extension, PYMUPDF_EXTENSIONS)

    def test_pymupdf_includes_pptx(self):
        extension = "pptx"
        self.assertIn(extension, PYMUPDF_EXTENSIONS)

    # --- PLAINTEXT_EXTENSIONS ---

    def test_plaintext_includes_txt(self):
        extension = "txt"
        self.assertIn(extension, PLAINTEXT_EXTENSIONS)

    def test_plaintext_includes_md(self):
        extension = "md"
        self.assertIn(extension, PLAINTEXT_EXTENSIONS)

    # --- IMAGE_EXTENSIONS ---

    def test_image_includes_png(self):
        extension = "png"
        self.assertIn(extension, IMAGE_EXTENSIONS)

    def test_image_includes_jpg(self):
        extension = "jpg"
        self.assertIn(extension, IMAGE_EXTENSIONS)
|
||||
103
mnemosyne/library/tests/test_pipeline.py
Normal file
103
mnemosyne/library/tests/test_pipeline.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Tests for the embedding pipeline orchestrator.
|
||||
|
||||
Pipeline tests mock external dependencies (Neo4j, S3, LLM APIs).
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.pipeline import (
|
||||
CHUNK_S3_KEY,
|
||||
IMAGE_S3_KEY,
|
||||
ORIGINAL_S3_KEY,
|
||||
EmbeddingPipeline,
|
||||
)
|
||||
|
||||
|
||||
class S3KeyPatternTests(TestCase):
    """Format each S3 key template and compare against the expected path."""

    def test_original_key_format(self):
        fields = {"item_uid": "abc123", "ext": "pdf"}
        self.assertEqual(ORIGINAL_S3_KEY.format(**fields), "items/abc123/original.pdf")

    def test_chunk_key_format(self):
        fields = {"item_uid": "abc123", "index": 5}
        self.assertEqual(CHUNK_S3_KEY.format(**fields), "chunks/abc123/chunk_5.txt")

    def test_image_key_format(self):
        fields = {"item_uid": "abc123", "index": 2, "ext": "png"}
        self.assertEqual(IMAGE_S3_KEY.format(**fields), "images/abc123/2.png")
|
||||
|
||||
|
||||
class EmbeddingPipelineInitTests(TestCase):
    """Check the ``user`` attribute set by the EmbeddingPipeline constructor."""

    def test_init_without_user(self):
        anonymous = EmbeddingPipeline()
        self.assertIsNone(anonymous.user)

    def test_init_with_user(self):
        owner = MagicMock()
        owned = EmbeddingPipeline(user=owner)
        self.assertEqual(owned.user, owner)
|
||||
|
||||
|
||||
class PipelineItemNotFoundTests(TestCase):
    """Check that a failing item lookup surfaces as ValueError."""

    @patch("library.services.pipeline.Item")
    def test_process_nonexistent_item_raises(self, item_cls):
        # Make the patched lookup fail the way a missing node would.
        item_cls.nodes.get.side_effect = Exception("Not found")

        with self.assertRaises(ValueError) as raised:
            EmbeddingPipeline().process_item("nonexistent-uid")

        message = str(raised.exception)
        self.assertIn("Item not found", message)

    @patch("library.services.pipeline.Item")
    def test_reprocess_nonexistent_item_raises(self, item_cls):
        item_cls.nodes.get.side_effect = Exception("Not found")

        with self.assertRaises(ValueError):
            EmbeddingPipeline().reprocess_item("nonexistent-uid")
|
||||
|
||||
|
||||
class PipelineNoEmbeddingModelTests(TestCase):
    """Check the failure raised when no system embedding model is returned."""

    @patch("library.services.pipeline.LLMModel")
    @patch("library.services.pipeline.default_storage")
    @patch("library.services.pipeline.DocumentParser")
    def test_no_embedding_model_raises(self, mock_parser, mock_storage, mock_llm):
        """Pipeline raises ValueError if no system embedding model is configured."""
        mock_llm.get_system_embedding_model.return_value = None

        # Mock item
        pending_item = MagicMock()
        pending_item.configure_mock(
            uid="test-uid",
            title="Test",
            file_type="txt",
            s3_key="items/test-uid/original.txt",
            embedding_status="pending",
            **{
                "chunks.all.return_value": [],
                "images.all.return_value": [],
            },
        )

        with patch("library.services.pipeline.Item") as item_cls:
            item_cls.nodes.get.return_value = pending_item

            # Mock S3 read: storage.open() is used as a context manager whose
            # entered handle returns fixed bytes from read().
            opened = mock_storage.open.return_value
            opened.__enter__.return_value.read.return_value = b"test content"
            opened.__exit__.return_value = False

            runner = EmbeddingPipeline()

            with self.assertRaises(ValueError) as raised:
                runner.process_item("test-uid")

            self.assertIn("No system embedding model", str(raised.exception))
|
||||
86
mnemosyne/library/tests/test_tasks.py
Normal file
86
mnemosyne/library/tests/test_tasks.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
Tests for Celery embedding tasks.
|
||||
|
||||
Tasks are tested with CELERY_TASK_ALWAYS_EAGER=True for synchronous execution.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
|
||||
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
class EmbedItemTaskTests(TestCase):
    """Call ``embed_item`` directly with EmbeddingPipeline patched."""

    @patch("library.tasks.EmbeddingPipeline")
    def test_embed_item_success(self, pipeline_cls):
        from library.tasks import embed_item

        fake_pipeline = pipeline_cls.return_value
        fake_pipeline.process_item.return_value = {
            "chunks_created": 10,
            "images_stored": 2,
            "model_name": "test-model",
        }

        outcome = embed_item("test-uid-123")

        self.assertTrue(outcome["success"])
        self.assertEqual(outcome["item_uid"], "test-uid-123")
        fake_pipeline.process_item.assert_called_once()

    @patch("library.tasks.EmbeddingPipeline")
    def test_embed_item_failure(self, pipeline_cls):
        from library.tasks import embed_item

        # A pipeline error is expected to come back as a result dict.
        pipeline_cls.return_value.process_item.side_effect = ValueError("Item not found")

        outcome = embed_item("nonexistent-uid")

        self.assertFalse(outcome["success"])
        self.assertIn("error", outcome)
|
||||
|
||||
|
||||
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
class ReembedItemTaskTests(TestCase):
    """Exercise ``library.tasks.reembed_item`` against a mocked pipeline."""

    @patch("library.tasks.EmbeddingPipeline")
    def test_reembed_item_success(self, mock_pipeline_cls):
        """The task reports success and calls ``reprocess_item`` exactly once."""
        from library.tasks import reembed_item

        # The task instantiates the patched class; hand it a pre-wired mock.
        pipeline = MagicMock(
            **{
                "reprocess_item.return_value": {
                    "chunks_created": 5,
                    "images_stored": 1,
                    "model_name": "test-model",
                }
            }
        )
        mock_pipeline_cls.return_value = pipeline

        outcome = reembed_item("test-uid-123")

        self.assertTrue(outcome["success"])
        pipeline.reprocess_item.assert_called_once()
|
||||
|
||||
|
||||
class ResolveUserTests(TestCase):
    """Check inputs for which ``_resolve_user`` is expected to return None."""

    @staticmethod
    def _resolve(user_id):
        # Import lazily, inside the running test, like the other task tests.
        from library.tasks import _resolve_user

        return _resolve_user(user_id)

    def test_none_user_id(self):
        """``None`` resolves to no user."""
        self.assertIsNone(self._resolve(None))

    def test_zero_user_id(self):
        """``0`` resolves to no user."""
        self.assertIsNone(self._resolve(0))

    def test_invalid_user_id(self):
        """An id with no matching user resolves to no user."""
        self.assertIsNone(self._resolve(999999))
|
||||
121
mnemosyne/library/tests/test_text_utils.py
Normal file
121
mnemosyne/library/tests/test_text_utils.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Tests for text sanitization utilities.
|
||||
"""
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from library.services.text_utils import (
|
||||
clean_pdf_artifacts,
|
||||
remove_excessive_whitespace,
|
||||
sanitize_text,
|
||||
truncate_text,
|
||||
)
|
||||
|
||||
|
||||
class SanitizeTextTests(TestCase):
    """Tests for the sanitize_text function."""

    def test_empty_string(self):
        """An empty string comes back unchanged."""
        self.assertEqual(sanitize_text("", log_changes=False), "")

    def test_none_input(self):
        """``None`` is passed through rather than raising."""
        self.assertIsNone(sanitize_text(None, log_changes=False))

    def test_clean_text_unchanged(self):
        """Text with nothing to sanitize is returned as-is."""
        text = "Hello, this is clean text."
        self.assertEqual(sanitize_text(text, log_changes=False), text)

    def test_removes_null_bytes(self):
        """NUL bytes are dropped without inserting a replacement."""
        result = sanitize_text("Hello\x00World", log_changes=False)
        self.assertNotIn("\x00", result)
        self.assertEqual(result, "HelloWorld")

    def test_removes_control_characters(self):
        """Non-whitespace control characters are stripped."""
        result = sanitize_text("Hello\x07World\x0eTest", log_changes=False)
        self.assertNotIn("\x07", result)
        self.assertNotIn("\x0e", result)

    def test_preserves_newlines_and_tabs(self):
        """Newlines and tabs survive sanitization."""
        result = sanitize_text("Hello\nWorld\tTest\r\n", log_changes=False)
        self.assertIn("\n", result)
        self.assertIn("\t", result)

    def test_removes_zero_width_characters(self):
        """Zero-width space (U+200B) is removed."""
        result = sanitize_text("Hello\u200bWorld", log_changes=False)
        self.assertNotIn("\u200b", result)

    def test_normalizes_unicode(self):
        """A combining sequence is normalized to its precomposed form."""
        combining = "e\u0301"  # e + combining acute
        result = sanitize_text(combining, log_changes=False)
        self.assertEqual(result, "\u00e9")  # precomposed é

    def test_cleans_pdf_ligatures(self):
        """Typographic ligatures are expanded to plain letter pairs."""
        # Explicit escapes (U+FB01 fi, U+FB02 fl, U+FB00 ff) guarantee the
        # input really contains ligature code points; with plain ASCII input
        # the assertions below would pass without any ligature handling.
        text = "\ufb01nding the \ufb02ow of e\ufb00ort"
        result = sanitize_text(text, log_changes=False)
        self.assertIn("fi", result)
        self.assertIn("fl", result)
        self.assertIn("ff", result)
|
||||
|
||||
|
||||
class CleanPdfArtifactsTests(TestCase):
    """Check the typographic substitutions made by ``clean_pdf_artifacts``."""

    def test_replaces_smart_quotes(self):
        """Curly double and single quotes become straight quotes."""
        cleaned = clean_pdf_artifacts("\u201cHello\u201d \u2018World\u2019")
        self.assertEqual(cleaned, '"Hello" \'World\'')

    def test_replaces_dashes(self):
        """En and em dashes both become a plain hyphen."""
        cleaned = clean_pdf_artifacts("word\u2013word\u2014end")
        self.assertEqual(cleaned, "word-word-end")

    def test_replaces_ellipsis(self):
        """The single-character ellipsis becomes three dots."""
        cleaned = clean_pdf_artifacts("wait\u2026")
        self.assertEqual(cleaned, "wait...")

    def test_replaces_nbsp(self):
        """A non-breaking space becomes a regular space."""
        cleaned = clean_pdf_artifacts("non\u00a0breaking")
        self.assertEqual(cleaned, "non breaking")
|
||||
|
||||
|
||||
class RemoveExcessiveWhitespaceTests(TestCase):
    """Tests for remove_excessive_whitespace."""

    def test_collapses_spaces(self):
        """A run of several spaces is reduced to a single space."""
        # The input must contain a real run of spaces; "a b" already equals
        # the expected output and would pass even if nothing were collapsed.
        self.assertEqual(remove_excessive_whitespace("a    b"), "a b")

    def test_collapses_newlines(self):
        """Four consecutive newlines are reduced to two."""
        self.assertEqual(
            remove_excessive_whitespace("a\n\n\n\nb"), "a\n\nb"
        )

    def test_strips_line_whitespace(self):
        """Leading and trailing spaces are removed from every line."""
        self.assertEqual(
            remove_excessive_whitespace(" hello \n world "),
            "hello\nworld",
        )
|
||||
|
||||
|
||||
class TruncateTextTests(TestCase):
    """Check length limiting and suffix handling of ``truncate_text``."""

    def test_short_text_unchanged(self):
        """Text shorter than the limit is returned untouched."""
        self.assertEqual(truncate_text("hello", 100), "hello")

    def test_truncates_at_word_boundary(self):
        """Over-long text ends in the default suffix and fits the limit."""
        shortened = truncate_text("hello beautiful world", 15)
        self.assertTrue(shortened.endswith("..."))
        self.assertLessEqual(len(shortened), 15)

    def test_custom_suffix(self):
        """A caller-supplied suffix is used instead of the default."""
        shortened = truncate_text("hello beautiful world", 15, suffix="…")
        self.assertTrue(shortened.endswith("…"))
|
||||
56
mnemosyne/library/urls.py
Normal file
56
mnemosyne/library/urls.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
URL patterns for the library app.
|
||||
|
||||
Provides both custom admin views (HTML CRUD) and DRF API endpoints.
|
||||
"""
|
||||
|
||||
from django.urls import include, path
|
||||
|
||||
from . import views
|
||||
|
||||
app_name = "library"
|
||||
|
||||
# Order matters: Django uses the first pattern that matches, and
# "<str:uid>/" matches ANY single path segment. Every fixed single-segment
# route ("embedding/", "api/", "create/") must therefore be listed before it.
urlpatterns = [
    # Embedding Pipeline Dashboard
    path("embedding/", views.embedding_dashboard, name="embedding-dashboard"),
    path("embedding/embed-all/", views.embed_all_pending, name="embed-all-pending"),
    # DRF API -- kept above the "<str:uid>/" catch-all so a request for
    # "api/" itself is offered to the API urlconf instead of being treated as
    # a library uid. Paths the API urlconf does not match still fall through
    # to the patterns below.
    path("api/", include("library.api.urls")),
    # Library CRUD
    path("", views.library_list, name="library-list"),
    path("create/", views.library_create, name="library-create"),
    path("<str:uid>/", views.library_detail, name="library-detail"),
    path("<str:uid>/edit/", views.library_edit, name="library-edit"),
    path("<str:uid>/delete/", views.library_delete, name="library-delete"),
    # Collection CRUD
    path(
        "<str:library_uid>/collections/create/",
        views.collection_create,
        name="collection-create",
    ),
    path(
        "collections/<str:uid>/",
        views.collection_detail,
        name="collection-detail",
    ),
    path(
        "collections/<str:uid>/edit/",
        views.collection_edit,
        name="collection-edit",
    ),
    path(
        "collections/<str:uid>/delete/",
        views.collection_delete,
        name="collection-delete",
    ),
    # Item CRUD
    path(
        "collections/<str:collection_uid>/items/create/",
        views.item_create,
        name="item-create",
    ),
    path("items/<str:uid>/", views.item_detail, name="item-detail"),
    path("items/<str:uid>/edit/", views.item_edit, name="item-edit"),
    path("items/<str:uid>/reembed/", views.item_reembed, name="item-reembed"),
    path("items/<str:uid>/delete/", views.item_delete, name="item-delete"),
]
|
||||
23
mnemosyne/library/utils.py
Normal file
23
mnemosyne/library/utils.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Utility helpers for the library app.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def neo4j_available():
    """
    Check whether Neo4j is reachable.

    Returns True if a simple Cypher query succeeds, False otherwise.
    Used to guard views/tests that require Neo4j.

    Never raises: any failure (neomodel not importable, connection refused,
    authentication error, ...) is reported as False. The reason is logged at
    DEBUG level so an unexpected "unavailable" result can be diagnosed.
    """
    try:
        # Imported lazily so a missing or misconfigured neomodel is reported
        # as "unavailable" instead of breaking import of this module.
        from neomodel import db

        db.cypher_query("RETURN 1")
        return True
    except Exception as exc:
        # Deliberately broad: this is a best-effort probe.
        logger.debug("Neo4j availability check failed: %s", exc)
        return False
|
||||
587
mnemosyne/library/views.py
Normal file
587
mnemosyne/library/views.py
Normal file
@@ -0,0 +1,587 @@
|
||||
"""
|
||||
Custom admin views for Library, Collection, and Item CRUD.
|
||||
|
||||
Since neomodel StructuredNodes cannot use Django's standard ModelAdmin,
|
||||
these FBVs provide CRUD operations rendered within Themis's template structure.
|
||||
All views require login.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
from django.shortcuts import redirect, render
|
||||
|
||||
from .content_types import get_library_type_config
|
||||
from .forms import CollectionForm, ItemForm, LibraryForm
|
||||
from .utils import neo4j_available
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Library views
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@login_required
def library_list(request):
    """List all libraries, ordered by name.

    Renders ``library/library_list.html`` with ``libraries`` (a list, empty
    on failure) and ``error`` (a message string, or None when the query
    succeeded).
    """
    libraries = []
    error = None
    if neo4j_available():
        try:
            from .models import Library

            # .all() runs the query here. A bare node set is evaluated
            # lazily, i.e. only when the template iterates it -- outside this
            # try block, where a query failure would become an unhandled
            # error instead of the message below.
            libraries = Library.nodes.order_by("name").all()
        except Exception as e:
            error = f"Could not connect to Neo4j: {e}"
            logger.error(error)
    else:
        error = "Neo4j is not available."
    return render(
        request,
        "library/library_list.html",
        {"libraries": libraries, "error": error},
    )
|
||||
|
||||
|
||||
@login_required
def library_create(request):
    """Create a library from a submitted ``LibraryForm``.

    GET renders an empty form. A valid POST builds the Library node, filling
    blank instruction/prompt fields (and always the chunking config) from the
    defaults of the chosen library type, then redirects to the detail page.
    An invalid POST, or an error while saving, re-renders the bound form.
    """
    template = "library/library_form.html"
    if request.method != "POST":
        return render(request, template, {"form": LibraryForm(), "editing": False})

    form = LibraryForm(request.POST)
    if form.is_valid():
        try:
            from .models import Library

            data = form.cleaned_data
            library_type = data["library_type"]
            defaults = get_library_type_config(library_type)

            def submitted_or_default(field):
                # A blank submission falls back to the per-type default.
                return data.get(field) or defaults[field]

            new_library = Library(
                name=data["name"],
                library_type=library_type,
                description=data.get("description", ""),
                chunking_config=defaults["chunking_config"],
                embedding_instruction=submitted_or_default("embedding_instruction"),
                reranker_instruction=submitted_or_default("reranker_instruction"),
                llm_context_prompt=submitted_or_default("llm_context_prompt"),
            )
            new_library.save()
            messages.success(request, f'Library "{new_library.name}" created.')
            return redirect("library:library-detail", uid=new_library.uid)
        except Exception as exc:
            messages.error(request, f"Error creating library: {exc}")

    return render(request, template, {"form": form, "editing": False})
|
||||
|
||||
|
||||
@login_required
def library_detail(request, uid):
    """Show one library together with its collections.

    Any failure while loading is flashed as "Library not found" and the user
    is redirected to the library list.
    """
    try:
        from .models import Library

        library = Library.nodes.get(uid=uid)
        context = {"library": library, "collections": library.collections.all()}
    except Exception as exc:
        messages.error(request, f"Library not found: {exc}")
        return redirect("library:library-list")

    return render(request, "library/library_detail.html", context)
|
||||
|
||||
|
||||
@login_required
def library_edit(request, uid):
    """Edit the library identified by ``uid``.

    GET renders the form pre-filled from the node. A valid POST copies the
    submitted values onto the node (optional fields left blank are stored as
    empty strings; ``chunking_config`` is not touched), saves it and
    redirects to the detail page. An invalid POST, or an error while saving,
    re-renders the bound form.
    """
    try:
        from .models import Library

        library = Library.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Library not found: {exc}")
        return redirect("library:library-list")

    required_fields = ("name", "library_type")
    optional_fields = (
        "description",
        "embedding_instruction",
        "reranker_instruction",
        "llm_context_prompt",
    )

    if request.method == "POST":
        form = LibraryForm(request.POST)
        if form.is_valid():
            try:
                for field in required_fields:
                    setattr(library, field, form.cleaned_data[field])
                for field in optional_fields:
                    setattr(library, field, form.cleaned_data.get(field, ""))
                library.save()
                messages.success(request, f'Library "{library.name}" updated.')
                return redirect("library:library-detail", uid=library.uid)
            except Exception as exc:
                messages.error(request, f"Error updating library: {exc}")
    else:
        form = LibraryForm(
            initial={
                field: getattr(library, field)
                for field in required_fields + optional_fields
            }
        )

    context = {"form": form, "editing": True, "library": library}
    return render(request, "library/library_form.html", context)
|
||||
|
||||
|
||||
@login_required
def library_delete(request, uid):
    """Confirm (GET) and perform (POST) deletion of a library."""
    try:
        from .models import Library

        library = Library.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Library not found: {exc}")
        return redirect("library:library-list")

    if request.method != "POST":
        return render(
            request, "library/library_confirm_delete.html", {"library": library}
        )

    # Capture the name first; it is needed for the message after deletion.
    deleted_name = library.name
    library.delete()
    messages.success(request, f'Library "{deleted_name}" deleted.')
    return redirect("library:library-list")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collection views
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@login_required
def collection_create(request, library_uid):
    """Add a collection to the library identified by ``library_uid``.

    GET renders an empty form. A valid POST saves the new Collection node,
    connects it through both ``library.collections`` and
    ``collection.library``, and redirects to the collection detail page. An
    invalid POST, or an error while saving, re-renders the bound form.
    """
    try:
        from .models import Library

        parent = Library.nodes.get(uid=library_uid)
    except Exception as exc:
        messages.error(request, f"Library not found: {exc}")
        return redirect("library:library-list")

    is_post = request.method == "POST"
    form = CollectionForm(request.POST) if is_post else CollectionForm()

    if is_post and form.is_valid():
        try:
            from .models import Collection

            collection = Collection(
                name=form.cleaned_data["name"],
                description=form.cleaned_data.get("description", ""),
            )
            collection.save()
            parent.collections.connect(collection)
            collection.library.connect(parent)
            messages.success(request, f'Collection "{collection.name}" created.')
            return redirect("library:collection-detail", uid=collection.uid)
        except Exception as exc:
            messages.error(request, f"Error creating collection: {exc}")

    context = {"form": form, "library": parent, "editing": False}
    return render(request, "library/collection_form.html", context)
|
||||
|
||||
|
||||
@login_required
def collection_detail(request, uid):
    """Show one collection, its items and its first connected library.

    Any failure while loading is flashed as "Collection not found" and the
    user is redirected to the library list.
    """
    try:
        from .models import Collection

        collection = Collection.nodes.get(uid=uid)
        items = collection.items.all()
        parents = collection.library.all()
        context = {
            "collection": collection,
            "items": items,
            # The first connected library, or None when there is none.
            "library": parents[0] if parents else None,
        }
    except Exception as exc:
        messages.error(request, f"Collection not found: {exc}")
        return redirect("library:library-list")

    return render(request, "library/collection_detail.html", context)
|
||||
|
||||
|
||||
@login_required
def collection_edit(request, uid):
    """Edit the name and description of the collection identified by ``uid``.

    GET renders the form pre-filled from the node. A valid POST saves the
    changes and redirects to the collection detail page. An invalid POST, or
    an error while saving, re-renders the bound form.
    """
    try:
        from .models import Collection

        collection = Collection.nodes.get(uid=uid)
        parents = collection.library.all()
        library = parents[0] if parents else None
    except Exception as exc:
        messages.error(request, f"Collection not found: {exc}")
        return redirect("library:library-list")

    if request.method == "POST":
        form = CollectionForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            try:
                collection.name = cleaned["name"]
                collection.description = cleaned.get("description", "")
                collection.save()
                messages.success(request, f'Collection "{collection.name}" updated.')
                return redirect("library:collection-detail", uid=collection.uid)
            except Exception as exc:
                messages.error(request, f"Error updating collection: {exc}")
    else:
        prefill = {"name": collection.name, "description": collection.description}
        form = CollectionForm(initial=prefill)

    context = {
        "form": form,
        "collection": collection,
        "library": library,
        "editing": True,
    }
    return render(request, "library/collection_form.html", context)
|
||||
|
||||
|
||||
@login_required
def collection_delete(request, uid):
    """Confirm (GET) and perform (POST) deletion of a collection."""
    try:
        from .models import Collection

        collection = Collection.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Collection not found: {exc}")
        return redirect("library:library-list")

    if request.method != "POST":
        return render(
            request,
            "library/collection_confirm_delete.html",
            {"collection": collection},
        )

    # Capture the name first; it is needed for the message after deletion.
    deleted_name = collection.name
    collection.delete()
    messages.success(request, f'Collection "{deleted_name}" deleted.')
    return redirect("library:library-list")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Item views
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@login_required
def item_create(request, collection_uid):
    """Create a new item within a collection, with optional file upload.

    GET renders an empty form with ``auto_embed`` pre-checked. A valid POST
    creates the Item node, stores an uploaded file (if any) through
    ``default_storage``, connects the item to the collection, optionally
    queues the embedding task, and redirects to the item detail page. An
    invalid POST, or an error while saving, re-renders the bound form.

    Failing to queue the embedding task does not fail the request: the item
    is still created and the user is shown a warning.
    """
    try:
        from .models import Collection

        col = Collection.nodes.get(uid=collection_uid)
        libraries = col.library.all()
        library = libraries[0] if libraries else None
    except Exception as e:
        messages.error(request, f"Collection not found: {e}")
        return redirect("library:library-list")

    if request.method == "POST":
        form = ItemForm(request.POST, request.FILES)
        if form.is_valid():
            try:
                from .models import Item

                uploaded_file = request.FILES.get("file")
                file_type = form.cleaned_data.get("file_type", "")

                # Infer file_type from upload if not explicitly set
                if uploaded_file and not file_type:
                    _, ext = os.path.splitext(uploaded_file.name)
                    file_type = ext.lstrip(".").lower()

                item = Item(
                    title=form.cleaned_data["title"],
                    item_type=form.cleaned_data.get("item_type", ""),
                    file_type=file_type,
                    embedding_status="pending",
                )

                # Handle file upload
                if uploaded_file:
                    file_data = uploaded_file.read()
                    item.file_size = len(file_data)
                    item.content_hash = hashlib.sha256(file_data).hexdigest()
                    # Save before uploading: the storage key is built from
                    # item.uid.
                    item.save()

                    # Store in S3. Append the extension only when there is
                    # one, so an extensionless upload does not get a key
                    # ending in a dangling ".".
                    s3_key = f"items/{item.uid}/original"
                    if file_type:
                        s3_key = f"{s3_key}.{file_type}"
                    # The storage backend may adjust the name; record the
                    # name it actually used, not the one requested.
                    item.s3_key = default_storage.save(s3_key, ContentFile(file_data))
                    item.save()
                else:
                    item.save()

                col.items.connect(item)

                # Auto-trigger embedding if file uploaded and checkbox set
                auto_embed = form.cleaned_data.get("auto_embed", True)
                if uploaded_file and auto_embed:
                    try:
                        from .tasks import embed_item

                        task = embed_item.delay(item.uid, request.user.id)
                        messages.info(
                            request,
                            f"Embedding queued (task: {task.id})",
                        )
                    except Exception as exc:
                        logger.warning("Failed to queue embedding: %s", exc)
                        # Tell the user too (as item_reembed does); otherwise
                        # the item silently stays "pending".
                        messages.warning(
                            request,
                            f"Failed to queue embedding: {exc}",
                        )

                messages.success(request, f'Item "{item.title}" created.')
                return redirect("library:item-detail", uid=item.uid)
            except Exception as e:
                messages.error(request, f"Error creating item: {e}")
    else:
        form = ItemForm(initial={"auto_embed": True})
    return render(
        request,
        "library/item_form.html",
        {"form": form, "collection": col, "library": library, "editing": False},
    )
|
||||
|
||||
|
||||
@login_required
def item_detail(request, uid):
    """Show one item with its chunks, images and concepts.

    Any failure while loading is flashed as "Item not found" and the user is
    redirected to the library list.
    """
    try:
        from .models import Item

        item = Item.nodes.get(uid=uid)
        context = {
            "item": item,
            "chunks": item.chunks.all(),
            "images": item.images.all(),
            "concepts": item.concepts.all(),
        }
    except Exception as exc:
        messages.error(request, f"Item not found: {exc}")
        return redirect("library:library-list")

    return render(request, "library/item_detail.html", context)
|
||||
|
||||
|
||||
@login_required
def item_edit(request, uid):
    """Edit the title, item type and file type of an existing item.

    GET renders the form pre-filled from the node. A valid POST saves the
    changes and redirects to the item detail page. An invalid POST, or an
    error while saving, re-renders the bound form. Uploaded files are not
    passed to the form here.
    """
    try:
        from .models import Item

        item = Item.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Item not found: {exc}")
        return redirect("library:library-list")

    if request.method == "POST":
        form = ItemForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            try:
                item.title = cleaned["title"]
                item.item_type = cleaned.get("item_type", "")
                item.file_type = cleaned.get("file_type", "")
                item.save()
                messages.success(request, f'Item "{item.title}" updated.')
                return redirect("library:item-detail", uid=item.uid)
            except Exception as exc:
                messages.error(request, f"Error updating item: {exc}")
    else:
        prefill = {
            "title": item.title,
            "item_type": item.item_type,
            "file_type": item.file_type,
        }
        form = ItemForm(initial=prefill)

    context = {"form": form, "item": item, "editing": True}
    return render(request, "library/item_form.html", context)
|
||||
|
||||
|
||||
@login_required
def item_reembed(request, uid):
    """Queue re-embedding of an item, then return to its detail page.

    Only a POST queues the task; any other method just redirects. A failure
    to queue is flashed as an error instead of being raised.
    """
    try:
        from .models import Item

        item = Item.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Item not found: {exc}")
        return redirect("library:library-list")

    if request.method == "POST":
        try:
            from .tasks import reembed_item

            task = reembed_item.delay(uid, request.user.id)
            messages.info(request, f"Re-embedding queued for \"{item.title}\" (task: {task.id})")
        except Exception as exc:
            messages.error(request, f"Failed to queue re-embedding: {exc}")

    # Both the POST and the non-POST path end on the item detail page.
    return redirect("library:item-detail", uid=uid)
|
||||
|
||||
|
||||
@login_required
def item_delete(request, uid):
    """Confirm (GET) and perform (POST) deletion of an item."""
    try:
        from .models import Item

        item = Item.nodes.get(uid=uid)
    except Exception as exc:
        messages.error(request, f"Item not found: {exc}")
        return redirect("library:library-list")

    if request.method != "POST":
        return render(request, "library/item_confirm_delete.html", {"item": item})

    # Capture the title first; it is needed for the message after deletion.
    deleted_title = item.title
    item.delete()
    messages.success(request, f'Item "{deleted_title}" deleted.')
    return redirect("library:library-list")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Embedding Pipeline Dashboard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@login_required
def embedding_dashboard(request):
    """
    Render the embedding pipeline dashboard.

    The context carries the three system models from LLM Manager, per-status
    item counts, per-label node counts and embedded/total chunk counts. Each
    data source is best-effort: if the models or Neo4j cannot be queried the
    page still renders with whatever was collected up to that point.
    """
    template = "library/embedding_dashboard.html"
    context = {
        "system_embedding_model": None,
        "system_chat_model": None,
        "system_reranker_model": None,
        "status_counts": {},
        "node_counts": {},
        "total_items": 0,
        "embedded_chunks": 0,
        "total_chunks": 0,
        "neo4j_available": False,
    }

    # System models from LLM Manager
    try:
        from llm_manager.models import LLMModel

        context["system_embedding_model"] = LLMModel.get_system_embedding_model()
        context["system_chat_model"] = LLMModel.get_system_chat_model()
        context["system_reranker_model"] = LLMModel.get_system_reranker_model()
    except Exception as exc:
        logger.warning("Could not load system models: %s", exc)

    if not neo4j_available():
        return render(request, template, context)

    # Item status counts and node counts from Neo4j
    context["neo4j_available"] = True
    try:
        from neomodel import db

        def scalar(*query_args):
            """Run a count query and return its single value (0 if no rows)."""
            rows, _ = db.cypher_query(*query_args)
            return rows[0][0] if rows else 0

        # Fill the context dicts in place so partial results survive an error.
        by_status = context["status_counts"]
        for status in ("pending", "processing", "completed", "failed"):
            by_status[status] = scalar(
                "MATCH (i:Item {embedding_status: $status}) RETURN count(i)",
                {"status": status},
            )

        context["total_items"] = scalar("MATCH (i:Item) RETURN count(i)")

        by_label = context["node_counts"]
        node_labels = (
            "Library",
            "Collection",
            "Item",
            "Chunk",
            "Concept",
            "Image",
            "ImageEmbedding",
        )
        for label in node_labels:
            # Labels come from the fixed tuple above, never from user input.
            by_label[label] = scalar(f"MATCH (n:{label}) RETURN count(n)")

        context["embedded_chunks"] = scalar(
            "MATCH (c:Chunk) WHERE c.embedding IS NOT NULL RETURN count(c)"
        )
        context["total_chunks"] = by_label.get("Chunk", 0)
    except Exception as exc:
        logger.warning("Could not query Neo4j for dashboard: %s", exc)
        messages.warning(request, f"Neo4j query error: {exc}")

    return render(request, template, context)
|
||||
|
||||
|
||||
@login_required
def embed_all_pending(request):
    """
    Queue one batch embedding task for every pending item that has a file.

    Only acts on POST. Every path, including failures, ends with a redirect
    back to the embedding dashboard; the outcome is shown as a flash message.
    """
    dashboard = "library:embedding-dashboard"
    if request.method != "POST":
        return redirect(dashboard)

    try:
        from neomodel import db

        rows, _ = db.cypher_query(
            "MATCH (i:Item {embedding_status: 'pending'}) "
            "WHERE i.s3_key IS NOT NULL AND i.s3_key <> '' "
            "RETURN i.uid"
        )
        item_uids = [row[0] for row in rows]

        if item_uids:
            from .tasks import batch_embed_items

            task = batch_embed_items.delay(item_uids, request.user.id)
            messages.success(
                request,
                f"Queued embedding for {len(item_uids)} items (task: {task.id})",
            )
        else:
            messages.info(request, "No pending items with files to embed.")
    except Exception as exc:
        logger.error("Failed to trigger batch embedding: %s", exc, exc_info=True)
        messages.error(request, f"Failed to trigger embedding: {exc}")

    return redirect(dashboard)
|
||||
1
mnemosyne/llm_manager/__init__.py
Normal file
1
mnemosyne/llm_manager/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Points Django at the app's AppConfig class by dotted path.
# NOTE(review): recent Django versions discover a single AppConfig in apps.py
# automatically and deprecate this variable -- confirm the target Django
# version and drop this line if it is no longer needed.
default_app_config = "llm_manager.apps.LLMManagerConfig"
|
||||
326
mnemosyne/llm_manager/admin.py
Normal file
326
mnemosyne/llm_manager/admin.py
Normal file
@@ -0,0 +1,326 @@
|
||||
"""
|
||||
Admin configuration for LLM Manager — ported from Spelunker.
|
||||
|
||||
Adds system model actions for embedding, chat, and reranker models.
|
||||
"""
|
||||
|
||||
from django.contrib import admin, messages
|
||||
from django.db import transaction
|
||||
from django.utils.html import format_html
|
||||
|
||||
from .models import LLMApi, LLMModel, LLMUsage
|
||||
from .services import test_llm_api
|
||||
|
||||
|
||||
@admin.register(LLMApi)
class LLMApiAdmin(admin.ModelAdmin):
    """Admin for LLM API endpoints, with an action to test the connection."""

    list_display = (
        "name",
        "api_type",
        "base_url",
        "is_active",
        "last_test_status",
        "last_tested_at",
        "supports_streaming",
        "timeout_seconds",
        "created_at",
    )
    list_filter = ("api_type", "is_active", "last_test_status", "supports_streaming")
    search_fields = ("name", "base_url")
    # Timestamps and test results are shown in the form but are not editable.
    readonly_fields = (
        "created_at",
        "updated_at",
        "last_tested_at",
        "last_test_status",
        "last_test_message",
    )
    actions = ["test_api_connection"]
    fieldsets = (
        ("API Info", {"fields": ("name", "api_type", "base_url", "is_active")}),
        ("Security", {"fields": ("api_key",)}),
        (
            "Advanced",
            {"fields": ("supports_streaming", "timeout_seconds", "max_retries", "created_by")},
        ),
        (
            "Test Status",
            {"fields": ("last_tested_at", "last_test_status", "last_test_message")},
        ),
        ("Timestamps", {"fields": ("created_at", "updated_at")}),
    )

    def test_api_connection(self, request, queryset):
        """Run ``test_llm_api`` on each selected API and report the results.

        One message is emitted per API, followed by a totals summary when at
        least one test succeeded and a warning when at least one failed.
        """
        succeeded = 0
        failed = 0
        # Running totals, keyed by the result-dict keys they are read from.
        totals = {"models_added": 0, "models_updated": 0, "models_deactivated": 0}

        for api in queryset:
            result = test_llm_api(api)

            if not result["success"]:
                failed += 1
                self.message_user(request, f"✗ {api.name}: {result['error']}", messages.ERROR)
                continue

            succeeded += 1
            for key in totals:
                totals[key] += result[key]
            self.message_user(request, f"✓ {api.name}: {result['message']}", messages.SUCCESS)

        if succeeded > 0:
            added = totals["models_added"]
            updated = totals["models_updated"]
            deactivated = totals["models_deactivated"]
            summary = (
                f"Tested {succeeded} API(s). "
                f"Total: {added} added, {updated} updated, "
                f"{deactivated} deactivated."
            )
            self.message_user(request, summary, messages.SUCCESS)

        if failed > 0:
            self.message_user(
                request,
                f"Failed to test {failed} API(s). Check logs.",
                messages.WARNING,
            )

    test_api_connection.short_description = "Test API Connection and Discover Models"
|
||||
|
||||
|
||||
@admin.register(LLMModel)
class LLMModelAdmin(admin.ModelAdmin):
    """Admin for LLM models, including the per-type "system default" flags.

    The three ``is_system_*_model`` flags are read-only in the form and are
    changed through the ``set_as_system_*`` admin actions. ``save_model``
    additionally enforces that a flag is only kept on a model of a matching
    type and that no other model carries the same flag.
    """

    # Which model types may carry each system-default flag.
    _SYSTEM_FLAG_TYPES = {
        "is_system_embedding_model": ("embedding", "multimodal_embed"),
        "is_system_chat_model": ("chat",),
        "is_system_reranker_model": ("reranker",),
    }

    list_display = (
        "name",
        "api",
        "model_type",
        "vector_dimensions_display",
        "context_window",
        "input_cost_per_1k",
        "system_embedding_badge",
        "system_chat_badge",
        "system_reranker_badge",
        "is_active",
        "created_at",
    )
    list_filter = (
        "api",
        "model_type",
        "supports_cache",
        "supports_vision",
        "supports_multimodal",
        "is_active",
        "is_system_embedding_model",
        "is_system_chat_model",
        "is_system_reranker_model",
    )
    search_fields = ("name", "display_name", "api__name")
    readonly_fields = (
        "created_at",
        "updated_at",
        "is_system_embedding_model",
        "is_system_chat_model",
        "is_system_reranker_model",
    )
    actions = [
        "set_as_system_embedding_model",
        "set_as_system_chat_model",
        "set_as_system_reranker_model",
    ]
    fieldsets = (
        ("Model Info", {"fields": ("api", "name", "display_name", "model_type", "is_active")}),
        (
            "System Defaults",
            {
                "fields": (
                    "is_system_embedding_model",
                    "is_system_chat_model",
                    "is_system_reranker_model",
                ),
                "classes": ("collapse",),
                "description": (
                    "System default models are set via admin actions. "
                    "Only one model per type can be system default."
                ),
            },
        ),
        (
            "Capabilities",
            {
                "fields": (
                    "context_window",
                    "max_output_tokens",
                    "vector_dimensions",
                    "supports_cache",
                    "supports_vision",
                    "supports_multimodal",
                    "supports_function_calling",
                    "supports_json_mode",
                ),
            },
        ),
        (
            "Pricing",
            {"fields": ("input_cost_per_1k", "output_cost_per_1k", "cached_cost_per_1k")},
        ),
        ("Timestamps", {"fields": ("created_at", "updated_at")}),
    )

    # --- List-display helpers -------------------------------------------------

    @staticmethod
    def _system_default_badge(background):
        """Return the "SYSTEM DEFAULT" badge HTML with the given background colour."""
        # format_html() must receive arguments: calling it with a bare string
        # is deprecated since Django 5.0.
        return format_html(
            '<span style="background:{};color:white;padding:3px 8px;'
            'border-radius:3px;font-weight:bold;">{}</span>',
            background,
            "SYSTEM DEFAULT",
        )

    def vector_dimensions_display(self, obj):
        """Show the embedding dimensions; "-" for non-embedding model types."""
        if obj.model_type not in self._SYSTEM_FLAG_TYPES["is_system_embedding_model"]:
            return "-"
        if obj.vector_dimensions:
            return format_html(
                '<span style="color: #0066cc; font-weight: bold;">{}</span>',
                obj.vector_dimensions,
            )
        return format_html('<span style="color: #999;">{}</span>', "Not set")

    vector_dimensions_display.short_description = "Dimensions"

    def system_embedding_badge(self, obj):
        """Badge for the system default embedding model, else an empty string."""
        if obj.is_system_embedding_model and obj.model_type in ("embedding", "multimodal_embed"):
            return self._system_default_badge("#28a745")
        return ""

    system_embedding_badge.short_description = "Embed Default"

    def system_chat_badge(self, obj):
        """Badge for the system default chat model, else an empty string."""
        if obj.is_system_chat_model and obj.model_type == "chat":
            return self._system_default_badge("#007bff")
        return ""

    system_chat_badge.short_description = "Chat Default"

    def system_reranker_badge(self, obj):
        """Badge for the system default reranker model, else an empty string."""
        if obj.is_system_reranker_model and obj.model_type == "reranker":
            return self._system_default_badge("#fd7e14")
        return ""

    system_reranker_badge.short_description = "Reranker Default"

    # --- System model actions -----------------------------------------------

    def _set_system_model(self, request, queryset, model_type, field_name, label):
        """Generic helper for set-as-system-model admin actions.

        Requires exactly one selected model that is active and of a type
        valid for ``field_name``. On success the flag is cleared on every
        model that has it and set on the selected model, in one transaction.
        Problems are reported through admin messages; nothing is raised.
        """
        if queryset.count() != 1:
            self.message_user(
                request,
                f"Please select exactly ONE model to set as system {label}.",
                messages.ERROR,
            )
            return

        new_model = queryset.first()

        # "embedding" also admits multimodal embedding models.
        valid_types = [model_type]
        if model_type == "embedding":
            valid_types = ["embedding", "multimodal_embed"]

        if new_model.model_type not in valid_types:
            self.message_user(
                request,
                f'Only {label} models can be set as system {label}. '
                f'"{new_model.name}" is type: {new_model.model_type}',
                messages.ERROR,
            )
            return

        if not new_model.is_active:
            self.message_user(
                request,
                f'Cannot set inactive model "{new_model.name}" as system {label}.',
                messages.ERROR,
            )
            return

        # Clear-then-set atomically so there is never a window with two defaults.
        with transaction.atomic():
            LLMModel.objects.filter(**{field_name: True}).update(**{field_name: False})
            setattr(new_model, field_name, True)
            new_model.save(update_fields=[field_name])

        self.message_user(
            request,
            f"✓ {new_model.api.name}: {new_model.name} is now the system {label}.",
            messages.SUCCESS,
        )

    def set_as_system_embedding_model(self, request, queryset):
        """Admin action: make the selected model the system embedding model."""
        self._set_system_model(request, queryset, "embedding", "is_system_embedding_model", "embedding model")

    set_as_system_embedding_model.short_description = "Set as System Embedding Model"

    def set_as_system_chat_model(self, request, queryset):
        """Admin action: make the selected model the system chat model."""
        self._set_system_model(request, queryset, "chat", "is_system_chat_model", "chat model")

    set_as_system_chat_model.short_description = "Set as System Chat Model"

    def set_as_system_reranker_model(self, request, queryset):
        """Admin action: make the selected model the system reranker model."""
        self._set_system_model(request, queryset, "reranker", "is_system_reranker_model", "reranker model")

    set_as_system_reranker_model.short_description = "Set as System Reranker Model"

    def save_model(self, request, obj, form, change):
        """Ensure only ONE model per type is marked as system default.

        For each flag set on ``obj``: if the model type is valid for that
        flag, the flag is cleared on all other models; otherwise the flag is
        removed from ``obj``. Each flag is evaluated exactly once against its
        full set of valid types, so an "embedding" or "multimodal_embed"
        model keeps ``is_system_embedding_model`` when saved.
        """
        for field, valid_types in self._SYSTEM_FLAG_TYPES.items():
            if not getattr(obj, field, False):
                continue
            if obj.model_type in valid_types:
                LLMModel.objects.filter(**{field: True}).exclude(pk=obj.pk).update(**{field: False})
                self.message_user(
                    request,
                    f"{obj.name} is now the system-wide {obj.model_type} model.",
                    messages.SUCCESS,
                )
            else:
                # Flag does not apply to this model type; drop it before saving.
                setattr(obj, field, False)
        super().save_model(request, obj, form, change)
|
||||
|
||||
|
||||
@admin.register(LLMUsage)
class LLMUsageAdmin(admin.ModelAdmin):
    """Admin listing of LLM usage records; adding and changing are disabled."""

    list_display = (
        "timestamp", "user", "model",
        "input_tokens", "output_tokens", "cached_tokens",
        "total_cost", "session_id", "purpose",
    )
    list_filter = ("model", "purpose", "timestamp")
    search_fields = ("user__username", "session_id", "model__name")
    readonly_fields = (
        "user", "model", "timestamp",
        "input_tokens", "output_tokens", "cached_tokens",
        "total_cost", "session_id", "purpose",
        "request_metadata",
    )
    date_hierarchy = "timestamp"

    def has_add_permission(self, request):
        """Usage records cannot be created from the admin."""
        return False

    def has_change_permission(self, request, obj=None):
        """Usage records cannot be edited from the admin."""
        return False
|
||||
0
mnemosyne/llm_manager/api/__init__.py
Normal file
0
mnemosyne/llm_manager/api/__init__.py
Normal file
105
mnemosyne/llm_manager/api/serializers.py
Normal file
105
mnemosyne/llm_manager/api/serializers.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
DRF serializers for LLM Manager.
|
||||
"""
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
from ..models import LLMApi, LLMModel, LLMUsage
|
||||
|
||||
|
||||
class LLMApiSerializer(serializers.ModelSerializer):
    """Serialize an ``LLMApi`` together with a count of its active models.

    ``api_key`` is not among the serialized fields.
    """

    model_count = serializers.SerializerMethodField()

    class Meta:
        model = LLMApi
        fields = [
            "id", "name", "api_type", "base_url", "is_active",
            "supports_streaming", "timeout_seconds", "max_retries",
            "last_tested_at", "last_test_status",
            "model_count",
            "created_at", "updated_at",
        ]
        read_only_fields = [
            "id",
            "last_tested_at", "last_test_status",
            "created_at", "updated_at",
        ]

    def get_model_count(self, obj):
        """Return how many models of this API have ``is_active=True``."""
        active_models = obj.models.filter(is_active=True)
        return active_models.count()
|
||||
|
||||
|
||||
class LLMModelSerializer(serializers.ModelSerializer):
    """Serialize an ``LLMModel``; adds the owning API's name as ``api_name``.

    The system-default flags are exposed read-only.
    """

    api_name = serializers.CharField(source="api.name", read_only=True)

    class Meta:
        model = LLMModel
        fields = [
            # Identity
            "id", "api", "api_name", "name", "display_name", "model_type",
            # Limits
            "context_window", "max_output_tokens", "vector_dimensions",
            # Capabilities
            "supports_cache", "supports_vision", "supports_multimodal",
            "supports_function_calling", "supports_json_mode",
            # Pricing
            "input_cost_per_1k", "output_cost_per_1k", "cached_cost_per_1k",
            # Status
            "is_active",
            "is_system_embedding_model", "is_system_chat_model", "is_system_reranker_model",
            "created_at", "updated_at",
        ]
        read_only_fields = [
            "id",
            "is_system_embedding_model", "is_system_chat_model", "is_system_reranker_model",
            "created_at", "updated_at",
        ]
|
||||
|
||||
|
||||
class LLMUsageSerializer(serializers.ModelSerializer):
    """Serialize an ``LLMUsage`` record.

    Adds read-only ``model_name``, ``api_name`` and ``username`` fields that
    are resolved through the ``model`` and ``user`` relations.
    """

    # Read-only conveniences resolved through the relations.
    model_name = serializers.CharField(source="model.name", read_only=True)
    api_name = serializers.CharField(source="model.api.name", read_only=True)
    username = serializers.CharField(source="user.username", read_only=True)

    class Meta:
        model = LLMUsage
        fields = [
            "id",
            "user", "username",
            "model", "model_name", "api_name",
            "timestamp",
            "input_tokens", "output_tokens", "cached_tokens",
            "total_cost",
            "session_id", "purpose", "request_metadata",
        ]
        read_only_fields = ["id", "timestamp", "total_cost"]
|
||||
18
mnemosyne/llm_manager/api/urls.py
Normal file
18
mnemosyne/llm_manager/api/urls.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
DRF API URL patterns for LLM Manager.
|
||||
"""
|
||||
|
||||
from django.urls import path
|
||||
|
||||
from . import views
|
||||
|
||||
# URL namespace used when reversing these routes.
app_name = "llm-manager-api"

urlpatterns = [
    # LLM API providers
    path("apis/", views.api_list, name="api_list"),
    path("apis/<uuid:pk>/", views.api_detail, name="api_detail"),
    # LLM models ("system" is not a UUID, so it never matches the <uuid:pk> route)
    path("models/", views.model_list, name="model_list"),
    path("models/<uuid:pk>/", views.model_detail, name="model_detail"),
    path("models/system/", views.system_models, name="system_models"),
    # Usage records
    path("usage/", views.usage_list, name="usage_list"),
]
|
||||
100
mnemosyne/llm_manager/api/views.py
Normal file
100
mnemosyne/llm_manager/api/views.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
DRF API views for LLM Manager — FBVs per Red Panda Standards.
|
||||
"""
|
||||
|
||||
from rest_framework import status
|
||||
from rest_framework.decorators import api_view, permission_classes
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
|
||||
from ..models import LLMApi, LLMModel, LLMUsage
|
||||
from .serializers import LLMApiSerializer, LLMModelSerializer, LLMUsageSerializer
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def api_list(request):
    """Return all LLM APIs, ordered by name."""
    ordered_apis = LLMApi.objects.all().order_by("name")
    return Response(LLMApiSerializer(ordered_apis, many=True).data)
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def api_detail(request, pk):
    """Return the LLM API with primary key ``pk``, or a 404 error payload."""
    try:
        found = LLMApi.objects.get(pk=pk)
    except LLMApi.DoesNotExist:
        not_found = {"error": "Not found"}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)
    else:
        return Response(LLMApiSerializer(found).data)
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def model_list(request):
    """List all LLM Models, optionally filtered by API or type.

    Query parameters:
        api: UUID of the owning API. A malformed value yields HTTP 400.
        type: exact ``model_type`` to match.
        active: "1" or "true" (case-insensitive) restricts to active models.
    """
    import uuid  # local import: only needed to validate the ``api`` parameter

    qs = LLMModel.objects.select_related("api").order_by("api__name", "name")
    api_id = request.query_params.get("api")
    model_type = request.query_params.get("type")
    active_only = request.query_params.get("active", "").lower() in ("1", "true")

    if api_id:
        # Validate up front: passing a malformed UUID into the filter makes
        # the ORM raise ValidationError, which would surface as HTTP 500.
        try:
            api_uuid = uuid.UUID(api_id)
        except ValueError:
            return Response(
                {"error": "Invalid 'api' parameter: expected a UUID."},
                status=status.HTTP_400_BAD_REQUEST,
            )
        qs = qs.filter(api_id=api_uuid)
    if model_type:
        qs = qs.filter(model_type=model_type)
    if active_only:
        qs = qs.filter(is_active=True)

    serializer = LLMModelSerializer(qs, many=True)
    return Response(serializer.data)
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def model_detail(request, pk):
    """Return the LLM Model with primary key ``pk``, or a 404 error payload."""
    models_with_api = LLMModel.objects.select_related("api")
    try:
        found = models_with_api.get(pk=pk)
    except LLMModel.DoesNotExist:
        not_found = {"error": "Not found"}
        return Response(not_found, status=status.HTTP_404_NOT_FOUND)
    else:
        return Response(LLMModelSerializer(found).data)
|
||||
|
||||
|
||||
@api_view(["GET"])
@permission_classes([IsAuthenticated])
def system_models(request):
    """Return the current system default models, keyed by role.

    The response contains an "embedding", "chat" and/or "reranker" entry
    for each role whose lookup returned a truthy model; other roles are omitted.
    """
    defaults_by_role = (
        ("embedding", LLMModel.get_system_embedding_model()),
        ("chat", LLMModel.get_system_chat_model()),
        ("reranker", LLMModel.get_system_reranker_model()),
    )
    payload = {
        role: LLMModelSerializer(instance).data
        for role, instance in defaults_by_role
        if instance
    }
    return Response(payload)
|
||||
|
||||
|
||||
@api_view(["GET", "POST"])
@permission_classes([IsAuthenticated])
def usage_list(request):
    """List the current user's usage records (GET) or create one (POST).

    GET returns at most the 100 most recent records of the requesting user.
    POST validates the payload and stores the record with ``user`` forced to
    the requesting user; it returns 201 on success, 400 with the validation
    errors otherwise.
    """
    if request.method != "GET":
        # POST — create a usage record
        incoming = LLMUsageSerializer(data=request.data)
        if not incoming.is_valid():
            return Response(incoming.errors, status=status.HTTP_400_BAD_REQUEST)
        incoming.save(user=request.user)
        return Response(incoming.data, status=status.HTTP_201_CREATED)

    own_records = LLMUsage.objects.filter(user=request.user)
    own_records = own_records.select_related("model", "model__api")
    latest = own_records.order_by("-timestamp")[:100]
    return Response(LLMUsageSerializer(latest, many=True).data)
|
||||
7
mnemosyne/llm_manager/apps.py
Normal file
7
mnemosyne/llm_manager/apps.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class LLMManagerConfig(AppConfig):
    """Django application configuration for the ``llm_manager`` app."""

    name = "llm_manager"
    verbose_name = "LLM Manager"
    default_auto_field = "django.db.models.BigAutoField"
|
||||
65
mnemosyne/llm_manager/encryption.py
Normal file
65
mnemosyne/llm_manager/encryption.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Fernet encryption field for LLM API keys.
|
||||
|
||||
Uses LLM_API_SECRETS_ENCRYPTION_KEY from settings if available,
|
||||
otherwise derives a key from Django's SECRET_KEY (Themis pattern).
|
||||
"""
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
|
||||
|
||||
def _get_fernet():
    """
    Build the Fernet cipher used for API-key encryption.

    Uses ``settings.LLM_API_SECRETS_ENCRYPTION_KEY`` when it is set to a
    truthy value (a ``str`` is encoded to bytes first). Otherwise the key is
    the URL-safe base64 encoding of the SHA-256 digest of ``SECRET_KEY``.
    """
    configured_key = getattr(settings, "LLM_API_SECRETS_ENCRYPTION_KEY", None)
    if not configured_key:
        # Fallback: derive from SECRET_KEY like Themis
        secret_bytes = settings.SECRET_KEY.encode("utf-8")
        return Fernet(base64.urlsafe_b64encode(hashlib.sha256(secret_bytes).digest()))

    if isinstance(configured_key, str):
        configured_key = configured_key.encode()
    return Fernet(configured_key)
|
||||
|
||||
|
||||
class EncryptedCharField(models.CharField):
    """
    CharField that transparently encrypts/decrypts values using Fernet.

    Values are encrypted before saving to the database and decrypted
    when read. ``None`` and the empty string are stored unchanged.

    The stored form is the Fernet token wrapped in an additional standard
    base64 encoding, so it is considerably longer than the plaintext.
    NOTE(review): confirm ``max_length`` leaves room for the longest expected key.
    """

    description = "Encrypted CharField for storing API secrets"

    def get_prep_value(self, value):
        """Encrypt before saving to DB."""
        if value is None or value == "":
            return value
        cipher = _get_fernet()
        encrypted = cipher.encrypt(value.encode("utf-8"))
        return base64.b64encode(encrypted).decode("utf-8")

    def from_db_value(self, value, expression, connection):
        """Decrypt when loading from DB.

        If the stored value cannot be decoded or decrypted (for example it
        was written with a different key, or is not ciphertext at all), the
        raw stored value is returned unchanged and a warning is logged.
        Other errors propagate instead of being silently swallowed.
        """
        if value is None or value == "":
            return value
        try:
            cipher = _get_fernet()
            encrypted = base64.b64decode(value)
            return cipher.decrypt(encrypted).decode("utf-8")
        except (InvalidToken, ValueError):
            # ValueError also covers binascii.Error (bad base64) and
            # UnicodeDecodeError, which are its subclasses.
            import logging

            logging.getLogger(__name__).warning(
                "Could not decrypt a stored %s value; returning it unchanged.",
                type(self).__name__,
            )
            return value

    def to_python(self, value):
        """Return ``value`` as a ``str``; ``None`` is passed through."""
        if isinstance(value, str) or value is None:
            return value
        return str(value)
|
||||
82
mnemosyne/llm_manager/forms.py
Normal file
82
mnemosyne/llm_manager/forms.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Forms for LLM Manager — DaisyUI-styled widgets.
|
||||
"""
|
||||
|
||||
from django import forms
|
||||
|
||||
from .models import LLMApi, LLMModel
|
||||
|
||||
|
||||
class LLMApiForm(forms.ModelForm):
    """ModelForm for ``LLMApi`` with DaisyUI CSS classes on every widget."""

    class Meta:
        # Shared widget attribute sets; each widget copies the dict it is given.
        _input = {"class": "input input-bordered w-full"}
        _select = {"class": "select select-bordered w-full"}
        _toggle = {"class": "toggle toggle-primary"}

        model = LLMApi
        fields = [
            "name", "api_type", "base_url", "api_key",
            "is_active", "supports_streaming",
            "timeout_seconds", "max_retries",
        ]
        widgets = {
            "name": forms.TextInput(attrs=_input),
            "api_type": forms.Select(attrs=_select),
            "base_url": forms.URLInput(attrs=_input),
            # NOTE(review): render_value=True puts the current key value into
            # the rendered HTML — confirm this is intended.
            "api_key": forms.PasswordInput(
                attrs={**_input, "autocomplete": "off"},
                render_value=True,
            ),
            "is_active": forms.CheckboxInput(attrs=_toggle),
            "supports_streaming": forms.CheckboxInput(attrs=_toggle),
            "timeout_seconds": forms.NumberInput(attrs=_input),
            "max_retries": forms.NumberInput(attrs=_input),
        }
|
||||
|
||||
|
||||
class LLMModelForm(forms.ModelForm):
    """ModelForm for ``LLMModel`` with DaisyUI CSS classes on every widget.

    The system-default flags are not part of this form.
    """

    class Meta:
        # Shared widget attribute sets; each widget copies the dict it is given.
        _input = {"class": "input input-bordered w-full"}
        _select = {"class": "select select-bordered w-full"}
        _toggle = {"class": "toggle toggle-primary"}
        # Cost inputs step in millionths.
        _cost = {"class": "input input-bordered w-full", "step": "0.000001"}

        model = LLMModel
        fields = [
            "api", "name", "display_name", "model_type",
            "context_window", "max_output_tokens", "vector_dimensions",
            "supports_cache", "supports_vision", "supports_multimodal",
            "supports_function_calling", "supports_json_mode",
            "input_cost_per_1k", "output_cost_per_1k", "cached_cost_per_1k",
            "is_active",
        ]
        widgets = {
            "api": forms.Select(attrs=_select),
            "name": forms.TextInput(attrs=_input),
            "display_name": forms.TextInput(attrs=_input),
            "model_type": forms.Select(attrs=_select),
            "context_window": forms.NumberInput(attrs=_input),
            "max_output_tokens": forms.NumberInput(attrs=_input),
            "vector_dimensions": forms.NumberInput(attrs=_input),
            "supports_cache": forms.CheckboxInput(attrs=_toggle),
            "supports_vision": forms.CheckboxInput(attrs=_toggle),
            "supports_multimodal": forms.CheckboxInput(attrs=_toggle),
            "supports_function_calling": forms.CheckboxInput(attrs=_toggle),
            "supports_json_mode": forms.CheckboxInput(attrs=_toggle),
            "input_cost_per_1k": forms.NumberInput(attrs=_cost),
            "output_cost_per_1k": forms.NumberInput(attrs=_cost),
            "cached_cost_per_1k": forms.NumberInput(attrs=_cost),
            "is_active": forms.CheckboxInput(attrs=_toggle),
        }
|
||||
0
mnemosyne/llm_manager/management/__init__.py
Normal file
0
mnemosyne/llm_manager/management/__init__.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""
|
||||
Management command to load default LLM models for common providers.
|
||||
|
||||
Usage:
|
||||
python manage.py load_default_llm_models
|
||||
python manage.py load_default_llm_models --force # update existing models
|
||||
"""
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from llm_manager.models import LLMApi, LLMModel
|
||||
|
||||
|
||||
# API providers created by the command; "name" is the lookup key.
DEFAULT_APIS = [
    {"name": "OpenAI", "api_type": "openai", "base_url": "https://api.openai.com/v1"},
]

# Models created by the command. "api_name" refers to an entry in
# DEFAULT_APIS; every other key is passed to LLMModel as a field value.
DEFAULT_MODELS = [
    # ── Chat models ────────────────────────────────────────────────────
    {
        "api_name": "OpenAI",
        "name": "gpt-4o",
        "display_name": "GPT-4o",
        "model_type": "chat",
        "context_window": 128000,
        "max_output_tokens": 16384,
        "input_cost_per_1k": Decimal("0.0025"),
        "output_cost_per_1k": Decimal("0.01"),
        "supports_cache": True,
        "cached_cost_per_1k": Decimal("0.00125"),
        "supports_vision": True,
        "supports_function_calling": True,
        "supports_json_mode": True,
    },
    {
        "api_name": "OpenAI",
        "name": "gpt-4o-mini",
        "display_name": "GPT-4o Mini",
        "model_type": "chat",
        "context_window": 128000,
        "max_output_tokens": 16384,
        "input_cost_per_1k": Decimal("0.00015"),
        "output_cost_per_1k": Decimal("0.0006"),
        "supports_cache": True,
        "cached_cost_per_1k": Decimal("0.000075"),
        "supports_vision": True,
        "supports_function_calling": True,
        "supports_json_mode": True,
    },
    # ── Embedding models ───────────────────────────────────────────────
    {
        "api_name": "OpenAI",
        "name": "text-embedding-3-large",
        "display_name": "Text Embedding 3 Large",
        "model_type": "embedding",
        "context_window": 8191,
        "vector_dimensions": 3072,
        "input_cost_per_1k": Decimal("0.00013"),
        "output_cost_per_1k": Decimal("0"),
    },
    {
        "api_name": "OpenAI",
        "name": "text-embedding-3-small",
        "display_name": "Text Embedding 3 Small",
        "model_type": "embedding",
        "context_window": 8191,
        "vector_dimensions": 1536,
        "input_cost_per_1k": Decimal("0.00002"),
        "output_cost_per_1k": Decimal("0"),
    },
]
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Create the default LLM APIs and models listed in this module.

    Existing APIs are left untouched. Existing models are left untouched
    unless ``--force`` is given, in which case their fields are overwritten
    with the defaults.
    """

    help = "Load default LLM APIs and models."

    def add_arguments(self, parser):
        """Register the ``--force`` flag."""
        parser.add_argument(
            "--force",
            action="store_true",
            help="Update existing model records with defaults.",
        )

    def handle(self, *args, **options):
        """Create missing APIs and models; with ``--force`` update existing models."""
        force = options["force"]

        # Create default APIs
        api_map = {}
        for api_data in DEFAULT_APIS:
            api, created = LLMApi.objects.get_or_create(
                name=api_data["name"],
                defaults={
                    "api_type": api_data["api_type"],
                    "base_url": api_data["base_url"],
                    "is_active": True,
                },
            )
            api_map[api_data["name"]] = api
            if created:
                self.stdout.write(self.style.SUCCESS(f"Created API: {api.name}"))
            else:
                self.stdout.write(self.style.WARNING(f"API already exists: {api.name}"))

        # Create default models
        for model_data in DEFAULT_MODELS:
            # Read without mutating: DEFAULT_MODELS is shared module state and
            # must stay intact even if a database call below raises.
            api_name = model_data["api_name"]
            api = api_map.get(api_name)
            if not api:
                self.stdout.write(self.style.ERROR(f"API '{api_name}' not found, skipping model."))
                continue

            # Everything except the lookup keys becomes a field default.
            defaults = {k: v for k, v in model_data.items() if k not in ("api_name", "name")}
            model, created = LLMModel.objects.get_or_create(
                api=api,
                name=model_data["name"],
                defaults=defaults,
            )

            if created:
                self.stdout.write(self.style.SUCCESS(f" Created model: {model.name}"))
            elif force:
                for key, val in defaults.items():
                    setattr(model, key, val)
                model.save()
                self.stdout.write(self.style.SUCCESS(f" Updated model: {model.name}"))
            else:
                self.stdout.write(self.style.WARNING(f" Model exists: {model.name}"))

        self.stdout.write(self.style.SUCCESS("Default LLM models loaded."))
|
||||
130
mnemosyne/llm_manager/migrations/0001_initial.py
Normal file
130
mnemosyne/llm_manager/migrations/0001_initial.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# Generated by Django 5.2.12 on 2026-03-10 16:59
|
||||
|
||||
import django.db.models.deletion
|
||||
import llm_manager.encryption
|
||||
import uuid
|
||||
from decimal import Decimal
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema for llm_manager.

    Creates the LLMApi, LLMModel and LLMUsage tables, their indexes, and the
    (api, name) uniqueness constraint on LLMModel.
    """

    initial = True

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='LLMApi',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('name', models.CharField(max_length=100, unique=True)),
                ('api_type', models.CharField(choices=[('openai', 'OpenAI Compatible'), ('azure', 'Azure OpenAI'), ('ollama', 'Ollama'), ('anthropic', 'Anthropic'), ('llama-cpp', 'Llama.cpp'), ('vllm', 'vLLM')], max_length=20)),
                ('base_url', models.URLField()),
                ('api_key', llm_manager.encryption.EncryptedCharField(blank=True, default='', max_length=500)),
                ('is_active', models.BooleanField(default=True)),
                ('supports_streaming', models.BooleanField(default=True)),
                ('timeout_seconds', models.PositiveIntegerField(default=60)),
                ('max_retries', models.PositiveIntegerField(default=3)),
                ('last_tested_at', models.DateTimeField(blank=True, help_text='Last time this API was tested', null=True)),
                ('last_test_status', models.CharField(choices=[('success', 'Success'), ('failed', 'Failed'), ('pending', 'Pending')], default='pending', help_text='Result of the last API test', max_length=20)),
                ('last_test_message', models.TextField(blank=True, help_text='Details from the last test (success message or error)')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='llm_apis_created', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name': 'LLM API',
                'verbose_name_plural': 'LLM APIs',
                'ordering': ['name'],
            },
        ),
        migrations.CreateModel(
            name='LLMModel',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('name', models.CharField(max_length=100)),
                ('display_name', models.CharField(blank=True, max_length=200)),
                ('model_type', models.CharField(choices=[('chat', 'Chat/Completion'), ('embedding', 'Embedding'), ('vision', 'Vision'), ('audio', 'Audio'), ('reranker', 'Reranker'), ('multimodal_embed', 'Multimodal Embedding')], max_length=20)),
                ('context_window', models.PositiveIntegerField(help_text='Maximum context in tokens')),
                ('max_output_tokens', models.PositiveIntegerField(blank=True, null=True)),
                ('supports_cache', models.BooleanField(default=False)),
                ('supports_vision', models.BooleanField(default=False)),
                ('supports_function_calling', models.BooleanField(default=False)),
                ('supports_json_mode', models.BooleanField(default=False)),
                ('supports_multimodal', models.BooleanField(default=False, help_text='Flag models that accept image+text input')),
                ('vector_dimensions', models.PositiveIntegerField(blank=True, help_text='Embedding output dimensions (e.g., 4096)', null=True)),
                ('input_cost_per_1k', models.DecimalField(decimal_places=6, default=Decimal('0'), help_text='Cost per 1K input tokens in USD', max_digits=10)),
                ('output_cost_per_1k', models.DecimalField(decimal_places=6, default=Decimal('0'), help_text='Cost per 1K output tokens in USD', max_digits=10)),
                ('cached_cost_per_1k', models.DecimalField(blank=True, decimal_places=6, help_text='Cost per 1K cached tokens (if supported)', max_digits=10, null=True)),
                ('is_active', models.BooleanField(default=True)),
                ('is_system_embedding_model', models.BooleanField(default=False, help_text='Mark this as the system-wide embedding model. Only ONE embedding model should have this set to True.')),
                ('is_system_chat_model', models.BooleanField(default=False, help_text='Mark this as the system-wide chat model. Only ONE chat model should have this set to True.')),
                ('is_system_reranker_model', models.BooleanField(default=False, help_text='Mark this as the system-wide reranker model. Only ONE reranker model should have this set to True.')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('api', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='models', to='llm_manager.llmapi')),
            ],
            options={
                'ordering': ['api', 'name'],
            },
        ),
        migrations.CreateModel(
            name='LLMUsage',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('timestamp', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('input_tokens', models.PositiveIntegerField(default=0)),
                ('output_tokens', models.PositiveIntegerField(default=0)),
                ('cached_tokens', models.PositiveIntegerField(default=0)),
                ('total_cost', models.DecimalField(decimal_places=6, default=Decimal('0'), help_text='Total cost in USD', max_digits=12)),
                ('session_id', models.CharField(blank=True, db_index=True, max_length=100)),
                ('purpose', models.CharField(choices=[('responder', 'RAG Responder'), ('reviewer', 'RAG Reviewer'), ('embeddings', 'Document Embeddings'), ('search', 'Vector Search'), ('reranking', 'Re-ranking'), ('multimodal_embed', 'Multimodal Embedding'), ('other', 'Other')], db_index=True, default='other', max_length=50)),
                ('request_metadata', models.JSONField(blank=True, help_text='Additional context (prompt, temperature, etc.)', null=True)),
                ('model', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, related_name='usage_records', to='llm_manager.llmmodel')),
                ('user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='llm_usage', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-timestamp'],
            },
        ),
        # Indexes on LLMModel.
        migrations.AddIndex(
            model_name='llmmodel',
            index=models.Index(fields=['api', 'model_type', 'is_active'], name='llm_manager_api_id_140af0_idx'),
        ),
        migrations.AddIndex(
            model_name='llmmodel',
            index=models.Index(fields=['is_system_embedding_model', 'model_type'], name='llm_manager_is_syst_39386f_idx'),
        ),
        migrations.AddIndex(
            model_name='llmmodel',
            index=models.Index(fields=['is_system_chat_model', 'model_type'], name='llm_manager_is_syst_346eb3_idx'),
        ),
        migrations.AddIndex(
            model_name='llmmodel',
            index=models.Index(fields=['is_system_reranker_model', 'model_type'], name='llm_manager_is_syst_cc73c6_idx'),
        ),
        # A model name is unique within its API.
        migrations.AlterUniqueTogether(
            name='llmmodel',
            unique_together={('api', 'name')},
        ),
        # Indexes on LLMUsage.
        migrations.AddIndex(
            model_name='llmusage',
            index=models.Index(fields=['-timestamp', 'user'], name='llm_manager_timesta_aa66fc_idx'),
        ),
        migrations.AddIndex(
            model_name='llmusage',
            index=models.Index(fields=['-timestamp', 'model'], name='llm_manager_timesta_0b5c38_idx'),
        ),
        migrations.AddIndex(
            model_name='llmusage',
            index=models.Index(fields=['purpose', '-timestamp'], name='llm_manager_purpose_37c32c_idx'),
        ),
        migrations.AddIndex(
            model_name='llmusage',
            index=models.Index(fields=['session_id'], name='llm_manager_session_1da37d_idx'),
        ),
    ]
|
||||
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
Add 'bedrock' to LLMApi.api_type choices.
|
||||
|
||||
Django migrations track field changes including choices — this migration
|
||||
updates the api_type field to include the new Amazon Bedrock option.
|
||||
"""
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Re-declare ``LLMApi.api_type`` so its choices include Amazon Bedrock."""

    # Must run after the app's initial schema migration.
    dependencies = [("llm_manager", "0001_initial")]

    operations = [
        migrations.AlterField(
            model_name="llmapi",
            name="api_type",
            field=models.CharField(
                max_length=20,
                # Full choice list as it stands after this migration;
                # "bedrock" is the entry being introduced.
                choices=[
                    ("openai", "OpenAI Compatible"),
                    ("azure", "Azure OpenAI"),
                    ("ollama", "Ollama"),
                    ("anthropic", "Anthropic"),
                    ("llama-cpp", "Llama.cpp"),
                    ("vllm", "vLLM"),
                    ("bedrock", "Amazon Bedrock"),
                ],
            ),
        ),
    ]
|
||||
0
mnemosyne/llm_manager/migrations/__init__.py
Normal file
0
mnemosyne/llm_manager/migrations/__init__.py
Normal file
301
mnemosyne/llm_manager/models.py
Normal file
301
mnemosyne/llm_manager/models.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""
|
||||
LLM Manager models — ported from Spelunker with Mnemosyne adaptations.
|
||||
|
||||
Changes from Spelunker:
|
||||
- api_key uses EncryptedCharField with key derived from SECRET_KEY (Themis-style)
|
||||
- LLMModel.model_type adds 'reranker' and 'multimodal_embed' choices
|
||||
- LLMModel adds 'supports_multimodal' and 'vector_dimensions' fields
|
||||
- LLMUsage.purpose adds Mnemosyne-specific choices
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from decimal import Decimal
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.db import models
|
||||
|
||||
from .encryption import EncryptedCharField
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
|
||||
class LLMApi(models.Model):
    """
    An LLM API provider endpoint (OpenAI-compatible, Arke proxy, etc.).

    The API key is held in an ``EncryptedCharField``: it is stored encrypted
    with Fernet symmetric encryption derived from Django's SECRET_KEY.
    """

    # Provider flavours selectable for ``api_type``.
    API_TYPE_CHOICES = [
        ("openai", "OpenAI Compatible"),
        ("azure", "Azure OpenAI"),
        ("ollama", "Ollama"),
        ("anthropic", "Anthropic"),
        ("llama-cpp", "Llama.cpp"),
        ("vllm", "vLLM"),
        ("bedrock", "Amazon Bedrock"),
    ]

    # Outcomes that can be recorded for the most recent connection test.
    TEST_STATUS_CHOICES = [
        ("success", "Success"),
        ("failed", "Failed"),
        ("pending", "Pending"),
    ]

    # --- Identity and connection settings ---
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=100, unique=True)
    api_type = models.CharField(max_length=20, choices=API_TYPE_CHOICES)
    base_url = models.URLField()
    api_key = EncryptedCharField(max_length=500, blank=True, default="")
    is_active = models.BooleanField(default=True)
    supports_streaming = models.BooleanField(default=True)
    timeout_seconds = models.PositiveIntegerField(default=60)
    max_retries = models.PositiveIntegerField(default=3)

    # --- Result of the most recent connection test ---
    last_tested_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="Last time this API was tested",
    )
    last_test_status = models.CharField(
        max_length=20,
        choices=TEST_STATUS_CHOICES,
        default="pending",
        help_text="Result of the last API test",
    )
    last_test_message = models.TextField(
        blank=True,
        help_text="Details from the last test (success message or error)",
    )

    # --- Audit trail ---
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    created_by = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,  # keep the API row if its creator is deleted
        related_name="llm_apis_created",
    )

    class Meta:
        ordering = ["name"]
        verbose_name = "LLM API"
        verbose_name_plural = "LLM APIs"

    def __str__(self):
        """Return ``"<name> (<api_type>)"``."""
        return f"{self.name} ({self.api_type})"
|
||||
|
||||
|
||||
class LLMModel(models.Model):
    """
    A single model exposed by an :class:`LLMApi`.

    Mnemosyne additions over Spelunker:
    - ``model_type`` gains 'reranker' and 'multimodal_embed'
    - ``supports_multimodal`` flags image+text capable models
    - ``vector_dimensions`` records the embedding output size
    """

    # Kinds of model selectable for ``model_type``.
    MODEL_TYPE_CHOICES = [
        ("chat", "Chat/Completion"),
        ("embedding", "Embedding"),
        ("vision", "Vision"),
        ("audio", "Audio"),
        ("reranker", "Reranker"),
        ("multimodal_embed", "Multimodal Embedding"),
    ]

    # --- Identity ---
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Deleting an API cascades to its models.
    api = models.ForeignKey(LLMApi, on_delete=models.CASCADE, related_name="models")
    name = models.CharField(max_length=100)
    display_name = models.CharField(max_length=200, blank=True)

    model_type = models.CharField(max_length=20, choices=MODEL_TYPE_CHOICES)

    # --- Capabilities ---
    context_window = models.PositiveIntegerField(
        help_text="Maximum context in tokens"
    )
    max_output_tokens = models.PositiveIntegerField(null=True, blank=True)
    supports_cache = models.BooleanField(default=False)
    supports_vision = models.BooleanField(default=False)
    supports_function_calling = models.BooleanField(default=False)
    supports_json_mode = models.BooleanField(default=False)

    # --- Mnemosyne additions ---
    supports_multimodal = models.BooleanField(
        default=False,
        help_text="Flag models that accept image+text input",
    )
    vector_dimensions = models.PositiveIntegerField(
        null=True,
        blank=True,
        help_text="Embedding output dimensions (e.g., 4096)",
    )

    # --- Pricing (USD per 1K tokens) ---
    input_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K input tokens in USD",
    )
    output_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K output tokens in USD",
    )
    cached_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        null=True,
        blank=True,
        help_text="Cost per 1K cached tokens (if supported)",
    )

    # --- Activation and system-wide role flags ---
    is_active = models.BooleanField(default=True)
    is_system_embedding_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide embedding model. "
            "Only ONE embedding model should have this set to True."
        ),
    )
    is_system_chat_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide chat model. "
            "Only ONE chat model should have this set to True."
        ),
    )
    is_system_reranker_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide reranker model. "
            "Only ONE reranker model should have this set to True."
        ),
    )

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["api", "name"]
        unique_together = [("api", "name")]
        indexes = [
            models.Index(fields=["api", "model_type", "is_active"]),
            models.Index(fields=["is_system_embedding_model", "model_type"]),
            models.Index(fields=["is_system_chat_model", "model_type"]),
            models.Index(fields=["is_system_reranker_model", "model_type"]),
        ]

    def __str__(self):
        """Return ``"<api name>: <model name>"``."""
        return f"{self.api.name}: {self.name}"

    @classmethod
    def _active_system_model(cls, flag_field, **type_lookup):
        """
        Return the first active model whose ``flag_field`` is True.

        :param flag_field: name of one of the ``is_system_*_model`` booleans.
        :param type_lookup: extra filter on ``model_type`` (exact or ``__in``).
        :returns: an ``LLMModel`` instance, or ``None`` when nothing matches.
        """
        lookup = {flag_field: True, "is_active": True}
        lookup.update(type_lookup)
        return cls.objects.filter(**lookup).first()

    @classmethod
    def get_system_embedding_model(cls):
        """Get the system-wide embedding model."""
        return cls._active_system_model(
            "is_system_embedding_model",
            model_type__in=["embedding", "multimodal_embed"],
        )

    @classmethod
    def get_system_chat_model(cls):
        """Get the system-wide chat model."""
        return cls._active_system_model("is_system_chat_model", model_type="chat")

    @classmethod
    def get_system_reranker_model(cls):
        """Get the system-wide reranker model."""
        return cls._active_system_model(
            "is_system_reranker_model", model_type="reranker"
        )
|
||||
|
||||
|
||||
class LLMUsage(models.Model):
    """
    Tracks token usage and cost for all LLM API calls.

    ``total_cost`` is filled in automatically on save from the token counts
    and the linked model's per-1K pricing whenever it is unset or zero.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    user = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, related_name="llm_usage"
    )
    # PROTECT: a model that has usage records cannot be deleted.
    model = models.ForeignKey(
        LLMModel, on_delete=models.PROTECT, related_name="usage_records"
    )
    timestamp = models.DateTimeField(auto_now_add=True, db_index=True)

    input_tokens = models.PositiveIntegerField(default=0)
    output_tokens = models.PositiveIntegerField(default=0)
    cached_tokens = models.PositiveIntegerField(default=0)

    total_cost = models.DecimalField(
        max_digits=12,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Total cost in USD",
    )

    session_id = models.CharField(max_length=100, blank=True, db_index=True)
    purpose = models.CharField(
        max_length=50,
        choices=[
            ("responder", "RAG Responder"),
            ("reviewer", "RAG Reviewer"),
            ("embeddings", "Document Embeddings"),
            ("search", "Vector Search"),
            ("reranking", "Re-ranking"),
            ("multimodal_embed", "Multimodal Embedding"),
            ("other", "Other"),
        ],
        default="other",
        db_index=True,
    )
    request_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional context (prompt, temperature, etc.)",
    )

    class Meta:
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["-timestamp", "user"]),
            models.Index(fields=["-timestamp", "model"]),
            models.Index(fields=["purpose", "-timestamp"]),
            models.Index(fields=["session_id"]),
        ]

    def save(self, *args, **kwargs):
        """Save the record, computing ``total_cost`` first if it is unset or zero."""
        # None and Decimal("0") are both falsy, so one check covers both.
        if not self.total_cost:
            self.total_cost = self.calculate_cost()
        super().save(*args, **kwargs)

    def calculate_cost(self):
        """
        Calculate cost based on token usage and model pricing.

        The arithmetic is done entirely in ``Decimal`` so that no binary
        floating-point error leaks into stored monetary values.

        :returns: total cost in USD as a ``Decimal`` rounded to 6 places
            (the precision of the ``total_cost`` column).
        """

        def as_decimal(value):
            # Prices are Decimals when loaded from the database, but an
            # unsaved in-memory model may carry a float/int/str instead.
            return value if isinstance(value, Decimal) else Decimal(str(value))

        pricing = self.model
        weighted = self.input_tokens * as_decimal(pricing.input_cost_per_1k)
        weighted += self.output_tokens * as_decimal(pricing.output_cost_per_1k)
        # Cached tokens are only billed when the model defines a cached price.
        if self.cached_tokens and pricing.cached_cost_per_1k:
            weighted += self.cached_tokens * as_decimal(pricing.cached_cost_per_1k)

        # Prices are quoted per 1K tokens.
        return (weighted / 1000).quantize(Decimal("0.000001"))

    def __str__(self):
        """Return ``"<model name> - <timestamp> - $<total_cost>"``."""
        return f"{self.model.name} - {self.timestamp} - ${self.total_cost}"
|
||||
275
mnemosyne/llm_manager/services.py
Normal file
275
mnemosyne/llm_manager/services.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Services for LLM API testing and model discovery.
|
||||
|
||||
Ported from Spelunker with Mnemosyne adaptations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _record_test_outcome(api, status, message):
    """Stamp ``api`` with the time, status and message of the test that just ran."""
    api.last_tested_at = timezone.now()
    api.last_test_status = status
    api.last_test_message = message
    api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])


def _discovered_field_values(model_data, model_name):
    """Map one discovery record onto LLMModel field values, applying defaults."""
    return {
        "display_name": model_data.get("display_name", model_name),
        "model_type": model_data.get("model_type", "chat"),
        "context_window": model_data.get("context_window", 8192),
        "max_output_tokens": model_data.get("max_output_tokens"),
        "supports_cache": model_data.get("supports_cache", False),
        "supports_vision": model_data.get("supports_vision", False),
        "supports_function_calling": model_data.get("supports_function_calling", False),
        "supports_json_mode": model_data.get("supports_json_mode", False),
    }


def _sync_discovered_models(api, discovered_models, result):
    """Upsert discovered models and deactivate ones the API no longer lists.

    Runs in a single transaction and updates the ``models_*`` counters in
    ``result`` in place.
    """
    from decimal import Decimal

    from .models import LLMModel

    discovered_model_names = {m["name"] for m in discovered_models}

    with transaction.atomic():
        for model_data in discovered_models:
            model_name = model_data["name"]
            values = _discovered_field_values(model_data, model_name)
            try:
                existing = LLMModel.objects.get(api=api, name=model_name)
            except LLMModel.DoesNotExist:
                # New models start with zero pricing; it is set manually later.
                LLMModel.objects.create(
                    api=api,
                    name=model_name,
                    input_cost_per_1k=Decimal("0"),
                    output_cost_per_1k=Decimal("0"),
                    is_active=True,
                    **values,
                )
                result["models_added"] += 1
                logger.info("Added new model: %s::%s", api.name, model_name)
            else:
                existing.is_active = True
                for field_name, value in values.items():
                    setattr(existing, field_name, value)
                existing.save()
                result["models_updated"] += 1

        # Deactivate models no longer available
        for model in LLMModel.objects.filter(api=api, is_active=True):
            if model.name not in discovered_model_names:
                model.is_active = False
                model.save(update_fields=["is_active"])
                result["models_deactivated"] += 1
                logger.warning("Deactivated missing model: %s::%s", api.name, model.name)


def test_llm_api(api):
    """
    Test an LLM API connection and discover available models.

    Discovery is supported for the "openai", "vllm", "ollama" and "bedrock"
    API types. On success the API's models are synchronised with what was
    discovered and the test outcome is stored on ``api``. A test that runs
    but fails (an exception, or no models found) is stored as "failed". An
    unsupported API type is reported in the result only, because no test
    was actually performed.

    :param api: LLMApi instance to test.
    :returns: dict with success, models_added/updated/deactivated, message/error.
    """
    result = {
        "success": False,
        "models_added": 0,
        "models_updated": 0,
        "models_deactivated": 0,
        "message": "",
        "error": "",
    }

    logger.info("Testing LLM API: %s (%s) at %s", api.name, api.api_type, api.base_url)

    try:
        if api.api_type in ("openai", "vllm"):
            discovered_models = _discover_openai_models(api)
        elif api.api_type == "ollama":
            discovered_models = _discover_ollama_models(api)
        elif api.api_type == "bedrock":
            discovered_models = _discover_bedrock_models(api)
        else:
            result["error"] = f"API type '{api.api_type}' is not yet supported for auto-discovery"
            logger.warning(result["error"])
            return result

        if not discovered_models:
            result["error"] = "No models discovered from API"
            logger.warning("No models found for API %s", api.name)
            # Persist the failure so a stale "success" from an earlier run
            # is not left on display. Existing models are left untouched.
            _record_test_outcome(api, "failed", result["error"])
            return result

        logger.info("Discovered %d models from %s", len(discovered_models), api.name)
        _sync_discovered_models(api, discovered_models, result)

        summary = (
            f"Added: {result['models_added']}, "
            f"Updated: {result['models_updated']}, "
            f"Deactivated: {result['models_deactivated']}"
        )
        _record_test_outcome(api, "success", summary)

        result["success"] = True
        result["message"] = api.last_test_message
        logger.info("API test successful: %s — %s", api.name, result["message"])

    except Exception as e:
        # Boundary handler: any discovery or database error becomes a
        # recorded failure instead of propagating to the caller.
        result["error"] = f"API test failed: {e}"
        _record_test_outcome(api, "failed", result["error"])
        logger.error("API test failed for %s: %s", api.name, e, exc_info=True)

    return result
|
||||
|
||||
|
||||
def _discover_openai_models(api):
    """
    Discover models from an OpenAI-compatible API.

    Capabilities are not reported by the listing endpoint, so they are
    guessed from keywords in the model id.

    :param api: LLMApi instance supplying base_url, api_key, timeout and retries.
    :returns: list of dicts, one per model, keyed by LLMModel field names.
    :raises ImportError: if the ``openai`` package is not installed.
    """
    try:
        from openai import OpenAI
    except ImportError as exc:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "openai package required for model discovery — pip install openai"
        ) from exc

    client = OpenAI(
        # Key-less servers still get a non-empty placeholder value.
        api_key=api.api_key or "dummy",
        base_url=api.base_url,
        timeout=api.timeout_seconds,
        max_retries=api.max_retries,
    )
    discovered = []
    models_response = client.models.list()

    for model in models_response.data:
        model_id = model.id
        lowered = model_id.lower()  # computed once, reused by every keyword test
        is_gpt_family = any(kw in lowered for kw in ("gpt-4", "gpt-3.5-turbo"))
        discovered.append(
            {
                "name": model_id,
                "display_name": model_id,
                "model_type": _infer_model_type(model_id),
                "context_window": _infer_context_window(model_id),
                "max_output_tokens": None,
                "supports_cache": False,
                "supports_vision": any(
                    kw in lowered for kw in ("vision", "gpt-4-turbo", "gpt-4o")
                ),
                "supports_function_calling": is_gpt_family,
                "supports_json_mode": is_gpt_family,
            }
        )
    return discovered
|
||||
|
||||
|
||||
def _discover_ollama_models(api):
    """
    Discover models from an Ollama API.

    Queries ``<base_url>/api/tags``. Every model is reported as a chat
    model with a 4096-token context and no optional capabilities.

    :param api: LLMApi instance supplying base_url and timeout_seconds.
    :returns: list of dicts, one per model, keyed by LLMModel field names.
    :raises requests.RequestException: on connection failure or HTTP error.
    """
    import requests

    url = f"{api.base_url.rstrip('/')}/api/tags"
    # Honour the configured timeout, as the other discoverers do.
    # 10s is the fallback when it is unset or zero.
    resp = requests.get(url, timeout=api.timeout_seconds or 10)
    resp.raise_for_status()

    discovered = []
    for model in resp.json().get("models", []):
        name = model.get("name")
        if not name:
            # One malformed entry should not abort the whole discovery.
            continue
        discovered.append(
            {
                "name": name,
                "display_name": name,
                "model_type": "chat",
                "context_window": 4096,
                "max_output_tokens": None,
                "supports_cache": False,
                "supports_vision": False,
                "supports_function_calling": False,
                "supports_json_mode": False,
            }
        )
    return discovered
|
||||
|
||||
|
||||
def _discover_bedrock_models(api):
    """
    Discover models from Amazon Bedrock via the Mantle OpenAI-compatible endpoint.

    For Bedrock APIs, the base_url is the bedrock-runtime endpoint. We derive
    the Mantle endpoint from the region to list models.

    Discovery is best-effort: any failure is logged and an empty list is
    returned, so models can still be added manually.

    :param api: LLMApi instance supplying base_url, api_key and timeout_seconds.
    :returns: list of dicts, one per model, keyed by LLMModel field names.
    """
    import requests

    default_region = "us-east-1"

    # Extract region from base_url (e.g. https://bedrock-runtime.us-east-1.amazonaws.com)
    base = api.base_url.rstrip("/")
    region = default_region
    _, marker, remainder = base.partition("bedrock-runtime.")
    if marker:
        # The region is the host label right after "bedrock-runtime.".
        # Fall back to the default if that label is empty.
        region = remainder.split(".", 1)[0] or default_region

    # Use the Mantle endpoint for model listing (OpenAI-compatible)
    mantle_url = f"https://bedrock-mantle.{region}.api.aws/v1/models"
    headers = {}
    if api.api_key:
        headers["Authorization"] = f"Bearer {api.api_key}"

    discovered = []
    try:
        resp = requests.get(mantle_url, headers=headers, timeout=api.timeout_seconds or 30)
        resp.raise_for_status()
        data = resp.json()

        for model in data.get("data", []):
            model_id = model.get("id", "")
            if not model_id:
                # An entry without an id would become a blank-named model.
                continue
            discovered.append(
                {
                    "name": model_id,
                    "display_name": model_id,
                    "model_type": _infer_model_type(model_id),
                    "context_window": _infer_context_window(model_id),
                    "max_output_tokens": None,
                    "supports_cache": False,
                    "supports_vision": any(
                        kw in model_id.lower() for kw in ("claude-3", "nova", "vision")
                    ),
                    "supports_function_calling": False,
                    "supports_json_mode": False,
                }
            )
    except Exception as exc:
        logger.warning("Bedrock Mantle model discovery failed: %s", exc)
        # Fallback: return empty list (user can manually add models)

    return discovered
|
||||
|
||||
|
||||
def _infer_model_type(model_id):
|
||||
"""Infer model type from its identifier."""
|
||||
lower = model_id.lower()
|
||||
if any(kw in lower for kw in ("embed", "embedding")):
|
||||
return "embedding"
|
||||
if "rerank" in lower:
|
||||
return "reranker"
|
||||
return "chat"
|
||||
|
||||
|
||||
def _infer_context_window(model_id):
|
||||
"""Infer context window size from model identifier."""
|
||||
m = model_id.lower()
|
||||
if any(kw in m for kw in ("gpt-4-turbo", "gpt-4-1106", "gpt-4-0125", "gpt-4o")):
|
||||
return 128000
|
||||
if "gpt-4-32k" in m:
|
||||
return 32768
|
||||
if "gpt-4" in m:
|
||||
return 8192
|
||||
if "gpt-3.5-turbo-16k" in m:
|
||||
return 16384
|
||||
if "gpt-3.5-turbo" in m:
|
||||
return 4096
|
||||
if "claude-3" in m:
|
||||
return 200000
|
||||
if "claude-2" in m:
|
||||
return 100000
|
||||
if "32k" in m:
|
||||
return 32768
|
||||
if "16k" in m:
|
||||
return 16384
|
||||
return 8192
|
||||
86
mnemosyne/llm_manager/tasks.py
Normal file
86
mnemosyne/llm_manager/tasks.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
Celery tasks for LLM Manager — ported from Spelunker.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from .models import LLMApi
|
||||
from .services import test_llm_api
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@shared_task(name="llm_manager.validate_all_llm_apis")
def validate_all_llm_apis():
    """
    Periodic task to validate all active LLM APIs and discover models.

    Schedule via Celery Beat (e.g. hourly or daily).

    :returns: summary dict with ``status``, ``tested``, ``successful``,
        ``failed``, ``models_added``, ``models_updated``,
        ``models_deactivated`` and per-API ``details``. The same keys are
        present even when there is nothing to validate.
    """
    logger.info("Starting periodic LLM API validation")

    # Built up front so every return path yields the same shape.
    results = {
        "status": "completed",
        "tested": 0,
        "successful": 0,
        "failed": 0,
        "models_added": 0,
        "models_updated": 0,
        "models_deactivated": 0,
        "details": [],
    }

    active_apis = LLMApi.objects.filter(is_active=True)
    if not active_apis.exists():
        logger.info("No active APIs to validate")
        return results

    for api in active_apis:
        results["tested"] += 1
        try:
            result = test_llm_api(api)
            if result["success"]:
                results["successful"] += 1
                results["models_added"] += result["models_added"]
                results["models_updated"] += result["models_updated"]
                results["models_deactivated"] += result["models_deactivated"]
            else:
                results["failed"] += 1
            results["details"].append(
                {
                    "api_name": api.name,
                    "success": result["success"],
                    "message": result.get("message") or result.get("error", ""),
                }
            )
        except Exception as e:
            # One misbehaving API must not stop validation of the others.
            results["failed"] += 1
            results["details"].append(
                {"api_name": api.name, "success": False, "message": str(e)}
            )
            logger.error("Unexpected error validating %s: %s", api.name, e, exc_info=True)

    logger.info(
        "Completed LLM API validation: %d/%d successful, %d failed",
        results["successful"],
        results["tested"],
        results["failed"],
    )
    return results
|
||||
|
||||
|
||||
@shared_task(name="llm_manager.validate_single_api")
def validate_single_api(api_id):
    """
    Run the connection test for one LLM API, looked up by primary key.

    :param api_id: primary key of the LLMApi to validate.
    :returns: the dict produced by ``test_llm_api``, or
        ``{"success": False, "error": <text>}`` when the API does not exist
        or an unexpected error occurs.
    """
    try:
        return test_llm_api(LLMApi.objects.get(id=api_id))
    except LLMApi.DoesNotExist:
        not_found = f"LLM API with id {api_id} not found"
        logger.error(not_found)
        return {"success": False, "error": not_found}
    except Exception as exc:
        # Report the failure as a result dict instead of raising.
        logger.error("Error validating API %s: %s", api_id, exc, exc_info=True)
        return {"success": False, "error": str(exc)}
|
||||
@@ -0,0 +1,22 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Delete {{ api.name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="max-w-lg mx-auto mt-8">
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-error">Delete LLM API</h2>
|
||||
<p>Are you sure you want to delete <strong>{{ api.name }}</strong>?</p>
|
||||
<p class="text-sm text-base-content/70">This will also remove all associated models. Deletion is blocked while any of those models still has usage records, so usage history is never lost.</p>
|
||||
<div class="card-actions justify-end mt-4">
|
||||
<a href="{% url 'llm_manager:api_detail' api.pk %}" class="btn btn-ghost">Cancel</a>
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
<button type="submit" class="btn btn-error">Delete</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
93
mnemosyne/llm_manager/templates/llm_manager/api_detail.html
Normal file
93
mnemosyne/llm_manager/templates/llm_manager/api_detail.html
Normal file
@@ -0,0 +1,93 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}{{ api.name }} — LLM API{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-6">
|
||||
<div class="text-sm breadcrumbs">
|
||||
<ul>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'llm_manager:api_list' %}">APIs</a></li>
|
||||
<li>{{ api.name }}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="flex justify-between items-center mt-2">
|
||||
<h1 class="text-2xl font-bold">{{ api.name }}</h1>
|
||||
<div class="flex gap-2">
|
||||
<form method="post" action="{% url 'llm_manager:api_test' api.pk %}">
|
||||
{% csrf_token %}
|
||||
<button type="submit" class="btn btn-sm btn-accent">Test Connection</button>
|
||||
</form>
|
||||
<a href="{% url 'llm_manager:api_edit' api.pk %}" class="btn btn-sm btn-primary">Edit</a>
|
||||
<a href="{% url 'llm_manager:api_delete' api.pk %}" class="btn btn-sm btn-error btn-outline">Delete</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API details -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-6 mb-8">
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-lg">Configuration</h2>
|
||||
<div class="grid grid-cols-2 gap-2 text-sm">
|
||||
<span class="font-semibold">Type:</span><span class="badge badge-ghost">{{ api.get_api_type_display }}</span>
|
||||
<span class="font-semibold">Base URL:</span><span class="font-mono text-xs break-all">{{ api.base_url }}</span>
|
||||
<span class="font-semibold">Active:</span><span>{% if api.is_active %}Yes{% else %}No{% endif %}</span>
|
||||
<span class="font-semibold">Streaming:</span><span>{% if api.supports_streaming %}Yes{% else %}No{% endif %}</span>
|
||||
<span class="font-semibold">Timeout:</span><span>{{ api.timeout_seconds }}s</span>
|
||||
<span class="font-semibold">Max Retries:</span><span>{{ api.max_retries }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-lg">Test Status</h2>
|
||||
<div class="grid grid-cols-2 gap-2 text-sm">
|
||||
<span class="font-semibold">Status:</span>
|
||||
<span>
|
||||
{% if api.last_test_status == "success" %}<span class="badge badge-success">Success</span>
|
||||
{% elif api.last_test_status == "failed" %}<span class="badge badge-error">Failed</span>
|
||||
{% else %}<span class="badge badge-warning">Pending</span>{% endif %}
|
||||
</span>
|
||||
<span class="font-semibold">Last Tested:</span><span>{{ api.last_tested_at|default:"Never" }}</span>
|
||||
</div>
|
||||
{% if api.last_test_message %}
|
||||
<div class="mt-2 text-xs bg-base-300 p-2 rounded">{{ api.last_test_message }}</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Models -->
|
||||
<div>
|
||||
<div class="flex justify-between items-center mb-3">
|
||||
<h2 class="text-xl font-semibold">Models ({{ models.count }})</h2>
|
||||
<a href="{% url 'llm_manager:model_create' %}?api={{ api.pk }}" class="btn btn-sm btn-primary">Add Model</a>
|
||||
</div>
|
||||
{% if models %}
|
||||
<div class="overflow-x-auto">
|
||||
<table class="table table-zebra table-sm w-full">
|
||||
<thead><tr><th>Name</th><th>Type</th><th>Context</th><th>Dims</th><th>Active</th><th>System</th></tr></thead>
|
||||
<tbody>
|
||||
{% for m in models %}
|
||||
<tr>
|
||||
<td><a href="{% url 'llm_manager:model_detail' m.pk %}" class="link link-primary">{{ m.name }}</a></td>
|
||||
<td><span class="badge badge-ghost badge-sm">{{ m.get_model_type_display }}</span></td>
|
||||
<td>{{ m.context_window|default:"—" }}</td>
|
||||
<td>{{ m.vector_dimensions|default:"—" }}</td>
|
||||
<td>{% if m.is_active %}<span class="badge badge-success badge-xs">✓</span>{% else %}<span class="badge badge-error badge-xs">✗</span>{% endif %}</td>
|
||||
<td>
|
||||
{% if m.is_system_embedding_model %}<span class="badge badge-sm badge-success">Embed</span>{% endif %}
|
||||
{% if m.is_system_chat_model %}<span class="badge badge-sm badge-info">Chat</span>{% endif %}
|
||||
{% if m.is_system_reranker_model %}<span class="badge badge-sm badge-warning">Rerank</span>{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="alert alert-info">No models for this API yet. Use "Test Connection" to auto-discover or add manually.</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endblock %}
|
||||
39
mnemosyne/llm_manager/templates/llm_manager/api_form.html
Normal file
39
mnemosyne/llm_manager/templates/llm_manager/api_form.html
Normal file
@@ -0,0 +1,39 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}{% if is_edit %}Edit {{ api.name }}{% else %}Add LLM API{% endif %}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="text-sm breadcrumbs mb-4">
|
||||
<ul>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'llm_manager:api_list' %}">APIs</a></li>
|
||||
<li>{% if is_edit %}Edit {{ api.name }}{% else %}Add API{% endif %}</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="max-w-2xl">
|
||||
<h1 class="text-2xl font-bold mb-4">{% if is_edit %}Edit {{ api.name }}{% else %}Add LLM API{% endif %}</h1>
|
||||
|
||||
<form method="post" class="space-y-4">
|
||||
{% csrf_token %}
|
||||
|
||||
{% for field in form %}
|
||||
<div class="form-control w-full">
|
||||
<label class="label"><span class="label-text font-semibold">{{ field.label }}</span></label>
|
||||
{{ field }}
|
||||
{% if field.errors %}
|
||||
<label class="label"><span class="label-text-alt text-error">{{ field.errors.0 }}</span></label>
|
||||
{% endif %}
|
||||
{% if field.help_text %}
|
||||
<label class="label"><span class="label-text-alt">{{ field.help_text }}</span></label>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
|
||||
<div class="flex gap-2 pt-4">
|
||||
<button type="submit" class="btn btn-primary">{% if is_edit %}Save Changes{% else %}Create API{% endif %}</button>
|
||||
<a href="{% if is_edit %}{% url 'llm_manager:api_detail' api.pk %}{% else %}{% url 'llm_manager:api_list' %}{% endif %}" class="btn btn-ghost">Cancel</a>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
56
mnemosyne/llm_manager/templates/llm_manager/api_list.html
Normal file
56
mnemosyne/llm_manager/templates/llm_manager/api_list.html
Normal file
@@ -0,0 +1,56 @@
|
||||
{% extends "themis/base.html" %}

{# Table of configured LLM APIs with per-row edit/delete links. Reads `apis` from the context. #}
{% block title %}LLM APIs{% endblock %}

{% block content %}
<div class="flex justify-between items-center mb-6">
  <h1 class="text-2xl font-bold">LLM APIs</h1>
  <a href="{% url 'llm_manager:api_create' %}" class="btn btn-primary btn-sm">Add API</a>
</div>

{% if apis %}
<div class="overflow-x-auto">
  <table class="table table-zebra w-full">
    <thead>
      <tr>
        <th>Name</th>
        <th>Type</th>
        <th>Base URL</th>
        <th>Active</th>
        <th>Streaming</th>
        <th>Status</th>
        <th>Models</th>
        <th>Actions</th>
      </tr>
    </thead>
    <tbody>
      {% for api in apis %}
      <tr>
        <td><a href="{% url 'llm_manager:api_detail' api.pk %}" class="link link-primary font-semibold">{{ api.name }}</a></td>
        <td><span class="badge badge-ghost">{{ api.get_api_type_display }}</span></td>
        <td class="font-mono text-xs max-w-xs truncate">{{ api.base_url }}</td>
        <td>{% if api.is_active %}<span class="badge badge-success badge-sm">Yes</span>{% else %}<span class="badge badge-error badge-sm">No</span>{% endif %}</td>
        <td>{% if api.supports_streaming %}<span class="badge badge-info badge-sm">Yes</span>{% else %}—{% endif %}</td>
        {# Any status other than "success" or "failed" is shown as Pending. #}
        <td>
          {% if api.last_test_status == "success" %}<span class="badge badge-success badge-sm">OK</span>
          {% elif api.last_test_status == "failed" %}<span class="badge badge-error badge-sm">Failed</span>
          {% else %}<span class="badge badge-warning badge-sm">Pending</span>{% endif %}
        </td>
        <td>{{ api.models.count }}</td>
        <td>
          <a href="{% url 'llm_manager:api_edit' api.pk %}" class="btn btn-xs btn-ghost">Edit</a>
          <a href="{% url 'llm_manager:api_delete' api.pk %}" class="btn btn-xs btn-ghost text-error">Delete</a>
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
{% else %}
<div class="alert alert-info">No APIs configured. <a href="{% url 'llm_manager:api_create' %}" class="link">Add one now</a>.</div>
{% endif %}

<div class="mt-4">
  <a href="{% url 'llm_manager:dashboard' %}" class="btn btn-ghost btn-sm">← Dashboard</a>
</div>
{% endblock %}
|
||||
132
mnemosyne/llm_manager/templates/llm_manager/dashboard.html
Normal file
132
mnemosyne/llm_manager/templates/llm_manager/dashboard.html
Normal file
@@ -0,0 +1,132 @@
|
||||
{% extends "themis/base.html" %}

{# LLM Manager landing page. Context read below: api_count, model_count, usage_count, total_input_tokens, total_output_tokens, #}
{# system_embedding_model, system_chat_model, system_reranker_model, active_apis, recent_usage. #}
{% block title %}LLM Manager — Dashboard{% endblock %}

{% block content %}
<div class="mb-6">
  <h1 class="text-3xl font-bold">LLM Manager</h1>
  <p class="text-base-content/70 mt-1">Manage LLM APIs, models, and usage tracking.</p>
</div>

<!-- Stats cards -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-8">
  <div class="stat bg-base-200 rounded-box shadow">
    <div class="stat-title">Active APIs</div>
    <div class="stat-value text-primary">{{ api_count }}</div>
    <div class="stat-actions"><a href="{% url 'llm_manager:api_list' %}" class="btn btn-sm btn-primary">Manage</a></div>
  </div>
  <div class="stat bg-base-200 rounded-box shadow">
    <div class="stat-title">Active Models</div>
    <div class="stat-value text-secondary">{{ model_count }}</div>
    <div class="stat-actions"><a href="{% url 'llm_manager:model_list' %}" class="btn btn-sm btn-secondary">Browse</a></div>
  </div>
  <div class="stat bg-base-200 rounded-box shadow">
    <div class="stat-title">Your API Calls</div>
    <div class="stat-value text-accent">{{ usage_count }}</div>
    {# Token totals fall back to "0" when the value is empty. #}
    <div class="stat-desc">
      {{ total_input_tokens|default:"0" }} in / {{ total_output_tokens|default:"0" }} out tokens
    </div>
    <div class="stat-actions"><a href="{% url 'llm_manager:usage_list' %}" class="btn btn-sm btn-accent">History</a></div>
  </div>
</div>

<!-- System models -->
<div class="mb-8">
  <h2 class="text-xl font-semibold mb-3">System Default Models</h2>
  <div class="grid grid-cols-1 md:grid-cols-3 gap-4">
    <div class="card bg-base-200 shadow">
      <div class="card-body">
        <h3 class="card-title text-sm">Embedding</h3>
        {% if system_embedding_model %}
        <p class="font-mono text-sm">{{ system_embedding_model.name }}</p>
        <p class="text-xs text-base-content/60">{{ system_embedding_model.api.name }}{% if system_embedding_model.vector_dimensions %} — {{ system_embedding_model.vector_dimensions }}d{% endif %}</p>
        {% else %}
        <p class="text-warning text-sm">Not configured</p>
        {% endif %}
      </div>
    </div>
    <div class="card bg-base-200 shadow">
      <div class="card-body">
        <h3 class="card-title text-sm">Chat</h3>
        {% if system_chat_model %}
        <p class="font-mono text-sm">{{ system_chat_model.name }}</p>
        <p class="text-xs text-base-content/60">{{ system_chat_model.api.name }}</p>
        {% else %}
        <p class="text-warning text-sm">Not configured</p>
        {% endif %}
      </div>
    </div>
    <div class="card bg-base-200 shadow">
      <div class="card-body">
        <h3 class="card-title text-sm">Reranker</h3>
        {% if system_reranker_model %}
        <p class="font-mono text-sm">{{ system_reranker_model.name }}</p>
        <p class="text-xs text-base-content/60">{{ system_reranker_model.api.name }}</p>
        {% else %}
        <p class="text-warning text-sm">Not configured</p>
        {% endif %}
      </div>
    </div>
  </div>
</div>

<!-- Active APIs -->
<div class="mb-8">
  <div class="flex justify-between items-center mb-3">
    <h2 class="text-xl font-semibold">Active APIs</h2>
    <a href="{% url 'llm_manager:api_create' %}" class="btn btn-sm btn-primary">Add API</a>
  </div>
  {% if active_apis %}
  <div class="overflow-x-auto">
    <table class="table table-zebra w-full">
      <thead><tr><th>Name</th><th>Type</th><th>URL</th><th>Status</th><th>Last Tested</th></tr></thead>
      <tbody>
        {% for api in active_apis %}
        <tr>
          <td><a href="{% url 'llm_manager:api_detail' api.pk %}" class="link link-primary">{{ api.name }}</a></td>
          <td><span class="badge badge-ghost">{{ api.get_api_type_display }}</span></td>
          <td class="font-mono text-xs">{{ api.base_url }}</td>
          {# Any status other than "success" or "failed" is shown as Pending. #}
          <td>
            {% if api.last_test_status == "success" %}
            <span class="badge badge-success">OK</span>
            {% elif api.last_test_status == "failed" %}
            <span class="badge badge-error">Failed</span>
            {% else %}
            <span class="badge badge-warning">Pending</span>
            {% endif %}
          </td>
          <td class="text-xs">{{ api.last_tested_at|default:"Never" }}</td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
  {% else %}
  <div class="alert alert-info">No active APIs configured yet.</div>
  {% endif %}
</div>

<!-- Recent usage -->
{# The whole section is omitted when there are no recent usage rows. #}
{% if recent_usage %}
<div>
  <h2 class="text-xl font-semibold mb-3">Recent Usage</h2>
  <div class="overflow-x-auto">
    <table class="table table-zebra table-sm w-full">
      <thead><tr><th>Time</th><th>Model</th><th>In</th><th>Out</th><th>Cost</th><th>Purpose</th></tr></thead>
      <tbody>
        {% for u in recent_usage %}
        <tr>
          <td class="text-xs">{{ u.timestamp|date:"M d H:i" }}</td>
          <td class="font-mono text-xs">{{ u.model.name }}</td>
          <td>{{ u.input_tokens }}</td>
          <td>{{ u.output_tokens }}</td>
          <td>${{ u.total_cost|floatformat:4 }}</td>
          <td><span class="badge badge-ghost badge-sm">{{ u.get_purpose_display }}</span></td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</div>
{% endif %}
{% endblock %}
|
||||
@@ -0,0 +1,22 @@
|
||||
{% extends "themis/base.html" %}

{# Confirmation page shown before an LLM model is deleted. Reads `model` from the context. #}
{% block title %}Delete {{ model.name }}{% endblock %}

{% block content %}
<div class="max-w-lg mx-auto mt-8">
  <div class="card bg-base-200 shadow">
    <div class="card-body">
      <h2 class="card-title text-error">Delete LLM Model</h2>
      <p>Are you sure you want to delete <strong class="font-mono">{{ model.name }}</strong> from <strong>{{ model.api.name }}</strong>?</p>
      {# The model tests expect ProtectedError when a model with usage records is deleted, so tell the user up front. #}
      <p class="text-sm text-base-content/70">Models that are referenced by usage records are protected and cannot be deleted.</p>
      <div class="card-actions justify-end mt-4">
        <a href="{% url 'llm_manager:model_detail' model.pk %}" class="btn btn-ghost">Cancel</a>
        {# The destructive action is a POST guarded by the CSRF token. #}
        <form method="post">
          {% csrf_token %}
          <button type="submit" class="btn btn-error">Delete</button>
        </form>
      </div>
    </div>
  </div>
</div>
{% endblock %}
|
||||
@@ -0,0 +1,66 @@
|
||||
{% extends "themis/base.html" %}

{# Read-only detail page for a single LLM model. Reads `model` from the context. #}
{% block title %}{{ model.name }} — LLM Model{% endblock %}

{% block content %}
<div class="text-sm breadcrumbs mb-4">
  <ul>
    <li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
    <li><a href="{% url 'llm_manager:model_list' %}">Models</a></li>
    <li>{{ model.name }}</li>
  </ul>
</div>

<div class="flex justify-between items-center mb-6">
  <h1 class="text-2xl font-bold font-mono">{{ model.name }}</h1>
  <div class="flex gap-2">
    <a href="{% url 'llm_manager:model_edit' model.pk %}" class="btn btn-sm btn-primary">Edit</a>
    <a href="{% url 'llm_manager:model_delete' model.pk %}" class="btn btn-sm btn-error btn-outline">Delete</a>
  </div>
</div>

<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
  <div class="card bg-base-200 shadow">
    <div class="card-body">
      <h2 class="card-title text-lg">Details</h2>
      <div class="grid grid-cols-2 gap-2 text-sm">
        <span class="font-semibold">Display Name:</span><span>{{ model.display_name|default:"—" }}</span>
        <span class="font-semibold">API:</span><span><a href="{% url 'llm_manager:api_detail' model.api.pk %}" class="link link-primary">{{ model.api.name }}</a></span>
        <span class="font-semibold">Type:</span><span class="badge badge-ghost">{{ model.get_model_type_display }}</span>
        <span class="font-semibold">Active:</span><span>{% if model.is_active %}Yes{% else %}No{% endif %}</span>
        {# Only append the unit when there is a value; a bare placeholder should not read "— tokens". #}
        <span class="font-semibold">Context Window:</span><span>{% if model.context_window %}{{ model.context_window }} tokens{% else %}—{% endif %}</span>
        <span class="font-semibold">Max Output:</span><span>{% if model.max_output_tokens %}{{ model.max_output_tokens }} tokens{% else %}—{% endif %}</span>
        <span class="font-semibold">Dimensions:</span><span>{{ model.vector_dimensions|default:"—" }}</span>
      </div>
    </div>
  </div>

  <div class="card bg-base-200 shadow">
    <div class="card-body">
      <h2 class="card-title text-lg">Capabilities</h2>
      {# One badge per capability flag that is set; nothing is rendered for unset flags. #}
      <div class="flex flex-wrap gap-2">
        {% if model.supports_cache %}<span class="badge badge-success">Cache</span>{% endif %}
        {% if model.supports_vision %}<span class="badge badge-info">Vision</span>{% endif %}
        {% if model.supports_multimodal %}<span class="badge badge-accent">Multimodal</span>{% endif %}
        {% if model.supports_function_calling %}<span class="badge badge-secondary">Functions</span>{% endif %}
        {% if model.supports_json_mode %}<span class="badge badge-warning">JSON Mode</span>{% endif %}
      </div>
      <h3 class="font-semibold mt-4 text-sm">Pricing (per 1K tokens)</h3>
      <div class="grid grid-cols-2 gap-2 text-sm">
        <span>Input:</span><span>${{ model.input_cost_per_1k }}</span>
        <span>Output:</span><span>${{ model.output_cost_per_1k }}</span>
        {% if model.cached_cost_per_1k %}<span>Cached:</span><span>${{ model.cached_cost_per_1k }}</span>{% endif %}
      </div>
      <h3 class="font-semibold mt-4 text-sm">System Defaults</h3>
      <div class="flex flex-wrap gap-2">
        {% if model.is_system_embedding_model %}<span class="badge badge-success">System Embedding</span>{% endif %}
        {% if model.is_system_chat_model %}<span class="badge badge-info">System Chat</span>{% endif %}
        {% if model.is_system_reranker_model %}<span class="badge badge-warning">System Reranker</span>{% endif %}
        {# Fallback text when none of the three system-default flags is set. #}
        {% if not model.is_system_embedding_model and not model.is_system_chat_model and not model.is_system_reranker_model %}
        <span class="text-base-content/50 text-sm">Not a system default</span>
        {% endif %}
      </div>
    </div>
  </div>
</div>
{% endblock %}
|
||||
39
mnemosyne/llm_manager/templates/llm_manager/model_form.html
Normal file
39
mnemosyne/llm_manager/templates/llm_manager/model_form.html
Normal file
@@ -0,0 +1,39 @@
|
||||
{% extends "themis/base.html" %}

{# Create/edit form for an LLM model. Reads `form` and `is_edit` from the context; `model` is only read when `is_edit` is true. #}
{% block title %}{% if is_edit %}Edit {{ model.name }}{% else %}Add LLM Model{% endif %}{% endblock %}

{% block content %}
<div class="text-sm breadcrumbs mb-4">
  <ul>
    <li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
    <li><a href="{% url 'llm_manager:model_list' %}">Models</a></li>
    <li>{% if is_edit %}Edit {{ model.name }}{% else %}Add Model{% endif %}</li>
  </ul>
</div>

<div class="max-w-2xl">
  <h1 class="text-2xl font-bold mb-4">{% if is_edit %}Edit {{ model.name }}{% else %}Add LLM Model{% endif %}</h1>

  <form method="post" class="space-y-4">
    {% csrf_token %}

    {# Errors that are not tied to a single field would otherwise be invisible to the user. #}
    {% if form.non_field_errors %}
    <div class="alert alert-error" role="alert">
      <ul>
        {% for error in form.non_field_errors %}
        <li>{{ error }}</li>
        {% endfor %}
      </ul>
    </div>
    {% endif %}

    {% for field in form %}
    <div class="form-control w-full">
      {# `for` ties the caption to the widget so clicking it focuses the input and screen readers announce it. #}
      <label class="label" for="{{ field.id_for_label }}"><span class="label-text font-semibold">{{ field.label }}</span></label>
      {{ field }}
      {# Show every validation message for the field, not only the first one. #}
      {% for error in field.errors %}
      <label class="label"><span class="label-text-alt text-error">{{ error }}</span></label>
      {% endfor %}
      {% if field.help_text %}
      <label class="label"><span class="label-text-alt">{{ field.help_text }}</span></label>
      {% endif %}
    </div>
    {% endfor %}

    <div class="flex gap-2 pt-4">
      <button type="submit" class="btn btn-primary">{% if is_edit %}Save Changes{% else %}Create Model{% endif %}</button>
      {# Cancel returns to the model being edited, or to the list when creating a new one. #}
      <a href="{% if is_edit %}{% url 'llm_manager:model_detail' model.pk %}{% else %}{% url 'llm_manager:model_list' %}{% endif %}" class="btn btn-ghost">Cancel</a>
    </div>
  </form>
</div>
{% endblock %}
|
||||
68
mnemosyne/llm_manager/templates/llm_manager/model_list.html
Normal file
68
mnemosyne/llm_manager/templates/llm_manager/model_list.html
Normal file
@@ -0,0 +1,68 @@
|
||||
{% extends "themis/base.html" %}

{# Table of LLM models with an API filter bar. Reads `models`, `apis` and `selected_api` from the context. #}
{% block title %}LLM Models{% endblock %}

{% block content %}
<div class="flex justify-between items-center mb-6">
  <h1 class="text-2xl font-bold">LLM Models</h1>
  <a href="{% url 'llm_manager:model_create' %}" class="btn btn-primary btn-sm">Add Model</a>
</div>

<!-- Filter by API -->
<div class="mb-4 flex gap-2 items-center">
  <span class="font-semibold text-sm">Filter API:</span>
  <a href="{% url 'llm_manager:model_list' %}" class="btn btn-xs {% if not selected_api %}btn-primary{% else %}btn-ghost{% endif %}">All</a>
  {% for api in apis %}
  {# `slugify` turns the primary key into text so it can be compared with `selected_api`. #}
  <a href="{% url 'llm_manager:model_list' %}?api={{ api.pk }}" class="btn btn-xs {% if selected_api == api.pk|slugify %}btn-primary{% else %}btn-ghost{% endif %}">{{ api.name }}</a>
  {% endfor %}
</div>

{% if models %}
<div class="overflow-x-auto">
  <table class="table table-zebra w-full">
    <thead>
      <tr>
        <th>Name</th>
        <th>API</th>
        <th>Type</th>
        <th>Context</th>
        <th>Dims</th>
        <th>$/1K In</th>
        <th>$/1K Out</th>
        <th>Active</th>
        <th>System</th>
        <th>Actions</th>
      </tr>
    </thead>
    <tbody>
      {% for m in models %}
      <tr>
        <td><a href="{% url 'llm_manager:model_detail' m.pk %}" class="link link-primary font-mono text-sm">{{ m.name }}</a></td>
        <td class="text-sm">{{ m.api.name }}</td>
        <td><span class="badge badge-ghost badge-sm">{{ m.get_model_type_display }}</span></td>
        <td>{{ m.context_window|default:"—" }}</td>
        <td>{{ m.vector_dimensions|default:"—" }}</td>
        <td class="text-xs">${{ m.input_cost_per_1k }}</td>
        <td class="text-xs">${{ m.output_cost_per_1k }}</td>
        <td>{% if m.is_active %}<span class="badge badge-success badge-xs">✓</span>{% else %}<span class="badge badge-error badge-xs">✗</span>{% endif %}</td>
        {# One badge per system-default flag that is set; the cell is empty otherwise. #}
        <td>
          {% if m.is_system_embedding_model %}<span class="badge badge-sm badge-success">Embed</span>{% endif %}
          {% if m.is_system_chat_model %}<span class="badge badge-sm badge-info">Chat</span>{% endif %}
          {% if m.is_system_reranker_model %}<span class="badge badge-sm badge-warning">Rerank</span>{% endif %}
        </td>
        <td>
          <a href="{% url 'llm_manager:model_edit' m.pk %}" class="btn btn-xs btn-ghost">Edit</a>
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
{% else %}
<div class="alert alert-info">No models found.</div>
{% endif %}

<div class="mt-4">
  <a href="{% url 'llm_manager:dashboard' %}" class="btn btn-ghost btn-sm">← Dashboard</a>
</div>
{% endblock %}
|
||||
72
mnemosyne/llm_manager/templates/llm_manager/usage_list.html
Normal file
72
mnemosyne/llm_manager/templates/llm_manager/usage_list.html
Normal file
@@ -0,0 +1,72 @@
|
||||
{% extends "themis/base.html" %}

{# Usage history: aggregate totals followed by one row per usage record. Reads `totals` and `usage_records` from the context. #}
{% block title %}Usage History{% endblock %}

{% block content %}
<div class="flex justify-between items-center mb-6">
  <h1 class="text-2xl font-bold">Usage History</h1>
</div>

<!-- Totals -->
{# Each total falls back to "0" when the aggregate is empty. #}
{% if totals %}
<div class="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
  <div class="stat bg-base-200 rounded-box shadow py-3">
    <div class="stat-title text-xs">Input Tokens</div>
    <div class="stat-value text-lg">{{ totals.total_input|default:"0" }}</div>
  </div>
  <div class="stat bg-base-200 rounded-box shadow py-3">
    <div class="stat-title text-xs">Output Tokens</div>
    <div class="stat-value text-lg">{{ totals.total_output|default:"0" }}</div>
  </div>
  <div class="stat bg-base-200 rounded-box shadow py-3">
    <div class="stat-title text-xs">Cached Tokens</div>
    <div class="stat-value text-lg">{{ totals.total_cached|default:"0" }}</div>
  </div>
  <div class="stat bg-base-200 rounded-box shadow py-3">
    <div class="stat-title text-xs">Total Cost</div>
    <div class="stat-value text-lg">${{ totals.total_cost|default:"0"|floatformat:4 }}</div>
  </div>
</div>
{% endif %}

{% if usage_records %}
<div class="overflow-x-auto">
  <table class="table table-zebra table-sm w-full">
    <thead>
      <tr>
        <th>Timestamp</th>
        <th>Model</th>
        <th>API</th>
        <th>Input</th>
        <th>Output</th>
        <th>Cached</th>
        <th>Cost</th>
        <th>Purpose</th>
        <th>Session</th>
      </tr>
    </thead>
    <tbody>
      {% for u in usage_records %}
      <tr>
        <td class="text-xs">{{ u.timestamp|date:"Y-m-d H:i:s" }}</td>
        <td class="font-mono text-xs">{{ u.model.name }}</td>
        <td class="text-xs">{{ u.model.api.name }}</td>
        <td>{{ u.input_tokens }}</td>
        <td>{{ u.output_tokens }}</td>
        <td>{{ u.cached_tokens }}</td>
        <td class="text-xs">${{ u.total_cost|floatformat:4 }}</td>
        <td><span class="badge badge-ghost badge-xs">{{ u.get_purpose_display }}</span></td>
        <td class="font-mono text-xs max-w-[8rem] truncate">{{ u.session_id|default:"—" }}</td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
{% else %}
<div class="alert alert-info">No usage records yet.</div>
{% endif %}

<div class="mt-4">
  <a href="{% url 'llm_manager:dashboard' %}" class="btn btn-ghost btn-sm">← Dashboard</a>
</div>
{% endblock %}
|
||||
0
mnemosyne/llm_manager/tests/__init__.py
Normal file
0
mnemosyne/llm_manager/tests/__init__.py
Normal file
154
mnemosyne/llm_manager/tests/test_api.py
Normal file
154
mnemosyne/llm_manager/tests/test_api.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
Tests for LLM Manager DRF API endpoints.
|
||||
"""
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from llm_manager.models import LLMApi, LLMModel, LLMUsage
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
|
||||
class LLMApiEndpointTest(TestCase):
    """Exercise the LLM API list/detail endpoints through a DRF test client."""

    def setUp(self):
        """Create a user, a client authenticated as that user, and one LLMApi row."""
        self.user = User.objects.create_user(username="testuser", password="testpass123")
        self.client = APIClient()
        self.client.force_authenticate(user=self.user)
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )

    def test_api_list(self):
        """The list endpoint returns exactly the API created in setUp."""
        list_url = reverse("llm-manager-api:api_list")
        response = self.client.get(list_url)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)
        self.assertEqual(response.data[0]["name"], "Test API")

    def test_api_detail(self):
        """The detail endpoint returns the API addressed by primary key."""
        detail_url = reverse("llm-manager-api:api_detail", kwargs={"pk": self.api.pk})
        response = self.client.get(detail_url)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data["name"], "Test API")

    def test_api_not_found(self):
        """A freshly generated UUID that matches no row yields 404."""
        import uuid

        missing_pk = uuid.uuid4()
        detail_url = reverse("llm-manager-api:api_detail", kwargs={"pk": missing_pk})
        response = self.client.get(detail_url)
        self.assertEqual(response.status_code, 404)

    def test_requires_auth(self):
        """Dropping the forced authentication makes the list endpoint reject the request."""
        self.client.force_authenticate(user=None)
        response = self.client.get(reverse("llm-manager-api:api_list"))
        # Either status is accepted; which one is returned is not pinned by this test.
        self.assertIn(response.status_code, [401, 403])
|
||||
|
||||
|
||||
class LLMModelEndpointTest(TestCase):
    """Exercise the LLM model list/detail/system-model endpoints."""

    def setUp(self):
        """Create an authenticated client plus one API owning one chat model."""
        self.user = User.objects.create_user(username="testuser", password="testpass123")
        self.client = APIClient()
        self.client.force_authenticate(user=self.user)
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )
        self.model = LLMModel.objects.create(
            api=self.api,
            name="gpt-4o",
            model_type="chat",
            context_window=128000,
        )

    def test_model_list(self):
        """The list endpoint returns the single model created in setUp."""
        response = self.client.get(reverse("llm-manager-api:model_list"))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 1)

    def test_model_list_filter_type(self):
        """`?type=embedding` narrows the list to the embedding model only."""
        LLMModel.objects.create(
            api=self.api,
            name="embed-model",
            model_type="embedding",
            context_window=8191,
        )
        list_url = reverse("llm-manager-api:model_list")
        response = self.client.get(f"{list_url}?type=embedding")
        self.assertEqual(response.status_code, 200)
        # The chat model from setUp must be filtered out.
        self.assertEqual(len(response.data), 1)
        self.assertEqual(response.data[0]["model_type"], "embedding")

    def test_model_detail(self):
        """The detail endpoint returns the model addressed by primary key."""
        detail_url = reverse("llm-manager-api:model_detail", kwargs={"pk": self.model.pk})
        response = self.client.get(detail_url)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data["name"], "gpt-4o")

    def test_system_models_empty(self):
        """With no system flags set, the endpoint returns an empty mapping."""
        response = self.client.get(reverse("llm-manager-api:system_models"))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data, {})

    def test_system_models_configured(self):
        """Flagging the model as system chat model exposes it under the "chat" key."""
        self.model.is_system_chat_model = True
        self.model.save()
        response = self.client.get(reverse("llm-manager-api:system_models"))
        self.assertEqual(response.status_code, 200)
        self.assertIn("chat", response.data)
        self.assertEqual(response.data["chat"]["name"], "gpt-4o")
|
||||
|
||||
|
||||
class LLMUsageEndpointTest(TestCase):
    """Exercise the LLM usage list/create endpoint."""

    def setUp(self):
        """Create an authenticated client plus one API owning one priced chat model."""
        self.user = User.objects.create_user(username="testuser", password="testpass123")
        self.client = APIClient()
        self.client.force_authenticate(user=self.user)
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )
        self.model = LLMModel.objects.create(
            api=self.api,
            name="gpt-4o",
            model_type="chat",
            context_window=128000,
            input_cost_per_1k=Decimal("0.0025"),
            output_cost_per_1k=Decimal("0.01"),
        )

    def test_usage_list_empty(self):
        """With no usage rows, the list endpoint returns an empty list."""
        response = self.client.get(reverse("llm-manager-api:usage_list"))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data, [])

    def test_usage_create(self):
        """POSTing a JSON payload creates exactly one LLMUsage row."""
        payload = {
            "model": str(self.model.pk),
            "input_tokens": 1000,
            "output_tokens": 500,
            "purpose": "other",
        }
        response = self.client.post(
            reverse("llm-manager-api:usage_list"),
            payload,
            format="json",
        )
        self.assertEqual(response.status_code, 201)
        self.assertEqual(LLMUsage.objects.count(), 1)

    def test_usage_list_returns_own_records(self):
        """Only the authenticated user's usage rows are listed."""
        other_user = User.objects.create_user(username="other", password="testpass123")
        LLMUsage.objects.create(user=self.user, model=self.model, input_tokens=100, output_tokens=50)
        LLMUsage.objects.create(user=other_user, model=self.model, input_tokens=200, output_tokens=100)
        response = self.client.get(reverse("llm-manager-api:usage_list"))
        self.assertEqual(response.status_code, 200)
        # Two rows exist in total; the other user's row must not be returned.
        self.assertEqual(len(response.data), 1)
|
||||
236
mnemosyne/llm_manager/tests/test_models.py
Normal file
236
mnemosyne/llm_manager/tests/test_models.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""
|
||||
Tests for LLM Manager models: LLMApi, LLMModel, LLMUsage.
|
||||
"""
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
|
||||
from llm_manager.models import LLMApi, LLMModel, LLMUsage
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
|
||||
class LLMApiModelTest(TestCase):
    """Tests for the LLMApi model."""

    def setUp(self):
        """Create a user and one active LLMApi row attributed to that user."""
        self.user = User.objects.create_user(username="testuser", password="testpass123")
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
            is_active=True,
            created_by=self.user,
        )

    def test_str(self):
        """str() combines the name with the raw api_type value."""
        self.assertEqual(str(self.api), "Test API (openai)")

    def test_default_values(self):
        """Fields not passed in setUp take their declared defaults."""
        # is_active is passed explicitly in setUp; the remaining values are defaults.
        self.assertTrue(self.api.is_active)
        self.assertTrue(self.api.supports_streaming)
        self.assertEqual(self.api.timeout_seconds, 60)
        self.assertEqual(self.api.max_retries, 3)
        self.assertEqual(self.api.last_test_status, "pending")

    def test_uuid_primary_key(self):
        """The primary key is populated and stringifies to 36 characters."""
        self.assertIsNotNone(self.api.pk)
        self.assertEqual(len(str(self.api.pk)), 36)  # UUID format

    def test_unique_name(self):
        """Creating a second API with an existing name is rejected by the database."""
        from django.db import IntegrityError, transaction

        # Assert the specific database error: a bare Exception would also pass on
        # unrelated failures such as a misspelled field name.
        # The atomic block confines the failed INSERT to a savepoint so the
        # test-case transaction stays usable afterwards.
        with self.assertRaises(IntegrityError), transaction.atomic():
            LLMApi.objects.create(
                name="Test API",
                api_type="ollama",
                base_url="http://localhost:11434",
            )
|
||||
|
||||
|
||||
class LLMApiEncryptionTest(TestCase):
    """Round-trip tests for the LLMApi.api_key field."""

    def test_api_key_encrypted_at_rest(self):
        """API key should be encrypted in the database.

        NOTE(review): this only asserts that the key read back through the ORM
        equals the plaintext that was written; the stored column value is not
        inspected here — confirm at-rest encryption is covered elsewhere.
        """
        secret = "sk-test-secret-key-12345"
        created = LLMApi.objects.create(
            name="Encrypted Test",
            api_type="openai",
            base_url="https://api.example.com/v1",
            api_key=secret,
        )
        # Load a fresh instance so the value comes from the database, not from memory.
        reloaded = LLMApi.objects.get(pk=created.pk)
        self.assertEqual(reloaded.api_key, "sk-test-secret-key-12345")

    def test_blank_api_key(self):
        """An empty API key survives a save/reload cycle unchanged."""
        created = LLMApi.objects.create(
            name="No Key",
            api_type="ollama",
            base_url="http://localhost:11434",
            api_key="",
        )
        reloaded = LLMApi.objects.get(pk=created.pk)
        self.assertEqual(reloaded.api_key, "")
|
||||
|
||||
|
||||
class LLMModelModelTest(TestCase):
    """Tests for the LLMModel model."""

    def setUp(self):
        """Create one API and one priced chat model attached to it."""
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )
        self.model = LLMModel.objects.create(
            api=self.api,
            name="gpt-4o",
            display_name="GPT-4o",
            model_type="chat",
            context_window=128000,
            max_output_tokens=16384,
            input_cost_per_1k=Decimal("0.0025"),
            output_cost_per_1k=Decimal("0.01"),
        )

    def test_str(self):
        """str() combines the owning API's name with the model name."""
        self.assertEqual(str(self.model), "Test API: gpt-4o")

    def test_unique_together(self):
        """Model name must be unique per API."""
        from django.db import IntegrityError, transaction

        # Assert the specific database error: a bare Exception would also pass on
        # unrelated failures such as a misspelled field name.
        # The atomic block confines the failed INSERT to a savepoint so the
        # test-case transaction stays usable afterwards.
        with self.assertRaises(IntegrityError), transaction.atomic():
            LLMModel.objects.create(
                api=self.api,
                name="gpt-4o",
                model_type="chat",
                context_window=8192,
            )

    def test_model_types(self):
        """All model types should be creatable."""
        # "chat" is already covered by the model created in setUp.
        for mtype in ["embedding", "vision", "audio", "reranker", "multimodal_embed"]:
            # subTest reports each type separately, so one failure does not hide the rest.
            with self.subTest(model_type=mtype):
                m = LLMModel.objects.create(
                    api=self.api,
                    name=f"test-{mtype}",
                    model_type=mtype,
                    context_window=8192,
                )
                self.assertEqual(m.model_type, mtype)

    def test_mnemosyne_fields(self):
        """Mnemosyne-specific fields: supports_multimodal, vector_dimensions."""
        embed = LLMModel.objects.create(
            api=self.api,
            name="text-embedding-3-large",
            model_type="embedding",
            context_window=8191,
            vector_dimensions=3072,
            supports_multimodal=False,
        )
        self.assertEqual(embed.vector_dimensions, 3072)
        self.assertFalse(embed.supports_multimodal)

    def test_get_system_embedding_model(self):
        """The flagged embedding model is returned by the class-level lookup."""
        embed = LLMModel.objects.create(
            api=self.api,
            name="embed-model",
            model_type="embedding",
            context_window=8191,
            is_system_embedding_model=True,
        )
        result = LLMModel.get_system_embedding_model()
        self.assertEqual(result.pk, embed.pk)

    def test_get_system_chat_model(self):
        """Flagging the setUp model makes it the system chat model."""
        self.model.is_system_chat_model = True
        self.model.save()
        result = LLMModel.get_system_chat_model()
        self.assertEqual(result.pk, self.model.pk)

    def test_get_system_reranker_model(self):
        """The flagged reranker model is returned by the class-level lookup."""
        reranker = LLMModel.objects.create(
            api=self.api,
            name="reranker-model",
            model_type="reranker",
            context_window=8192,
            is_system_reranker_model=True,
        )
        result = LLMModel.get_system_reranker_model()
        self.assertEqual(result.pk, reranker.pk)

    def test_get_system_model_returns_none(self):
        """Returns None when no system model is configured."""
        # The setUp model sets none of the three system flags.
        self.assertIsNone(LLMModel.get_system_embedding_model())
        self.assertIsNone(LLMModel.get_system_chat_model())
        self.assertIsNone(LLMModel.get_system_reranker_model())
|
||||
|
||||
|
||||
class LLMUsageModelTest(TestCase):
    """Tests for the LLMUsage model."""

    def setUp(self):
        """Create a user plus one API and one priced chat model to log usage against."""
        self.user = User.objects.create_user(username="testuser", password="testpass123")
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )
        self.model = LLMModel.objects.create(
            api=self.api,
            name="gpt-4o",
            model_type="chat",
            context_window=128000,
            input_cost_per_1k=Decimal("0.0025"),
            output_cost_per_1k=Decimal("0.01"),
        )

    def _log_usage(self, **fields):
        """Create a usage row for the fixture user/model with the given extra fields."""
        return LLMUsage.objects.create(user=self.user, model=self.model, **fields)

    def test_cost_calculation(self):
        """Total cost is auto-calculated on save."""
        record = self._log_usage(input_tokens=1000, output_tokens=500, purpose="other")
        # 1000/1000 * 0.0025 + 500/1000 * 0.01 = 0.0025 + 0.005 = 0.0075
        self.assertAlmostEqual(float(record.total_cost), 0.0075, places=4)

    def test_cost_with_cached_tokens(self):
        """Cached tokens are priced with the model's cached rate and added to the total."""
        self.model.cached_cost_per_1k = Decimal("0.00125")
        self.model.save()
        record = self._log_usage(
            input_tokens=1000,
            output_tokens=500,
            cached_tokens=2000,
            purpose="responder",
        )
        # 0.0025 + 0.005 + 2000/1000 * 0.00125 = 0.0025 + 0.005 + 0.0025 = 0.01
        self.assertAlmostEqual(float(record.total_cost), 0.01, places=4)

    def test_purpose_choices(self):
        """Every listed purpose value can be stored and read back."""
        purposes = (
            "responder",
            "reviewer",
            "embeddings",
            "search",
            "reranking",
            "multimodal_embed",
            "other",
        )
        for purpose in purposes:
            record = self._log_usage(input_tokens=100, output_tokens=50, purpose=purpose)
            self.assertEqual(record.purpose, purpose)

    def test_protect_model_delete(self):
        """Deleting a model with usage records should raise ProtectedError."""
        from django.db.models import ProtectedError

        self._log_usage(input_tokens=100, output_tokens=50)

        with self.assertRaises(ProtectedError):
            self.model.delete()
|
||||
162
mnemosyne/llm_manager/tests/test_views.py
Normal file
162
mnemosyne/llm_manager/tests/test_views.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Tests for LLM Manager views — FBV-based.
|
||||
"""
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from llm_manager.models import LLMApi, LLMModel
|
||||
|
||||
User = get_user_model()
|
||||
|
||||
|
||||
class LLMDashboardViewTest(TestCase):
    """Tests for the LLM Manager dashboard."""

    def setUp(self):
        """Create a user and log the test client in as that user."""
        credentials = {"username": "testuser", "password": "testpass123"}
        self.user = User.objects.create_user(**credentials)
        self.client.login(**credentials)

    def test_dashboard_requires_login(self):
        """An anonymous request is redirected rather than served."""
        self.client.logout()
        response = self.client.get(reverse("llm_manager:dashboard"))
        self.assertEqual(response.status_code, 302)

    def test_dashboard_renders(self):
        """A logged-in request gets the dashboard page."""
        response = self.client.get(reverse("llm_manager:dashboard"))
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "LLM Manager")
|
||||
|
||||
|
||||
class LLMApiViewTest(TestCase):
    """Tests for LLMApi CRUD views."""

    def setUp(self):
        """Log in a user and create one API owned by that user."""
        credentials = {"username": "testuser", "password": "testpass123"}
        self.user = User.objects.create_user(**credentials)
        self.client.login(**credentials)
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
            created_by=self.user,
        )

    def test_api_list(self):
        """The list page shows the fixture API."""
        response = self.client.get(reverse("llm_manager:api_list"))
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Test API")

    def test_api_detail(self):
        """The detail page for the fixture API renders its name."""
        url = reverse("llm_manager:api_detail", kwargs={"pk": self.api.pk})
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "Test API")

    def test_api_create_get(self):
        """The create form is served on GET."""
        response = self.client.get(reverse("llm_manager:api_create"))
        self.assertEqual(response.status_code, 200)

    def test_api_create_post(self):
        """Posting valid data creates the API and redirects."""
        payload = {
            "name": "New API",
            "api_type": "ollama",
            "base_url": "http://localhost:11434",
            "is_active": True,
            "supports_streaming": True,
            "timeout_seconds": 60,
            "max_retries": 3,
        }
        response = self.client.post(reverse("llm_manager:api_create"), payload)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(LLMApi.objects.filter(name="New API").exists())

    def test_api_edit(self):
        """Posting valid data to the edit view updates the stored API."""
        payload = {
            "name": "Updated API",
            "api_type": "openai",
            "base_url": "https://api.example.com/v2",
            "is_active": True,
            "supports_streaming": True,
            "timeout_seconds": 30,
            "max_retries": 5,
        }
        url = reverse("llm_manager:api_edit", kwargs={"pk": self.api.pk})
        response = self.client.post(url, payload)
        self.assertEqual(response.status_code, 302)
        self.api.refresh_from_db()
        self.assertEqual(self.api.name, "Updated API")

    def test_api_delete(self):
        """Posting to the delete view removes the API and redirects."""
        url = reverse("llm_manager:api_delete", kwargs={"pk": self.api.pk})
        response = self.client.post(url)
        self.assertEqual(response.status_code, 302)
        self.assertFalse(LLMApi.objects.filter(pk=self.api.pk).exists())
|
||||
|
||||
|
||||
class LLMModelViewTest(TestCase):
    """Tests for LLMModel CRUD views."""

    def setUp(self):
        """Log in a user and create one API with one chat model."""
        credentials = {"username": "testuser", "password": "testpass123"}
        self.user = User.objects.create_user(**credentials)
        self.client.login(**credentials)
        self.api = LLMApi.objects.create(
            name="Test API",
            api_type="openai",
            base_url="https://api.example.com/v1",
        )
        self.model = LLMModel.objects.create(
            api=self.api,
            name="gpt-4o",
            model_type="chat",
            context_window=128000,
        )

    def test_model_list(self):
        """The unfiltered list shows the fixture model."""
        response = self.client.get(reverse("llm_manager:model_list"))
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "gpt-4o")

    def test_model_list_filter_by_api(self):
        """Filtering by the fixture API still shows its model."""
        url = reverse("llm_manager:model_list") + f"?api={self.api.pk}"
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "gpt-4o")

    def test_model_detail(self):
        """The detail page for the fixture model renders its name."""
        url = reverse("llm_manager:model_detail", kwargs={"pk": self.model.pk})
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertContains(response, "gpt-4o")

    def test_model_create(self):
        """Posting valid data creates the model and redirects."""
        payload = {
            "api": str(self.api.pk),
            "name": "gpt-4o-mini",
            "model_type": "chat",
            "context_window": 128000,
            "input_cost_per_1k": "0.000150",
            "output_cost_per_1k": "0.000600",
            "is_active": True,
        }
        response = self.client.post(reverse("llm_manager:model_create"), payload)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(LLMModel.objects.filter(name="gpt-4o-mini").exists())

    def test_model_delete(self):
        """Posting to the delete view removes the model and redirects."""
        url = reverse("llm_manager:model_delete", kwargs={"pk": self.model.pk})
        response = self.client.post(url)
        self.assertEqual(response.status_code, 302)
        self.assertFalse(LLMModel.objects.filter(pk=self.model.pk).exists())
|
||||
|
||||
|
||||
class UsageListViewTest(TestCase):
    """Tests for the usage list view."""

    def setUp(self):
        """Create a user and log the test client in as that user."""
        credentials = {"username": "testuser", "password": "testpass123"}
        self.user = User.objects.create_user(**credentials)
        self.client.login(**credentials)

    def test_usage_list(self):
        """The usage list renders for a logged-in user with no usage rows."""
        response = self.client.get(reverse("llm_manager:usage_list"))
        self.assertEqual(response.status_code, 200)
|
||||
28
mnemosyne/llm_manager/urls.py
Normal file
28
mnemosyne/llm_manager/urls.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
URL patterns for LLM Manager — FBVs per Red Panda Standards.
|
||||
"""
|
||||
|
||||
from django.urls import path
|
||||
|
||||
from . import views
|
||||
|
||||
app_name = "llm_manager"
|
||||
|
||||
# Routes for LLMApi objects (UUID primary keys).
_api_routes = [
    path("apis/", views.api_list, name="api_list"),
    path("apis/create/", views.api_create, name="api_create"),
    path("apis/<uuid:pk>/", views.api_detail, name="api_detail"),
    path("apis/<uuid:pk>/edit/", views.api_edit, name="api_edit"),
    path("apis/<uuid:pk>/delete/", views.api_delete, name="api_delete"),
    path("apis/<uuid:pk>/test/", views.api_test, name="api_test"),
]

# Routes for LLMModel objects (UUID primary keys).
_model_routes = [
    path("models/", views.model_list, name="model_list"),
    path("models/create/", views.model_create, name="model_create"),
    path("models/<uuid:pk>/", views.model_detail, name="model_detail"),
    path("models/<uuid:pk>/edit/", views.model_edit, name="model_edit"),
    path("models/<uuid:pk>/delete/", views.model_delete, name="model_delete"),
]

# Final pattern list: dashboard first, then APIs, models and usage, in that order.
urlpatterns = [
    path("", views.dashboard, name="dashboard"),
    *_api_routes,
    *_model_routes,
    path("usage/", views.usage_list, name="usage_list"),
]
|
||||
209
mnemosyne/llm_manager/views.py
Normal file
209
mnemosyne/llm_manager/views.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Views for LLM Manager — FBVs per Red Panda Standards.
|
||||
|
||||
Rewrites Spelunker's CBVs (LLMApiListView, LLMModelListView, LLMUsageListView)
|
||||
as function-based views.
|
||||
"""
|
||||
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.db.models import Count, Sum
|
||||
from django.shortcuts import get_object_or_404, redirect, render
|
||||
|
||||
from .forms import LLMApiForm, LLMModelForm
|
||||
from .models import LLMApi, LLMModel, LLMUsage
|
||||
from .services import test_llm_api
|
||||
|
||||
|
||||
@login_required
def dashboard(request):
    """Render the LLM Manager dashboard.

    Shows counts of active APIs and models, token/cost totals and the ten
    most recent usage rows for the requesting user, plus the currently
    configured system embedding, chat and reranker models.
    """
    own_usage = LLMUsage.objects.filter(user=request.user)
    active_apis = LLMApi.objects.filter(is_active=True)

    summary = own_usage.aggregate(
        total_input=Sum("input_tokens"),
        total_output=Sum("output_tokens"),
        total_cost=Sum("total_cost"),
        count=Count("id"),
    )
    latest = own_usage.select_related("model", "model__api").order_by("-timestamp")[:10]

    # Sum() yields None when there are no rows, hence the "or 0" fallbacks.
    context = {
        "api_count": active_apis.count(),
        "model_count": LLMModel.objects.filter(is_active=True).count(),
        "usage_count": summary["count"] or 0,
        "total_input_tokens": summary["total_input"] or 0,
        "total_output_tokens": summary["total_output"] or 0,
        "total_cost": summary["total_cost"] or 0,
        "recent_usage": latest,
        "active_apis": active_apis,
        "system_embedding_model": LLMModel.get_system_embedding_model(),
        "system_chat_model": LLMModel.get_system_chat_model(),
        "system_reranker_model": LLMModel.get_system_reranker_model(),
    }
    return render(request, "llm_manager/dashboard.html", context)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# LLMApi CRUD
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
@login_required
def api_list(request):
    """Render every LLM API, ordered by name."""
    context = {"apis": LLMApi.objects.all().order_by("name")}
    return render(request, "llm_manager/api_list.html", context)
|
||||
|
||||
|
||||
@login_required
def api_create(request):
    """Create a new LLM API.

    GET serves an empty form. A valid POST saves the API with the requesting
    user as ``created_by`` and redirects to its detail page; an invalid POST
    re-renders the bound form.
    """
    template = "llm_manager/api_form.html"

    if request.method != "POST":
        return render(request, template, {"form": LLMApiForm(), "is_edit": False})

    form = LLMApiForm(request.POST)
    if not form.is_valid():
        return render(request, template, {"form": form, "is_edit": False})

    # Defer the save so the owner can be attached first.
    api = form.save(commit=False)
    api.created_by = request.user
    api.save()
    messages.success(request, f"API '{api.name}' created.")
    return redirect("llm_manager:api_detail", pk=api.pk)
|
||||
|
||||
|
||||
@login_required
def api_detail(request, pk):
    """Render one LLM API (404 if missing) together with its models, ordered by name."""
    api = get_object_or_404(LLMApi, pk=pk)
    context = {
        "api": api,
        "models": api.models.all().order_by("name"),
    }
    return render(request, "llm_manager/api_detail.html", context)
|
||||
|
||||
|
||||
@login_required
def api_edit(request, pk):
    """Edit an existing LLM API.

    GET serves the form pre-filled from the API. A valid POST saves the
    changes and redirects to the detail page; an invalid POST re-renders the
    bound form. Responds 404 when the API does not exist.
    """
    api = get_object_or_404(LLMApi, pk=pk)
    is_post = request.method == "POST"

    form = LLMApiForm(request.POST, instance=api) if is_post else LLMApiForm(instance=api)

    if is_post and form.is_valid():
        form.save()
        messages.success(request, f"API '{api.name}' updated.")
        return redirect("llm_manager:api_detail", pk=api.pk)

    context = {"form": form, "api": api, "is_edit": True}
    return render(request, "llm_manager/api_form.html", context)
|
||||
|
||||
|
||||
@login_required
def api_delete(request, pk):
    """Delete an LLM API.

    GET renders a confirmation page. POST deletes the API and redirects to
    the API list. Responds 404 when the API does not exist.

    If the database refuses the deletion because protected rows still
    reference the API (directly or through its models), the user gets an
    error message on the detail page instead of a server error.
    """
    # Imported locally so this view does not depend on a module-level import change.
    from django.db.models import ProtectedError

    api = get_object_or_404(LLMApi, pk=pk)

    if request.method != "POST":
        return render(request, "llm_manager/api_confirm_delete.html", {"api": api})

    name = api.name
    try:
        api.delete()
    except ProtectedError:
        # NOTE(review): assumes deleting an API cascades into its models, whose
        # usage rows are protected — confirm against the model definitions.
        messages.error(
            request,
            f"API '{name}' cannot be deleted because it is still referenced by other records.",
        )
        return redirect("llm_manager:api_detail", pk=api.pk)

    messages.success(request, f"API '{name}' deleted.")
    return redirect("llm_manager:api_list")
|
||||
|
||||
|
||||
@login_required
def api_test(request, pk):
    """Test an LLM API connection and discover models.

    On POST the API is probed via ``test_llm_api`` and the outcome is
    reported through the messages framework. Every request ends with a
    redirect to the API's detail page. Responds 404 when the API does not
    exist.
    """
    api = get_object_or_404(LLMApi, pk=pk)
    detail = redirect("llm_manager:api_detail", pk=api.pk)

    # Only POST triggers the probe; other methods just go back to the detail page.
    if request.method != "POST":
        return detail

    outcome = test_llm_api(api)
    if outcome["success"]:
        messages.success(request, f"API test successful: {outcome['message']}")
    else:
        messages.error(request, f"API test failed: {outcome['error']}")
    return detail
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# LLMModel views
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
@login_required
def model_list(request):
    """List all LLM Models, optionally filtered by API.

    The optional ``api`` query parameter restricts the list to one API. The
    value comes straight from the URL, so it is validated as a UUID before
    being used in the filter; a malformed value is ignored and the full list
    is shown rather than failing the request.
    """
    # Imported locally so this view does not depend on a module-level import change.
    import uuid

    qs = LLMModel.objects.select_related("api").order_by("api__name", "name")

    api_id = request.GET.get("api")
    if api_id:
        try:
            uuid.UUID(api_id)
        except ValueError:
            # Not a UUID: treat it as "no filter" instead of letting the ORM raise.
            api_id = None
        else:
            qs = qs.filter(api_id=api_id)

    context = {
        "models": qs,
        "apis": LLMApi.objects.all().order_by("name"),
        "selected_api": api_id,
    }
    return render(request, "llm_manager/model_list.html", context)
|
||||
|
||||
|
||||
@login_required
def model_create(request):
    """Create a new LLM Model.

    GET serves an empty form, pre-selecting the API named by the optional
    ``api`` query parameter. A valid POST saves the model and redirects to
    the model list; an invalid POST re-renders the bound form.
    """
    template = "llm_manager/model_form.html"

    if request.method == "POST":
        form = LLMModelForm(request.POST)
        if form.is_valid():
            created = form.save()
            messages.success(request, f"Model '{created.name}' created.")
            return redirect("llm_manager:model_list")
        return render(request, template, {"form": form, "is_edit": False})

    form = LLMModelForm()
    preselected_api = request.GET.get("api")
    if preselected_api:
        form.initial["api"] = preselected_api
    return render(request, template, {"form": form, "is_edit": False})
|
||||
|
||||
|
||||
@login_required
def model_detail(request, pk):
    """Render one LLM Model (404 if missing), loading its API in the same query."""
    with_api = LLMModel.objects.select_related("api")
    context = {"model": get_object_or_404(with_api, pk=pk)}
    return render(request, "llm_manager/model_detail.html", context)
|
||||
|
||||
|
||||
@login_required
def model_edit(request, pk):
    """Edit an existing LLM Model.

    GET serves the form pre-filled from the model. A valid POST saves the
    changes and redirects to the detail page; an invalid POST re-renders the
    bound form. Responds 404 when the model does not exist.
    """
    model = get_object_or_404(LLMModel, pk=pk)
    is_post = request.method == "POST"

    form = LLMModelForm(request.POST, instance=model) if is_post else LLMModelForm(instance=model)

    if is_post and form.is_valid():
        form.save()
        messages.success(request, f"Model '{model.name}' updated.")
        return redirect("llm_manager:model_detail", pk=model.pk)

    context = {"form": form, "model": model, "is_edit": True}
    return render(request, "llm_manager/model_form.html", context)
|
||||
|
||||
|
||||
@login_required
def model_delete(request, pk):
    """Delete an LLM Model.

    GET renders a confirmation page. POST deletes the model and redirects to
    the model list. Responds 404 when the model does not exist.

    A model that still has usage records is protected from deletion; in that
    case the user gets an error message on the model's detail page instead
    of a server error.
    """
    # Imported locally so this view does not depend on a module-level import change.
    from django.db.models import ProtectedError

    model = get_object_or_404(LLMModel, pk=pk)

    if request.method != "POST":
        return render(request, "llm_manager/model_confirm_delete.html", {"model": model})

    name = model.name
    try:
        model.delete()
    except ProtectedError:
        messages.error(
            request,
            f"Model '{name}' cannot be deleted because it has usage records.",
        )
        return redirect("llm_manager:model_detail", pk=model.pk)

    messages.success(request, f"Model '{name}' deleted.")
    return redirect("llm_manager:model_list")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Usage views
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
@login_required
def usage_list(request):
    """Render the requesting user's LLM usage history.

    Totals are computed over all of the user's records; the table itself is
    limited to the 100 most recent rows.
    """
    own_usage = LLMUsage.objects.filter(user=request.user)
    history = own_usage.select_related("model", "model__api").order_by("-timestamp")

    summary = history.aggregate(
        total_input=Sum("input_tokens"),
        total_output=Sum("output_tokens"),
        total_cached=Sum("cached_tokens"),
        total_cost=Sum("total_cost"),
    )

    context = {"usage_records": history[:100], "totals": summary}
    return render(request, "llm_manager/usage_list.html", context)
|
||||
23
mnemosyne/manage.py
Executable file
23
mnemosyne/manage.py
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python
|
||||
"""Django's command-line utility for administrative tasks."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Run administrative tasks.

    Points Django at ``mnemosyne.settings`` unless DJANGO_SETTINGS_MODULE is
    already set, then hands ``sys.argv`` to Django's command-line runner.

    Raises:
        ImportError: if Django cannot be imported.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mnemosyne.settings")
    try:
        from django.core import management
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    management.execute_from_command_line(sys.argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
5
mnemosyne/mnemosyne/__init__.py
Normal file
5
mnemosyne/mnemosyne/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# This will make sure the Celery app is always imported when
|
||||
# Django starts so that shared_task will use this app.
|
||||
from .celery import app as celery_app
|
||||
|
||||
__all__ = ("celery_app",)
|
||||
16
mnemosyne/mnemosyne/asgi.py
Normal file
16
mnemosyne/mnemosyne/asgi.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
ASGI config for mnemosyne project.
|
||||
|
||||
It exposes the ASGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.2/howto/deployment/asgi/
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from django.core.asgi import get_asgi_application
|
||||
|
||||
# Fall back to the project settings module unless the environment already names one.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mnemosyne.settings")

# Module-level ASGI callable picked up by the ASGI server.
application = get_asgi_application()
|
||||
12
mnemosyne/mnemosyne/celery.py
Normal file
12
mnemosyne/mnemosyne/celery.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import os
|
||||
from celery import Celery
|
||||
|
||||
# Fall back to the project settings module unless the environment already names one.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mnemosyne.settings")

app = Celery("mnemosyne")

# Celery options come from Django settings; only names prefixed "CELERY_" are read.
app.config_from_object("django.conf:settings", namespace="CELERY")

# Pick up task modules from every registered Django app config.
app.autodiscover_tasks()
|
||||
317
mnemosyne/mnemosyne/settings.py
Normal file
317
mnemosyne/mnemosyne/settings.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""
|
||||
Django settings for Mnemosyne project.
|
||||
|
||||
Environment-variable driven settings for dual-database (PostgreSQL + Neo4j).
|
||||
Uses django-environ to load from .env file.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import environ
|
||||
|
||||
# Project root; build paths as BASE_DIR / "subdir".
BASE_DIR = Path(__file__).resolve().parent.parent

# Environment reader; DEBUG is declared as a bool that defaults to True.
env = environ.Env(DEBUG=(bool, True))

# Load variables from BASE_DIR/.env if that file exists.
environ.Env.read_env(BASE_DIR / ".env")

# --- Security ---
# NOTE(review): the fallbacks below are development values (placeholder secret
# key, DEBUG on); production must override them through the environment.
SECRET_KEY = env("SECRET_KEY", default="django-insecure-change-me-in-production")
DEBUG = env.bool("DEBUG", default=True)
ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", default=["localhost", "127.0.0.1"])
CSRF_TRUSTED_ORIGINS = env.list("CSRF_TRUSTED_ORIGINS", default=["http://localhost:8000"])

# Proxy settings — trust X-Forwarded-Proto from HAProxy.
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
USE_X_FORWARDED_HOST = True
USE_X_FORWARDED_PORT = True

# --- LLM API Encryption ---
# Empty when the variable is not set.
LLM_API_SECRETS_ENCRYPTION_KEY = env("LLM_API_SECRETS_ENCRYPTION_KEY", default="")
|
||||
|
||||
# --- Application definition ---
_django_apps = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "django.contrib.humanize",
]
_third_party_apps = [
    "rest_framework",
    "storages",
    "django_neomodel",
    "django_prometheus",
]
_mnemosyne_apps = [
    "themis",
    "library",
    "llm_manager",
]

# Same order as before: Django contrib, then third-party, then Mnemosyne apps.
INSTALLED_APPS = [*_django_apps, *_third_party_apps, *_mnemosyne_apps]

# The two Prometheus middlewares bracket the rest of the stack (first and last).
MIDDLEWARE = [
    "django_prometheus.middleware.PrometheusBeforeMiddleware",
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
    "themis.middleware.TimezoneMiddleware",
    "themis.middleware.ThemeMiddleware",
    "django_prometheus.middleware.PrometheusAfterMiddleware",
]

ROOT_URLCONF = "mnemosyne.urls"

# Django defaults first, then the Themis processors.
_context_processors = [
    "django.template.context_processors.debug",
    "django.template.context_processors.request",
    "django.contrib.auth.context_processors.auth",
    "django.contrib.messages.context_processors.messages",
    "themis.context_processors.themis_settings",
    "themis.context_processors.user_preferences",
    "themis.context_processors.notifications",
]

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [BASE_DIR / "mnemosyne" / "templates"],
        "APP_DIRS": True,
        "OPTIONS": {"context_processors": _context_processors},
    },
]

WSGI_APPLICATION = "mnemosyne.wsgi.application"
|
||||
|
||||
# --- Database: PostgreSQL (Django ORM) ---
_default_database_url = "postgres://mnemosyne:password@portia.incus:5432/mnemosyne"
DATABASES = {
    "default": env.db("DATABASE_URL", default=_default_database_url),
}

# --- Neo4j Graph Database ---
NEOMODEL_NEO4J_BOLT_URL = env(
    "NEOMODEL_NEO4J_BOLT_URL",
    default="bolt://neo4j:password@ariel.incus:25554",
)

# --- Password validation ---
# Django's four stock validators, in the usual order.
AUTH_PASSWORD_VALIDATORS = [
    {"NAME": f"django.contrib.auth.password_validation.{validator}"}
    for validator in (
        "UserAttributeSimilarityValidator",
        "MinimumLengthValidator",
        "CommonPasswordValidator",
        "NumericPasswordValidator",
    )
]

# --- Internationalization ---
LANGUAGE_CODE = env("LANGUAGE_CODE", default="en-us")
TIME_ZONE = env("TIME_ZONE", default="UTC")
USE_I18N = True
USE_TZ = True

# --- Static files ---
STATIC_URL = "static/"
STATIC_ROOT = BASE_DIR / "staticfiles"

# User-uploaded media.
MEDIA_ROOT = BASE_DIR / "media"
MEDIA_URL = "/media/"
|
||||
|
||||
# --- Storage (S3 or local) ---
# USE_LOCAL_STORAGE selects the default file storage: the local filesystem when
# true (the default), an S3-compatible bucket otherwise. Static files use
# Django's StaticFilesStorage in both cases.
USE_LOCAL_STORAGE = env.bool("USE_LOCAL_STORAGE", default=True)

if USE_LOCAL_STORAGE:
    _default_storage = {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
    }
else:
    _default_storage = {
        "BACKEND": "storages.backends.s3boto3.S3Boto3Storage",
        "OPTIONS": {
            "access_key": env("AWS_ACCESS_KEY_ID", default=""),
            "secret_key": env("AWS_SECRET_ACCESS_KEY", default=""),
            "bucket_name": env("AWS_STORAGE_BUCKET_NAME", default="mnemosyne-content"),
            # An unset endpoint must reach the S3 client as None ("use the
            # provider default"), not as an empty string, which is not a
            # valid endpoint URL.
            "endpoint_url": env("AWS_S3_ENDPOINT_URL", default="") or None,
            "use_ssl": env.bool("AWS_S3_USE_SSL", default=False),
            "default_acl": "private",
            "region_name": env("AWS_S3_REGION_NAME", default="us-east-1"),
            "file_overwrite": False,
            "querystring_auth": True,
            "verify": env.bool("AWS_S3_VERIFY", default=True),
        },
    }

STORAGES = {
    "default": _default_storage,
    "staticfiles": {
        "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
    },
}
|
||||
|
||||
# --- Celery / RabbitMQ ---
_default_broker_url = "amqp://mnemosyne:password@oberon.incus:5672/mnemosyne"
CELERY_BROKER_URL = env("CELERY_BROKER_URL", default=_default_broker_url)
CELERY_RESULT_BACKEND = env("CELERY_RESULT_BACKEND", default="rpc://")

# JSON is the only accepted and produced serialization format.
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
CELERY_RESULT_SERIALIZER = "json"

CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_ALWAYS_EAGER = env.bool("CELERY_TASK_ALWAYS_EAGER", default=False)
CELERY_TASK_ACKS_LATE = True
CELERY_WORKER_PREFETCH_MULTIPLIER = 1

# Embedding and batch tasks from the library app get their own queues.
CELERY_TASK_ROUTES = {
    "library.tasks.embed_*": {"queue": "embedding"},
    "library.tasks.batch_*": {"queue": "batch"},
}

# --- Memcached ---
KVDB_LOCATION = env("KVDB_LOCATION", default="127.0.0.1:11211")
KVDB_PREFIX = env("KVDB_PREFIX", default="mnemosyne")

CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
        "LOCATION": KVDB_LOCATION,
        "KEY_PREFIX": KVDB_PREFIX,
        "TIMEOUT": 300,
    },
}

# --- Email (smtp4dev on Oberon) ---
EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend"
EMAIL_HOST = env("EMAIL_HOST", default="oberon.incus")
EMAIL_PORT = env.int("EMAIL_PORT", default=22025)
EMAIL_USE_TLS = env.bool("EMAIL_USE_TLS", default=False)

# --- Django REST Framework ---
# Session or basic auth; every endpoint requires an authenticated user by default.
REST_FRAMEWORK = {
    "DEFAULT_AUTHENTICATION_CLASSES": [
        "rest_framework.authentication.SessionAuthentication",
        "rest_framework.authentication.BasicAuthentication",
    ],
    "DEFAULT_PERMISSION_CLASSES": [
        "rest_framework.permissions.IsAuthenticated",
    ],
    "DEFAULT_PARSER_CLASSES": [
        "rest_framework.parsers.JSONParser",
        "rest_framework.parsers.FormParser",
        "rest_framework.parsers.MultiPartParser",
    ],
    "DEFAULT_RENDERER_CLASSES": [
        "rest_framework.renderers.JSONRenderer",
        "rest_framework.renderers.BrowsableAPIRenderer",
    ],
}

# --- Default primary key field type ---
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

# --- Login URLs ---
LOGIN_URL = "/login/"
LOGIN_REDIRECT_URL = "/"
LOGOUT_REDIRECT_URL = "/"

# --- Embedding Pipeline (Phase 2) ---
EMBEDDING_BATCH_SIZE = env.int("EMBEDDING_BATCH_SIZE", default=8)
EMBEDDING_TIMEOUT = env.int("EMBEDDING_TIMEOUT", default=120)

# --- Themis app settings ---
THEMIS_APP_NAME = "Mnemosyne"
THEMIS_NOTIFICATION_POLL_INTERVAL = 60
THEMIS_NOTIFICATION_MAX_AGE_DAYS = 90
|
||||
|
||||
# --- Structured Logging ---
LOGGING_LEVEL = env("LOGGING_LEVEL", default="INFO")
CELERY_LOGGING_LEVEL = env("CELERY_LOGGING_LEVEL", default="INFO")
DJANGO_LOGGING_LEVEL = env("DJANGO_LOGGING_LEVEL", default="WARNING")


def _console_logger(level):
    """Return a logger config that writes to the console handler without propagating."""
    return {"handlers": ["console"], "level": level, "propagate": False}


_structured_format = (
    "[%(levelname)s] %(asctime)s "
    "service=mnemosyne "
    "module=%(name)s "
    "func=%(funcName)s "
    "line=%(lineno)d "
    "%(message)s"
)

LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "structured": {
            "format": _structured_format,
            "datefmt": "%Y-%m-%d %H:%M:%S",
        },
        "simple": {
            "format": "[%(levelname)s] %(name)s: %(message)s",
        },
    },
    "handlers": {
        # Single handler: structured lines on stdout.
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "structured",
            "stream": "ext://sys.stdout",
        },
    },
    "loggers": {
        # Project apps share LOGGING_LEVEL; Celery and Django have their own levels.
        "library": _console_logger(LOGGING_LEVEL),
        "llm_manager": _console_logger(LOGGING_LEVEL),
        "themis": _console_logger(LOGGING_LEVEL),
        "celery": _console_logger(CELERY_LOGGING_LEVEL),
        "django": _console_logger(DJANGO_LOGGING_LEVEL),
        "django.request": _console_logger(DJANGO_LOGGING_LEVEL),
    },
    "root": {
        "handlers": ["console"],
        "level": LOGGING_LEVEL,
    },
}
|
||||
91
mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html
Normal file
91
mnemosyne/mnemosyne/templates/mnemosyne/dashboard.html
Normal file
@@ -0,0 +1,91 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block nav_items_desktop %}
|
||||
<li><a href="{% url 'dashboard' %}" class="font-semibold">Dashboard</a></li>
|
||||
<li><a href="{% url 'library:library-list' %}">Libraries</a></li>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'themis:key-list' %}">API Keys</a></li>
|
||||
{% endblock %}
|
||||
|
||||
{% block nav_items %}
|
||||
<li><a href="{% url 'dashboard' %}">Dashboard</a></li>
|
||||
<li><a href="{% url 'library:library-list' %}">Libraries</a></li>
|
||||
<li><a href="{% url 'llm_manager:dashboard' %}">LLM Manager</a></li>
|
||||
<li><a href="{% url 'themis:key-list' %}">API Keys</a></li>
|
||||
{% endblock %}
|
||||
|
||||
{% block title %}Dashboard — Mnemosyne{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="mb-6">
|
||||
<h1 class="text-3xl font-bold">Dashboard</h1>
|
||||
<p class="text-base-content/70">Welcome back, {{ user.username }}.</p>
|
||||
</div>
|
||||
|
||||
<!-- Overview stats -->
|
||||
<div class="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4 mb-8">
|
||||
<a href="{% url 'library:library-list' %}" class="stat bg-base-200 rounded-box shadow hover:bg-base-300 transition">
|
||||
<div class="stat-title">Libraries</div>
|
||||
<div class="stat-value text-primary">{% if library_count is not None %}{{ library_count }}{% else %}—{% endif %}</div>
|
||||
<div class="stat-desc">{% if library_count is None %}Neo4j offline{% else %}Content collections{% endif %}</div>
|
||||
</a>
|
||||
<a href="{% url 'llm_manager:api_list' %}" class="stat bg-base-200 rounded-box shadow hover:bg-base-300 transition">
|
||||
<div class="stat-title">Active APIs</div>
|
||||
<div class="stat-value text-secondary">{{ api_count }}</div>
|
||||
<div class="stat-desc">LLM providers</div>
|
||||
</a>
|
||||
<a href="{% url 'llm_manager:model_list' %}" class="stat bg-base-200 rounded-box shadow hover:bg-base-300 transition">
|
||||
<div class="stat-title">LLM Models</div>
|
||||
<div class="stat-value text-accent">{{ model_count }}</div>
|
||||
<div class="stat-desc">Available models</div>
|
||||
</a>
|
||||
<a href="{% url 'themis:key-list' %}" class="stat bg-base-200 rounded-box shadow hover:bg-base-300 transition">
|
||||
<div class="stat-title">Your Keys</div>
|
||||
<div class="stat-value">🔑</div>
|
||||
<div class="stat-desc">Manage API keys</div>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<!-- System models -->
|
||||
<div class="mb-8">
|
||||
<h2 class="text-xl font-semibold mb-3">System Models</h2>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body py-4">
|
||||
<h3 class="text-sm font-semibold text-base-content/60">Embedding Model</h3>
|
||||
{% if system_embedding %}
|
||||
<p class="font-mono">{{ system_embedding.name }}</p>
|
||||
<p class="text-xs text-base-content/50">{{ system_embedding.api.name }}{% if system_embedding.vector_dimensions %} · {{ system_embedding.vector_dimensions }}d{% endif %}</p>
|
||||
{% else %}
|
||||
<p class="text-warning text-sm">Not configured — set via Django admin</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body py-4">
|
||||
<h3 class="text-sm font-semibold text-base-content/60">Chat Model</h3>
|
||||
{% if system_chat %}
|
||||
<p class="font-mono">{{ system_chat.name }}</p>
|
||||
<p class="text-xs text-base-content/50">{{ system_chat.api.name }}</p>
|
||||
{% else %}
|
||||
<p class="text-warning text-sm">Not configured — set via Django admin</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Quick actions -->
|
||||
<div>
|
||||
<h2 class="text-xl font-semibold mb-3">Quick Actions</h2>
|
||||
<div class="flex flex-wrap gap-3">
|
||||
<a href="{% url 'library:library-create' %}" class="btn btn-primary btn-sm">New Library</a>
|
||||
<a href="{% url 'llm_manager:api_create' %}" class="btn btn-secondary btn-sm">Add LLM API</a>
|
||||
<a href="{% url 'themis:key-create' %}" class="btn btn-accent btn-sm">Add API Key</a>
|
||||
<a href="{% url 'themis:profile-settings' %}" class="btn btn-ghost btn-sm">Profile Settings</a>
|
||||
{% if user.is_staff %}
|
||||
<a href="{% url 'admin:index' %}" class="btn btn-ghost btn-sm">Django Admin</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
59
mnemosyne/mnemosyne/templates/mnemosyne/landing.html
Normal file
59
mnemosyne/mnemosyne/templates/mnemosyne/landing.html
Normal file
@@ -0,0 +1,59 @@
|
||||
{% load static %}
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" data-theme="dim">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Mnemosyne — Knowledge Library</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/daisyui@4/dist/full.min.css" rel="stylesheet">
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
</head>
|
||||
<body class="min-h-screen flex flex-col">
|
||||
|
||||
<!-- Hero -->
|
||||
<div class="hero min-h-screen bg-base-200">
|
||||
<div class="hero-content text-center">
|
||||
<div class="max-w-lg">
|
||||
<h1 class="text-5xl font-bold">Mnemosyne</h1>
|
||||
<p class="py-6 text-lg text-base-content/80">
|
||||
Your personal knowledge library — organise, embed, and retrieve
|
||||
content across books, music, film, art, and journals.
|
||||
</p>
|
||||
<a href="{% url 'login' %}" class="btn btn-primary btn-lg">Log In</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Features -->
|
||||
<div class="bg-base-100 py-16 px-4">
|
||||
<div class="container mx-auto">
|
||||
<h2 class="text-3xl font-bold text-center mb-10">Features</h2>
|
||||
<div class="grid grid-cols-1 md:grid-cols-3 gap-8 max-w-4xl mx-auto">
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body items-center text-center">
|
||||
<h3 class="card-title">Content Library</h3>
|
||||
<p>Organise fiction, technical references, music, film, art, and journals in a graph-powered knowledge base.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body items-center text-center">
|
||||
<h3 class="card-title">LLM Manager</h3>
|
||||
<p>Manage LLM APIs, track token usage and costs, and configure system-wide embedding and chat models.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card bg-base-200 shadow">
|
||||
<div class="card-body items-center text-center">
|
||||
<h3 class="card-title">Secure Keys</h3>
|
||||
<p>Store and manage your personal API keys with Fernet encryption. Integrated with Themis identity.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<footer class="footer footer-center bg-base-200 text-base-content p-4">
|
||||
<span>© {% now "Y" %} Ouranos — Mnemosyne</span>
|
||||
</footer>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
15
mnemosyne/mnemosyne/templates/registration/logged_out.html
Normal file
15
mnemosyne/mnemosyne/templates/registration/logged_out.html
Normal file
@@ -0,0 +1,15 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Logged Out — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body items-center text-center">
|
||||
<h2 class="card-title text-2xl mb-4">You have been logged out</h2>
|
||||
<p class="mb-6 text-base-content/70">Thanks for using {{ themis_app_name }}.</p>
|
||||
<a href="{% url 'login' %}" class="btn btn-primary">Log In Again</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
60
mnemosyne/mnemosyne/templates/registration/login.html
Normal file
60
mnemosyne/mnemosyne/templates/registration/login.html
Normal file
@@ -0,0 +1,60 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Log In — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-2xl justify-center mb-4">Log In</h2>
|
||||
|
||||
{% if form.errors %}
|
||||
<div class="alert alert-error mb-4">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 shrink-0" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<span>Invalid username or password. Please try again.</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post" action="{% url 'login' %}">
|
||||
{% csrf_token %}
|
||||
|
||||
<div class="form-control mb-4">
|
||||
<label class="label" for="id_username">
|
||||
<span class="label-text">Username</span>
|
||||
</label>
|
||||
<input type="text" name="username" id="id_username"
|
||||
class="input input-bordered w-full{% if form.errors %} input-error{% endif %}"
|
||||
value="{{ form.username.value|default:'' }}"
|
||||
autofocus required maxlength="150">
|
||||
</div>
|
||||
|
||||
<div class="form-control mb-6">
|
||||
<label class="label" for="id_password">
|
||||
<span class="label-text">Password</span>
|
||||
</label>
|
||||
<input type="password" name="password" id="id_password"
|
||||
class="input input-bordered w-full{% if form.errors %} input-error{% endif %}"
|
||||
required>
|
||||
<label class="label">
|
||||
<a href="{% url 'password_reset' %}" class="label-text-alt link link-hover">
|
||||
Forgot password?
|
||||
</a>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
{% if next %}
|
||||
<input type="hidden" name="next" value="{{ next }}">
|
||||
{% endif %}
|
||||
|
||||
<div class="form-control mt-2">
|
||||
<button type="submit" class="btn btn-primary w-full">Log In</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,20 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Password Changed — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body items-center text-center">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-16 w-16 text-success mb-4" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<h2 class="card-title text-2xl mb-2">Password Changed</h2>
|
||||
<p class="mb-6 text-base-content/70">Your password has been updated successfully.</p>
|
||||
<a href="{% url 'dashboard' %}" class="btn btn-primary">Back to Dashboard</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,70 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Change Password — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-2xl justify-center mb-4">Change Password</h2>
|
||||
|
||||
{% if form.errors %}
|
||||
<div class="alert alert-error mb-4">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 shrink-0" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<div>
|
||||
{% for field in form %}
|
||||
{% for error in field.errors %}
|
||||
<p>{{ error }}</p>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
{% for error in form.non_field_errors %}
|
||||
<p>{{ error }}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
|
||||
<div class="form-control mb-4">
|
||||
<label class="label" for="id_old_password">
|
||||
<span class="label-text">Current password</span>
|
||||
</label>
|
||||
<input type="password" name="old_password" id="id_old_password"
|
||||
class="input input-bordered w-full" autofocus required>
|
||||
</div>
|
||||
|
||||
<div class="form-control mb-4">
|
||||
<label class="label" for="id_new_password1">
|
||||
<span class="label-text">New password</span>
|
||||
</label>
|
||||
<input type="password" name="new_password1" id="id_new_password1"
|
||||
class="input input-bordered w-full" required>
|
||||
{% if form.new_password1.help_text %}
|
||||
<label class="label">
|
||||
<span class="label-text-alt text-base-content/60">{{ form.new_password1.help_text }}</span>
|
||||
</label>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="form-control mb-6">
|
||||
<label class="label" for="id_new_password2">
|
||||
<span class="label-text">Confirm new password</span>
|
||||
</label>
|
||||
<input type="password" name="new_password2" id="id_new_password2"
|
||||
class="input input-bordered w-full" required>
|
||||
</div>
|
||||
|
||||
<div class="form-control mt-2">
|
||||
<button type="submit" class="btn btn-primary w-full">Change Password</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,22 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Password Reset Complete — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body items-center text-center">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-16 w-16 text-success mb-4" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<h2 class="card-title text-2xl mb-2">Password Reset Complete</h2>
|
||||
<p class="text-base-content/70 mb-6">
|
||||
Your password has been set. You can now log in with your new password.
|
||||
</p>
|
||||
<a href="{% url 'login' %}" class="btn btn-primary">Log In</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,79 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Set New Password — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body">
|
||||
{% if validlink %}
|
||||
<h2 class="card-title text-2xl justify-center mb-4">Set New Password</h2>
|
||||
|
||||
{% if form.errors %}
|
||||
<div class="alert alert-error mb-4">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 shrink-0" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<div>
|
||||
{% for field in form %}
|
||||
{% for error in field.errors %}
|
||||
<p>{{ error }}</p>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
{% for error in form.non_field_errors %}
|
||||
<p>{{ error }}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
|
||||
<div class="form-control mb-4">
|
||||
<label class="label" for="id_new_password1">
|
||||
<span class="label-text">New password</span>
|
||||
</label>
|
||||
<input type="password" name="new_password1" id="id_new_password1"
|
||||
class="input input-bordered w-full" autofocus required>
|
||||
{% if form.new_password1.help_text %}
|
||||
<label class="label">
|
||||
<span class="label-text-alt text-base-content/60">{{ form.new_password1.help_text }}</span>
|
||||
</label>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="form-control mb-6">
|
||||
<label class="label" for="id_new_password2">
|
||||
<span class="label-text">Confirm new password</span>
|
||||
</label>
|
||||
<input type="password" name="new_password2" id="id_new_password2"
|
||||
class="input input-bordered w-full" required>
|
||||
</div>
|
||||
|
||||
<div class="form-control mt-2">
|
||||
<button type="submit" class="btn btn-primary w-full">Set Password</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
{% else %}
|
||||
<div class="text-center">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-16 w-16 text-error mx-auto mb-4" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<h2 class="card-title text-2xl justify-center mb-2">Invalid Reset Link</h2>
|
||||
<p class="text-base-content/70 mb-6">
|
||||
This password reset link is invalid or has expired.
|
||||
Please request a new one.
|
||||
</p>
|
||||
<a href="{% url 'password_reset' %}" class="btn btn-primary">Request New Link</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,23 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Reset Link Sent — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body items-center text-center">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-16 w-16 text-info mb-4" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M3 8l7.89 5.26a2 2 0 002.22 0L21 8M5 19h14a2 2 0 002-2V7a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<h2 class="card-title text-2xl mb-2">Check Your Email</h2>
|
||||
<p class="text-base-content/70 mb-6">
|
||||
If an account exists with that email address, we've sent password
|
||||
reset instructions. Check your inbox (and spam folder).
|
||||
</p>
|
||||
<a href="{% url 'login' %}" class="btn btn-primary">Back to Log In</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,56 @@
|
||||
{% extends "themis/base.html" %}
|
||||
|
||||
{% block title %}Reset Password — {{ themis_app_name }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="flex justify-center items-center min-h-[60vh]">
|
||||
<div class="card bg-base-200 shadow-xl w-full max-w-md">
|
||||
<div class="card-body">
|
||||
<h2 class="card-title text-2xl justify-center mb-2">Reset Password</h2>
|
||||
<p class="text-base-content/70 text-center mb-4">
|
||||
Enter your email address and we'll send you a link to reset your password.
|
||||
</p>
|
||||
|
||||
{% if form.errors %}
|
||||
<div class="alert alert-error mb-4">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 shrink-0" fill="none"
|
||||
viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||
</svg>
|
||||
<div>
|
||||
{% for field in form %}
|
||||
{% for error in field.errors %}
|
||||
<p>{{ error }}</p>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
|
||||
<div class="form-control mb-6">
|
||||
<label class="label" for="id_email">
|
||||
<span class="label-text">Email address</span>
|
||||
</label>
|
||||
<input type="email" name="email" id="id_email"
|
||||
class="input input-bordered w-full"
|
||||
autofocus required maxlength="254">
|
||||
</div>
|
||||
|
||||
<div class="form-control mt-2">
|
||||
<button type="submit" class="btn btn-primary w-full">Send Reset Link</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<div class="divider">OR</div>
|
||||
|
||||
<div class="text-center">
|
||||
<a href="{% url 'login' %}" class="link link-hover text-sm">Back to Log In</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
28
mnemosyne/mnemosyne/urls.py
Normal file
28
mnemosyne/mnemosyne/urls.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
URL configuration for Mnemosyne project.
|
||||
"""
|
||||
|
||||
from django.contrib import admin
|
||||
from django.urls import include, path
|
||||
|
||||
from . import views
|
||||
|
||||
# Public landing page and the dashboard.
_page_routes = [
    path("", views.landing, name="landing"),
    path("dashboard/", views.dashboard, name="dashboard"),
]

# Routes supplied by Django and third-party packages.
_framework_routes = [
    # Built-in auth views: login, logout, password change / reset.
    path("", include("django.contrib.auth.urls")),
    path("admin/", admin.site.urls),
    # Prometheus metrics endpoint.
    path("", include("django_prometheus.urls")),
]

# Themis (auth, keys, profile, notifications) pages and the versioned APIs.
_themis_and_api_routes = [
    path("", include("themis.urls")),
    path("api/v1/", include("themis.api.urls")),
    path("api/v1/llm/", include("llm_manager.api.urls")),
]

# Feature apps mounted under their own prefixes.
_app_routes = [
    # Library (Neo4j content graph).
    path("library/", include("library.urls")),
    # LLM Manager.
    path("llm/", include("llm_manager.urls")),
]

# Django resolves patterns top to bottom, so the group order below is the
# match order and must be kept as is.
urlpatterns = [
    *_page_routes,
    *_framework_routes,
    *_themis_and_api_routes,
    *_app_routes,
]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user