Files
mnemosyne/mnemosyne/llm_manager/models.py
Robert Helewka 99bdb4ac92 Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling.
- Created utility functions for formatting dates, times, and numbers according to user preferences.
- Developed views for profile settings, API key management, and notifications, including health check endpoints.
- Added URL configurations for Themis tests and main application routes.
- Established test cases for custom widgets to ensure proper functionality and integration.
- Defined project metadata and dependencies in pyproject.toml for package management.
2026-03-21 02:00:18 +00:00

302 lines
9.6 KiB
Python

"""
LLM Manager models — ported from Spelunker with Mnemosyne adaptations.
Changes from Spelunker:
- api_key uses EncryptedCharField with key derived from SECRET_KEY (Themis-style)
- LLMModel.model_type adds 'reranker' and 'multimodal_embed' choices
- LLMModel adds 'supports_multimodal' and 'vector_dimensions' fields
- LLMUsage.purpose adds Mnemosyne-specific choices
"""
import uuid
from decimal import Decimal
from django.conf import settings
from django.contrib.auth import get_user_model
from django.db import models
from .encryption import EncryptedCharField
User = get_user_model()
class LLMApi(models.Model):
    """
    An LLM API provider endpoint (OpenAI-compatible, Arke proxy, etc.).

    The ``api_key`` is persisted encrypted: ``EncryptedCharField`` applies
    Fernet symmetric encryption with a key derived from Django's SECRET_KEY.
    """

    # Supported provider protocols.
    API_TYPE_CHOICES = [
        ("openai", "OpenAI Compatible"),
        ("azure", "Azure OpenAI"),
        ("ollama", "Ollama"),
        ("anthropic", "Anthropic"),
        ("llama-cpp", "Llama.cpp"),
        ("vllm", "vLLM"),
        ("bedrock", "Amazon Bedrock"),
    ]

    # Outcome states for the connectivity test.
    TEST_STATUS_CHOICES = [
        ("success", "Success"),
        ("failed", "Failed"),
        ("pending", "Pending"),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=100, unique=True)
    api_type = models.CharField(max_length=20, choices=API_TYPE_CHOICES)
    base_url = models.URLField()
    # Encrypted at rest; may be left blank.
    api_key = EncryptedCharField(max_length=500, blank=True, default="")
    is_active = models.BooleanField(default=True)
    supports_streaming = models.BooleanField(default=True)
    timeout_seconds = models.PositiveIntegerField(default=60)
    max_retries = models.PositiveIntegerField(default=3)

    # Connectivity-test bookkeeping
    last_tested_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="Last time this API was tested",
    )
    last_test_status = models.CharField(
        max_length=20,
        choices=TEST_STATUS_CHOICES,
        default="pending",
        help_text="Result of the last API test",
    )
    last_test_message = models.TextField(
        blank=True,
        help_text="Details from the last test (success message or error)",
    )

    # Audit fields
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # SET_NULL keeps the provider record alive if its creator is deleted.
    created_by = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="llm_apis_created",
    )

    class Meta:
        ordering = ["name"]
        verbose_name = "LLM API"
        verbose_name_plural = "LLM APIs"

    def __str__(self):
        label = "{} ({})".format(self.name, self.api_type)
        return label
class LLMModel(models.Model):
    """
    A concrete model offered by an :class:`LLMApi` provider.

    Mnemosyne additions over Spelunker:
    - model_type adds 'reranker' and 'multimodal_embed'
    - supports_multimodal flag for image+text capable models
    - vector_dimensions for embedding output size
    """

    # Functional categories a model can belong to.
    MODEL_TYPE_CHOICES = [
        ("chat", "Chat/Completion"),
        ("embedding", "Embedding"),
        ("vision", "Vision"),
        ("audio", "Audio"),
        ("reranker", "Reranker"),
        ("multimodal_embed", "Multimodal Embedding"),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    api = models.ForeignKey(LLMApi, on_delete=models.CASCADE, related_name="models")
    name = models.CharField(max_length=100)
    display_name = models.CharField(max_length=200, blank=True)
    model_type = models.CharField(max_length=20, choices=MODEL_TYPE_CHOICES)

    # Capacity limits
    context_window = models.PositiveIntegerField(
        help_text="Maximum context in tokens"
    )
    max_output_tokens = models.PositiveIntegerField(null=True, blank=True)

    # Capability flags
    supports_cache = models.BooleanField(default=False)
    supports_vision = models.BooleanField(default=False)
    supports_function_calling = models.BooleanField(default=False)
    supports_json_mode = models.BooleanField(default=False)

    # Mnemosyne additions
    supports_multimodal = models.BooleanField(
        default=False,
        help_text="Flag models that accept image+text input",
    )
    vector_dimensions = models.PositiveIntegerField(
        null=True,
        blank=True,
        help_text="Embedding output dimensions (e.g., 4096)",
    )

    # Pricing (USD per 1K tokens)
    input_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K input tokens in USD",
    )
    output_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K output tokens in USD",
    )
    cached_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        null=True,
        blank=True,
        help_text="Cost per 1K cached tokens (if supported)",
    )

    is_active = models.BooleanField(default=True)

    # System-wide default designations (one per category, by convention —
    # uniqueness is not enforced at the database level).
    is_system_embedding_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide embedding model. "
            "Only ONE embedding model should have this set to True."
        ),
    )
    is_system_chat_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide chat model. "
            "Only ONE chat model should have this set to True."
        ),
    )
    is_system_reranker_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide reranker model. "
            "Only ONE reranker model should have this set to True."
        ),
    )

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["api", "name"]
        unique_together = [("api", "name")]
        indexes = [
            models.Index(fields=["api", "model_type", "is_active"]),
            models.Index(fields=["is_system_embedding_model", "model_type"]),
            models.Index(fields=["is_system_chat_model", "model_type"]),
            models.Index(fields=["is_system_reranker_model", "model_type"]),
        ]

    def __str__(self):
        return "{}: {}".format(self.api.name, self.name)

    @classmethod
    def get_system_embedding_model(cls):
        """Return the designated system-wide embedding model, or None."""
        # Chained filters AND-combine, identical to a single filter call.
        candidates = cls.objects.filter(is_system_embedding_model=True, is_active=True)
        return candidates.filter(model_type__in=["embedding", "multimodal_embed"]).first()

    @classmethod
    def get_system_chat_model(cls):
        """Return the designated system-wide chat model, or None."""
        candidates = cls.objects.filter(is_system_chat_model=True, is_active=True)
        return candidates.filter(model_type="chat").first()

    @classmethod
    def get_system_reranker_model(cls):
        """Return the designated system-wide reranker model, or None."""
        candidates = cls.objects.filter(is_system_reranker_model=True, is_active=True)
        return candidates.filter(model_type="reranker").first()
class LLMUsage(models.Model):
    """
    Tracks token usage and cost for all LLM API calls.

    ``total_cost`` is computed automatically on first save (when it is
    unset or zero) from the token counts and the model's per-1K pricing.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # SET_NULL preserves usage history if the user account is deleted.
    user = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, related_name="llm_usage"
    )
    # PROTECT: a model with usage records cannot be deleted.
    model = models.ForeignKey(
        LLMModel, on_delete=models.PROTECT, related_name="usage_records"
    )
    timestamp = models.DateTimeField(auto_now_add=True, db_index=True)
    input_tokens = models.PositiveIntegerField(default=0)
    output_tokens = models.PositiveIntegerField(default=0)
    cached_tokens = models.PositiveIntegerField(default=0)
    total_cost = models.DecimalField(
        max_digits=12,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Total cost in USD",
    )
    session_id = models.CharField(max_length=100, blank=True, db_index=True)
    purpose = models.CharField(
        max_length=50,
        choices=[
            ("responder", "RAG Responder"),
            ("reviewer", "RAG Reviewer"),
            ("embeddings", "Document Embeddings"),
            ("search", "Vector Search"),
            ("reranking", "Re-ranking"),
            ("multimodal_embed", "Multimodal Embedding"),
            ("other", "Other"),
        ],
        default="other",
        db_index=True,
    )
    request_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional context (prompt, temperature, etc.)",
    )

    class Meta:
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["-timestamp", "user"]),
            models.Index(fields=["-timestamp", "model"]),
            models.Index(fields=["purpose", "-timestamp"]),
            models.Index(fields=["session_id"]),
        ]

    def save(self, *args, **kwargs):
        """Populate total_cost from token counts when it is unset or zero."""
        # `not self.total_cost` already covers None, 0 and Decimal("0"),
        # so no separate `== 0` check is needed.
        if not self.total_cost:
            self.total_cost = self.calculate_cost()
        super().save(*args, **kwargs)

    def calculate_cost(self):
        """Calculate cost in USD based on token usage and model pricing.

        Uses Decimal arithmetic end-to-end; the previous implementation
        converted the Decimal pricing fields to float and back, which
        subjected monetary values to binary floating-point rounding.

        Returns:
            Decimal: input + output (+ cached, when both the count and the
            model's cached price are set) cost.
        """
        per_k = Decimal("1000")
        cost = (Decimal(self.input_tokens) / per_k) * self.model.input_cost_per_1k
        cost += (Decimal(self.output_tokens) / per_k) * self.model.output_cost_per_1k
        if self.cached_tokens and self.model.cached_cost_per_1k:
            cost += (Decimal(self.cached_tokens) / per_k) * self.model.cached_cost_per_1k
        return cost

    def __str__(self):
        return f"{self.model.name} - {self.timestamp} - ${self.total_cost}"