- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling. - Created utility functions for formatting dates, times, and numbers according to user preferences. - Developed views for profile settings, API key management, and notifications, including health check endpoints. - Added URL configurations for Themis tests and main application routes. - Established test cases for custom widgets to ensure proper functionality and integration. - Defined project metadata and dependencies in pyproject.toml for package management.
302 lines · 9.6 KiB · Python
"""
|
|
LLM Manager models — ported from Spelunker with Mnemosyne adaptations.
|
|
|
|
Changes from Spelunker:
|
|
- api_key uses EncryptedCharField with key derived from SECRET_KEY (Themis-style)
|
|
- LLMModel.model_type adds 'reranker' and 'multimodal_embed' choices
|
|
- LLMModel adds 'supports_multimodal' and 'vector_dimensions' fields
|
|
- LLMUsage.purpose adds Mnemosyne-specific choices
|
|
"""
|
|
|
|
import uuid
|
|
from decimal import Decimal
|
|
|
|
from django.conf import settings
|
|
from django.contrib.auth import get_user_model
|
|
from django.db import models
|
|
|
|
from .encryption import EncryptedCharField
|
|
|
|
User = get_user_model()
|
|
|
|
|
|
class LLMApi(models.Model):
    """
    Represents an LLM API provider (OpenAI-compatible, Arke proxy, etc.).

    API keys are stored encrypted using Fernet symmetric encryption
    derived from Django's SECRET_KEY (see ``EncryptedCharField``).
    Records also carry the outcome of the most recent connectivity test
    (``last_tested_at`` / ``last_test_status`` / ``last_test_message``).
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=100, unique=True)
    # Provider family; presumably selects which client adapter talks to this
    # API — confirm against the calling code.
    api_type = models.CharField(
        max_length=20,
        choices=[
            ("openai", "OpenAI Compatible"),
            ("azure", "Azure OpenAI"),
            ("ollama", "Ollama"),
            ("anthropic", "Anthropic"),
            ("llama-cpp", "Llama.cpp"),
            ("vllm", "vLLM"),
            ("bedrock", "Amazon Bedrock"),
        ],
    )
    base_url = models.URLField()
    # Stored encrypted at rest; blank is allowed (e.g. local keyless servers).
    api_key = EncryptedCharField(max_length=500, blank=True, default="")
    is_active = models.BooleanField(default=True)
    supports_streaming = models.BooleanField(default=True)
    timeout_seconds = models.PositiveIntegerField(default=60)
    max_retries = models.PositiveIntegerField(default=3)

    # Testing and validation fields
    last_tested_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="Last time this API was tested",
    )
    last_test_status = models.CharField(
        max_length=20,
        choices=[
            ("success", "Success"),
            ("failed", "Failed"),
            ("pending", "Pending"),
        ],
        default="pending",
        help_text="Result of the last API test",
    )
    last_test_message = models.TextField(
        blank=True,
        help_text="Details from the last test (success message or error)",
    )

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # Nullable + SET_NULL so API records survive deletion of the creating user.
    created_by = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="llm_apis_created",
    )

    class Meta:
        ordering = ["name"]
        verbose_name = "LLM API"
        verbose_name_plural = "LLM APIs"

    def __str__(self):
        return f"{self.name} ({self.api_type})"
class LLMModel(models.Model):
    """
    Represents a specific LLM model provided by an API.

    Mnemosyne additions over Spelunker:
    - model_type adds 'reranker' and 'multimodal_embed'
    - supports_multimodal flag for image+text capable models
    - vector_dimensions for embedding output size

    Exactly one model per role is expected to carry each of the
    ``is_system_*`` flags; the ``get_system_*_model`` classmethods resolve
    them. NOTE(review): uniqueness of those flags is documented in help_text
    but not enforced by a constraint — verify enforcement elsewhere.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Deleting an API cascades to its models.
    api = models.ForeignKey(LLMApi, on_delete=models.CASCADE, related_name="models")
    # Provider-side model identifier; unique per API (see Meta.unique_together).
    name = models.CharField(max_length=100)
    display_name = models.CharField(max_length=200, blank=True)

    model_type = models.CharField(
        max_length=20,
        choices=[
            ("chat", "Chat/Completion"),
            ("embedding", "Embedding"),
            ("vision", "Vision"),
            ("audio", "Audio"),
            ("reranker", "Reranker"),
            ("multimodal_embed", "Multimodal Embedding"),
        ],
    )

    context_window = models.PositiveIntegerField(
        help_text="Maximum context in tokens"
    )
    max_output_tokens = models.PositiveIntegerField(null=True, blank=True)
    supports_cache = models.BooleanField(default=False)
    supports_vision = models.BooleanField(default=False)
    supports_function_calling = models.BooleanField(default=False)
    supports_json_mode = models.BooleanField(default=False)

    # Mnemosyne additions
    supports_multimodal = models.BooleanField(
        default=False,
        help_text="Flag models that accept image+text input",
    )
    vector_dimensions = models.PositiveIntegerField(
        null=True,
        blank=True,
        help_text="Embedding output dimensions (e.g., 4096)",
    )

    # Pricing (USD per 1K tokens; 6 decimal places accommodate sub-cent rates)
    input_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K input tokens in USD",
    )
    output_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K output tokens in USD",
    )
    cached_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        null=True,
        blank=True,
        help_text="Cost per 1K cached tokens (if supported)",
    )

    is_active = models.BooleanField(default=True)
    is_system_embedding_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide embedding model. "
            "Only ONE embedding model should have this set to True."
        ),
    )
    is_system_chat_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide chat model. "
            "Only ONE chat model should have this set to True."
        ),
    )
    is_system_reranker_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide reranker model. "
            "Only ONE reranker model should have this set to True."
        ),
    )

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["api", "name"]
        unique_together = [("api", "name")]
        indexes = [
            models.Index(fields=["api", "model_type", "is_active"]),
            models.Index(fields=["is_system_embedding_model", "model_type"]),
            models.Index(fields=["is_system_chat_model", "model_type"]),
            models.Index(fields=["is_system_reranker_model", "model_type"]),
        ]

    def __str__(self):
        return f"{self.api.name}: {self.name}"

    @classmethod
    def get_system_embedding_model(cls):
        """Get the system-wide embedding model.

        Returns the first active flagged model of type 'embedding' or
        'multimodal_embed', or ``None`` if none is configured.
        """
        return cls.objects.filter(
            is_system_embedding_model=True,
            is_active=True,
            model_type__in=["embedding", "multimodal_embed"],
        ).first()

    @classmethod
    def get_system_chat_model(cls):
        """Get the system-wide chat model (``None`` if not configured)."""
        return cls.objects.filter(
            is_system_chat_model=True,
            is_active=True,
            model_type="chat",
        ).first()

    @classmethod
    def get_system_reranker_model(cls):
        """Get the system-wide reranker model (``None`` if not configured)."""
        return cls.objects.filter(
            is_system_reranker_model=True,
            is_active=True,
            model_type="reranker",
        ).first()
class LLMUsage(models.Model):
    """
    Tracks token usage and cost for all LLM API calls.

    ``total_cost`` is computed from the related model's per-1K-token
    pricing on save unless a non-zero value was supplied explicitly
    (see :meth:`save`).
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # SET_NULL keeps usage history when a user account is deleted.
    user = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, related_name="llm_usage"
    )
    # PROTECT: a model that has usage records cannot be deleted.
    model = models.ForeignKey(
        LLMModel, on_delete=models.PROTECT, related_name="usage_records"
    )
    timestamp = models.DateTimeField(auto_now_add=True, db_index=True)

    input_tokens = models.PositiveIntegerField(default=0)
    output_tokens = models.PositiveIntegerField(default=0)
    cached_tokens = models.PositiveIntegerField(default=0)

    total_cost = models.DecimalField(
        max_digits=12,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Total cost in USD",
    )

    session_id = models.CharField(max_length=100, blank=True, db_index=True)
    purpose = models.CharField(
        max_length=50,
        choices=[
            ("responder", "RAG Responder"),
            ("reviewer", "RAG Reviewer"),
            ("embeddings", "Document Embeddings"),
            ("search", "Vector Search"),
            ("reranking", "Re-ranking"),
            ("multimodal_embed", "Multimodal Embedding"),
            ("other", "Other"),
        ],
        default="other",
        db_index=True,
    )
    request_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional context (prompt, temperature, etc.)",
    )

    class Meta:
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["-timestamp", "user"]),
            models.Index(fields=["-timestamp", "model"]),
            models.Index(fields=["purpose", "-timestamp"]),
            models.Index(fields=["session_id"]),
        ]

    def save(self, *args, **kwargs):
        """Auto-compute ``total_cost`` when no non-zero value was supplied."""
        # Decimal("0"), 0 and None are all falsy, so this single check is
        # equivalent to the previous `not x or x == 0` double test.
        if not self.total_cost:
            self.total_cost = self.calculate_cost()
        super().save(*args, **kwargs)

    def calculate_cost(self):
        """Return the USD cost of this record based on the model's pricing.

        All arithmetic stays in ``Decimal``: the previous implementation
        round-tripped the Decimal pricing through ``float`` and back, which
        introduces binary-float rounding error in monetary values. The
        result is quantized to the 6 decimal places that the ``total_cost``
        column stores, so the value saved matches the value returned.
        """
        per_k = Decimal(1000)
        cost = (Decimal(self.input_tokens) / per_k) * self.model.input_cost_per_1k
        cost += (Decimal(self.output_tokens) / per_k) * self.model.output_cost_per_1k
        # Cached-token pricing applies only when both a count and a rate exist.
        if self.cached_tokens and self.model.cached_cost_per_1k:
            cost += (
                Decimal(self.cached_tokens) / per_k
            ) * self.model.cached_cost_per_1k
        return cost.quantize(Decimal("0.000001"))

    def __str__(self):
        return f"{self.model.name} - {self.timestamp} - ${self.total_cost}"