Files
mnemosyne/mnemosyne/llm_manager/models.py
Robert Helewka 99bdb4ac92 Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling.
- Created utility functions for formatting dates, times, and numbers according to user preferences.
- Developed views for profile settings, API key management, and notifications, including health check endpoints.
- Added URL configurations for Themis tests and main application routes.
- Established test cases for custom widgets to ensure proper functionality and integration.
- Defined project metadata and dependencies in pyproject.toml for package management.
2026-03-21 02:00:18 +00:00

302 lines
9.6 KiB
Python

"""
LLM Manager models — ported from Spelunker with Mnemosyne adaptations.
Changes from Spelunker:
- api_key uses EncryptedCharField with key derived from SECRET_KEY (Themis-style)
- LLMModel.model_type adds 'reranker' and 'multimodal_embed' choices
- LLMModel adds 'supports_multimodal' and 'vector_dimensions' fields
- LLMUsage.purpose adds Mnemosyne-specific choices
"""
import uuid
from decimal import Decimal
from django.conf import settings
from django.contrib.auth import get_user_model
from django.db import models
from .encryption import EncryptedCharField
User = get_user_model()
class LLMApi(models.Model):
    """
    An LLM API provider endpoint (OpenAI-compatible, Arke proxy, etc.).

    The ``api_key`` is persisted encrypted: ``EncryptedCharField`` applies
    Fernet symmetric encryption with a key derived from Django's SECRET_KEY.
    """

    # Supported provider protocols.
    API_TYPE_CHOICES = [
        ("openai", "OpenAI Compatible"),
        ("azure", "Azure OpenAI"),
        ("ollama", "Ollama"),
        ("anthropic", "Anthropic"),
        ("llama-cpp", "Llama.cpp"),
        ("vllm", "vLLM"),
        ("bedrock", "Amazon Bedrock"),
    ]

    # Outcome states for the connectivity test.
    TEST_STATUS_CHOICES = [
        ("success", "Success"),
        ("failed", "Failed"),
        ("pending", "Pending"),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=100, unique=True)
    api_type = models.CharField(max_length=20, choices=API_TYPE_CHOICES)
    base_url = models.URLField()
    # Encrypted at rest; may be left blank.
    api_key = EncryptedCharField(max_length=500, blank=True, default="")
    is_active = models.BooleanField(default=True)
    supports_streaming = models.BooleanField(default=True)
    timeout_seconds = models.PositiveIntegerField(default=60)
    max_retries = models.PositiveIntegerField(default=3)

    # Connectivity-test bookkeeping
    last_tested_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text="Last time this API was tested",
    )
    last_test_status = models.CharField(
        max_length=20,
        choices=TEST_STATUS_CHOICES,
        default="pending",
        help_text="Result of the last API test",
    )
    last_test_message = models.TextField(
        blank=True,
        help_text="Details from the last test (success message or error)",
    )

    # Audit fields
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # SET_NULL keeps the provider record alive if its creator is deleted.
    created_by = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="llm_apis_created",
    )

    class Meta:
        ordering = ["name"]
        verbose_name = "LLM API"
        verbose_name_plural = "LLM APIs"

    def __str__(self):
        label = "{} ({})".format(self.name, self.api_type)
        return label
class LLMModel(models.Model):
    """
    A concrete model offered by an :class:`LLMApi` provider.

    Mnemosyne additions over Spelunker:
    - model_type adds 'reranker' and 'multimodal_embed'
    - supports_multimodal flag for image+text capable models
    - vector_dimensions for embedding output size
    """

    # Functional categories a model can belong to.
    MODEL_TYPE_CHOICES = [
        ("chat", "Chat/Completion"),
        ("embedding", "Embedding"),
        ("vision", "Vision"),
        ("audio", "Audio"),
        ("reranker", "Reranker"),
        ("multimodal_embed", "Multimodal Embedding"),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    api = models.ForeignKey(LLMApi, on_delete=models.CASCADE, related_name="models")
    name = models.CharField(max_length=100)
    display_name = models.CharField(max_length=200, blank=True)
    model_type = models.CharField(max_length=20, choices=MODEL_TYPE_CHOICES)

    # Capacity limits
    context_window = models.PositiveIntegerField(
        help_text="Maximum context in tokens"
    )
    max_output_tokens = models.PositiveIntegerField(null=True, blank=True)

    # Capability flags
    supports_cache = models.BooleanField(default=False)
    supports_vision = models.BooleanField(default=False)
    supports_function_calling = models.BooleanField(default=False)
    supports_json_mode = models.BooleanField(default=False)

    # Mnemosyne additions
    supports_multimodal = models.BooleanField(
        default=False,
        help_text="Flag models that accept image+text input",
    )
    vector_dimensions = models.PositiveIntegerField(
        null=True,
        blank=True,
        help_text="Embedding output dimensions (e.g., 4096)",
    )

    # Pricing (USD per 1K tokens)
    input_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K input tokens in USD",
    )
    output_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Cost per 1K output tokens in USD",
    )
    cached_cost_per_1k = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        null=True,
        blank=True,
        help_text="Cost per 1K cached tokens (if supported)",
    )

    is_active = models.BooleanField(default=True)

    # System-wide default designations (one per category, by convention —
    # uniqueness is not enforced at the database level).
    is_system_embedding_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide embedding model. "
            "Only ONE embedding model should have this set to True."
        ),
    )
    is_system_chat_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide chat model. "
            "Only ONE chat model should have this set to True."
        ),
    )
    is_system_reranker_model = models.BooleanField(
        default=False,
        help_text=(
            "Mark this as the system-wide reranker model. "
            "Only ONE reranker model should have this set to True."
        ),
    )

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["api", "name"]
        unique_together = [("api", "name")]
        indexes = [
            models.Index(fields=["api", "model_type", "is_active"]),
            models.Index(fields=["is_system_embedding_model", "model_type"]),
            models.Index(fields=["is_system_chat_model", "model_type"]),
            models.Index(fields=["is_system_reranker_model", "model_type"]),
        ]

    def __str__(self):
        return "{}: {}".format(self.api.name, self.name)

    @classmethod
    def get_system_embedding_model(cls):
        """Return the designated system-wide embedding model, or None."""
        # Chained filters AND-combine, identical to a single filter call.
        candidates = cls.objects.filter(is_system_embedding_model=True, is_active=True)
        return candidates.filter(model_type__in=["embedding", "multimodal_embed"]).first()

    @classmethod
    def get_system_chat_model(cls):
        """Return the designated system-wide chat model, or None."""
        candidates = cls.objects.filter(is_system_chat_model=True, is_active=True)
        return candidates.filter(model_type="chat").first()

    @classmethod
    def get_system_reranker_model(cls):
        """Return the designated system-wide reranker model, or None."""
        candidates = cls.objects.filter(is_system_reranker_model=True, is_active=True)
        return candidates.filter(model_type="reranker").first()
class LLMUsage(models.Model):
    """
    Tracks token usage and cost for all LLM API calls.

    ``total_cost`` is computed automatically on first save (when it is
    unset or zero) from the token counts and the model's per-1K pricing.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # SET_NULL preserves usage history if the user account is deleted.
    user = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, related_name="llm_usage"
    )
    # PROTECT: a model with usage records cannot be deleted.
    model = models.ForeignKey(
        LLMModel, on_delete=models.PROTECT, related_name="usage_records"
    )
    timestamp = models.DateTimeField(auto_now_add=True, db_index=True)
    input_tokens = models.PositiveIntegerField(default=0)
    output_tokens = models.PositiveIntegerField(default=0)
    cached_tokens = models.PositiveIntegerField(default=0)
    total_cost = models.DecimalField(
        max_digits=12,
        decimal_places=6,
        default=Decimal("0"),
        help_text="Total cost in USD",
    )
    session_id = models.CharField(max_length=100, blank=True, db_index=True)
    purpose = models.CharField(
        max_length=50,
        choices=[
            ("responder", "RAG Responder"),
            ("reviewer", "RAG Reviewer"),
            ("embeddings", "Document Embeddings"),
            ("search", "Vector Search"),
            ("reranking", "Re-ranking"),
            ("multimodal_embed", "Multimodal Embedding"),
            ("other", "Other"),
        ],
        default="other",
        db_index=True,
    )
    request_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional context (prompt, temperature, etc.)",
    )

    class Meta:
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["-timestamp", "user"]),
            models.Index(fields=["-timestamp", "model"]),
            models.Index(fields=["purpose", "-timestamp"]),
            models.Index(fields=["session_id"]),
        ]

    def save(self, *args, **kwargs):
        """Populate total_cost from token counts when it is unset or zero."""
        # `not self.total_cost` already covers None, 0 and Decimal("0"),
        # so no separate `== 0` check is needed.
        if not self.total_cost:
            self.total_cost = self.calculate_cost()
        super().save(*args, **kwargs)

    def calculate_cost(self):
        """Calculate cost in USD based on token usage and model pricing.

        Uses Decimal arithmetic end-to-end; the previous implementation
        converted the Decimal pricing fields to float and back, which
        subjected monetary values to binary floating-point rounding.

        Returns:
            Decimal: input + output (+ cached, when both the count and the
            model's cached price are set) cost.
        """
        per_k = Decimal("1000")
        cost = (Decimal(self.input_tokens) / per_k) * self.model.input_cost_per_1k
        cost += (Decimal(self.output_tokens) / per_k) * self.model.output_cost_per_1k
        if self.cached_tokens and self.model.cached_cost_per_1k:
            cost += (Decimal(self.cached_tokens) / per_k) * self.model.cached_cost_per_1k
        return cost

    def __str__(self):
        return f"{self.model.name} - {self.timestamp} - ${self.total_cost}"