Files
mnemosyne/mnemosyne/llm_manager/services.py
Robert Helewka 99bdb4ac92 Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling.
- Created utility functions for formatting dates, times, and numbers according to user preferences.
- Developed views for profile settings, API key management, and notifications, including health check endpoints.
- Added URL configurations for Themis tests and main application routes.
- Established test cases for custom widgets to ensure proper functionality and integration.
- Defined project metadata and dependencies in pyproject.toml for package management.
2026-03-21 02:00:18 +00:00

276 lines
10 KiB
Python

"""
Services for LLM API testing and model discovery.
Ported from Spelunker with Mnemosyne adaptations.
"""
import logging
from django.db import transaction
from django.utils import timezone
logger = logging.getLogger(__name__)
def test_llm_api(api):
"""
Test an LLM API connection and discover available models.
:param api: LLMApi instance to test.
:returns: dict with success, models_added/updated/deactivated, message/error.
"""
from .models import LLMModel
result = {
"success": False,
"models_added": 0,
"models_updated": 0,
"models_deactivated": 0,
"message": "",
"error": "",
}
logger.info("Testing LLM API: %s (%s) at %s", api.name, api.api_type, api.base_url)
try:
if api.api_type in ("openai", "vllm"):
discovered_models = _discover_openai_models(api)
elif api.api_type == "ollama":
discovered_models = _discover_ollama_models(api)
elif api.api_type == "bedrock":
discovered_models = _discover_bedrock_models(api)
else:
result["error"] = f"API type '{api.api_type}' is not yet supported for auto-discovery"
logger.warning(result["error"])
return result
if not discovered_models:
result["error"] = "No models discovered from API"
logger.warning("No models found for API %s", api.name)
return result
logger.info("Discovered %d models from %s", len(discovered_models), api.name)
discovered_model_names = {m["name"] for m in discovered_models}
with transaction.atomic():
for model_data in discovered_models:
model_name = model_data["name"]
try:
existing = LLMModel.objects.get(api=api, name=model_name)
existing.is_active = True
existing.display_name = model_data.get("display_name", model_name)
existing.model_type = model_data.get("model_type", "chat")
existing.context_window = model_data.get("context_window", 8192)
existing.max_output_tokens = model_data.get("max_output_tokens")
existing.supports_cache = model_data.get("supports_cache", False)
existing.supports_vision = model_data.get("supports_vision", False)
existing.supports_function_calling = model_data.get("supports_function_calling", False)
existing.supports_json_mode = model_data.get("supports_json_mode", False)
existing.save()
result["models_updated"] += 1
except LLMModel.DoesNotExist:
from decimal import Decimal
LLMModel.objects.create(
api=api,
name=model_name,
display_name=model_data.get("display_name", model_name),
model_type=model_data.get("model_type", "chat"),
context_window=model_data.get("context_window", 8192),
max_output_tokens=model_data.get("max_output_tokens"),
supports_cache=model_data.get("supports_cache", False),
supports_vision=model_data.get("supports_vision", False),
supports_function_calling=model_data.get("supports_function_calling", False),
supports_json_mode=model_data.get("supports_json_mode", False),
input_cost_per_1k=Decimal("0"),
output_cost_per_1k=Decimal("0"),
is_active=True,
)
result["models_added"] += 1
logger.info("Added new model: %s::%s", api.name, model_name)
# Deactivate models no longer available
for model in LLMModel.objects.filter(api=api, is_active=True):
if model.name not in discovered_model_names:
model.is_active = False
model.save(update_fields=["is_active"])
result["models_deactivated"] += 1
logger.warning("Deactivated missing model: %s::%s", api.name, model.name)
api.last_tested_at = timezone.now()
api.last_test_status = "success"
api.last_test_message = (
f"Added: {result['models_added']}, "
f"Updated: {result['models_updated']}, "
f"Deactivated: {result['models_deactivated']}"
)
api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
result["success"] = True
result["message"] = api.last_test_message
logger.info("API test successful: %s%s", api.name, result["message"])
except Exception as e:
result["error"] = f"API test failed: {e}"
api.last_tested_at = timezone.now()
api.last_test_status = "failed"
api.last_test_message = result["error"]
api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
logger.error("API test failed for %s: %s", api.name, e, exc_info=True)
return result
def _discover_openai_models(api):
"""Discover models from an OpenAI-compatible API."""
try:
from openai import OpenAI
except ImportError:
raise ImportError("openai package required for model discovery — pip install openai")
client = OpenAI(
api_key=api.api_key or "dummy",
base_url=api.base_url,
timeout=api.timeout_seconds,
max_retries=api.max_retries,
)
discovered = []
models_response = client.models.list()
for model in models_response.data:
model_id = model.id
discovered.append(
{
"name": model_id,
"display_name": model_id,
"model_type": _infer_model_type(model_id),
"context_window": _infer_context_window(model_id),
"max_output_tokens": None,
"supports_cache": False,
"supports_vision": any(
kw in model_id.lower() for kw in ("vision", "gpt-4-turbo", "gpt-4o")
),
"supports_function_calling": any(
kw in model_id.lower() for kw in ("gpt-4", "gpt-3.5-turbo")
),
"supports_json_mode": any(
kw in model_id.lower() for kw in ("gpt-4", "gpt-3.5-turbo")
),
}
)
return discovered
def _discover_ollama_models(api):
"""Discover models from an Ollama API."""
import requests
url = f"{api.base_url.rstrip('/')}/api/tags"
discovered = []
resp = requests.get(url, timeout=10)
resp.raise_for_status()
for model in resp.json().get("models", []):
name = model["name"]
discovered.append(
{
"name": name,
"display_name": name,
"model_type": "chat",
"context_window": 4096,
"max_output_tokens": None,
"supports_cache": False,
"supports_vision": False,
"supports_function_calling": False,
"supports_json_mode": False,
}
)
return discovered
def _discover_bedrock_models(api):
"""
Discover models from Amazon Bedrock via the Mantle OpenAI-compatible endpoint.
For Bedrock APIs, the base_url is the bedrock-runtime endpoint. We derive
the Mantle endpoint from the region to list models.
"""
import requests
# Extract region from base_url (e.g. https://bedrock-runtime.us-east-1.amazonaws.com)
base = api.base_url.rstrip("/")
region = "us-east-1"
if "bedrock-runtime." in base:
# Parse region from URL
parts = base.split("bedrock-runtime.")[1].split(".")
if parts:
region = parts[0]
# Use the Mantle endpoint for model listing (OpenAI-compatible)
mantle_url = f"https://bedrock-mantle.{region}.api.aws/v1/models"
headers = {}
if api.api_key:
headers["Authorization"] = f"Bearer {api.api_key}"
discovered = []
try:
resp = requests.get(mantle_url, headers=headers, timeout=api.timeout_seconds or 30)
resp.raise_for_status()
data = resp.json()
for model in data.get("data", []):
model_id = model.get("id", "")
discovered.append(
{
"name": model_id,
"display_name": model_id,
"model_type": _infer_model_type(model_id),
"context_window": _infer_context_window(model_id),
"max_output_tokens": None,
"supports_cache": False,
"supports_vision": any(
kw in model_id.lower() for kw in ("claude-3", "nova", "vision")
),
"supports_function_calling": False,
"supports_json_mode": False,
}
)
except Exception as exc:
logger.warning("Bedrock Mantle model discovery failed: %s", exc)
# Fallback: return empty list (user can manually add models)
return discovered
def _infer_model_type(model_id):
"""Infer model type from its identifier."""
lower = model_id.lower()
if any(kw in lower for kw in ("embed", "embedding")):
return "embedding"
if "rerank" in lower:
return "reranker"
return "chat"
def _infer_context_window(model_id):
"""Infer context window size from model identifier."""
m = model_id.lower()
if any(kw in m for kw in ("gpt-4-turbo", "gpt-4-1106", "gpt-4-0125", "gpt-4o")):
return 128000
if "gpt-4-32k" in m:
return 32768
if "gpt-4" in m:
return 8192
if "gpt-3.5-turbo-16k" in m:
return 16384
if "gpt-3.5-turbo" in m:
return 4096
if "claude-3" in m:
return 200000
if "claude-2" in m:
return 100000
if "32k" in m:
return 32768
if "16k" in m:
return 16384
return 8192