- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling. - Created utility functions for formatting dates, times, and numbers according to user preferences. - Developed views for profile settings, API key management, and notifications, including health check endpoints. - Added URL configurations for Themis tests and main application routes. - Established test cases for custom widgets to ensure proper functionality and integration. - Defined project metadata and dependencies in pyproject.toml for package management.
276 lines
10 KiB
Python
"""
|
|
Services for LLM API testing and model discovery.
|
|
|
|
Ported from Spelunker with Mnemosyne adaptations.
|
|
"""
|
|
|
|
import logging
|
|
|
|
from django.db import transaction
|
|
from django.utils import timezone
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def test_llm_api(api):
    """
    Test an LLM API connection and discover available models.

    Dispatches to a per-provider discovery helper, then syncs the discovered
    models into the database inside a single transaction: existing models are
    refreshed and reactivated, new ones are created, and active models no
    longer reported by the API are deactivated. The api's ``last_tested_*``
    fields are updated on both success and failure.

    :param api: LLMApi instance to test.
    :returns: dict with success, models_added/updated/deactivated, message/error.
    """
    # Hoisted: previously imported inside the per-model except branch.
    from decimal import Decimal

    from .models import LLMModel

    result = {
        "success": False,
        "models_added": 0,
        "models_updated": 0,
        "models_deactivated": 0,
        "message": "",
        "error": "",
    }

    logger.info("Testing LLM API: %s (%s) at %s", api.name, api.api_type, api.base_url)

    try:
        if api.api_type in ("openai", "vllm"):
            discovered_models = _discover_openai_models(api)
        elif api.api_type == "ollama":
            discovered_models = _discover_ollama_models(api)
        elif api.api_type == "bedrock":
            discovered_models = _discover_bedrock_models(api)
        else:
            result["error"] = f"API type '{api.api_type}' is not yet supported for auto-discovery"
            logger.warning(result["error"])
            return result

        if not discovered_models:
            result["error"] = "No models discovered from API"
            logger.warning("No models found for API %s", api.name)
            return result

        logger.info("Discovered %d models from %s", len(discovered_models), api.name)
        discovered_model_names = {m["name"] for m in discovered_models}

        with transaction.atomic():
            for model_data in discovered_models:
                model_name = model_data["name"]
                # Fields shared by both the update and the create path;
                # previously this mapping was duplicated verbatim.
                fields = {
                    "display_name": model_data.get("display_name", model_name),
                    "model_type": model_data.get("model_type", "chat"),
                    "context_window": model_data.get("context_window", 8192),
                    "max_output_tokens": model_data.get("max_output_tokens"),
                    "supports_cache": model_data.get("supports_cache", False),
                    "supports_vision": model_data.get("supports_vision", False),
                    "supports_function_calling": model_data.get("supports_function_calling", False),
                    "supports_json_mode": model_data.get("supports_json_mode", False),
                    "is_active": True,
                }
                try:
                    existing = LLMModel.objects.get(api=api, name=model_name)
                    # Refresh metadata but deliberately leave the cost fields
                    # untouched so manual pricing edits survive re-tests.
                    for attr, value in fields.items():
                        setattr(existing, attr, value)
                    existing.save()
                    result["models_updated"] += 1
                except LLMModel.DoesNotExist:
                    LLMModel.objects.create(
                        api=api,
                        name=model_name,
                        input_cost_per_1k=Decimal("0"),
                        output_cost_per_1k=Decimal("0"),
                        **fields,
                    )
                    result["models_added"] += 1
                    logger.info("Added new model: %s::%s", api.name, model_name)

            # Deactivate models no longer available
            for model in LLMModel.objects.filter(api=api, is_active=True):
                if model.name not in discovered_model_names:
                    model.is_active = False
                    model.save(update_fields=["is_active"])
                    result["models_deactivated"] += 1
                    logger.warning("Deactivated missing model: %s::%s", api.name, model.name)

        api.last_tested_at = timezone.now()
        api.last_test_status = "success"
        api.last_test_message = (
            f"Added: {result['models_added']}, "
            f"Updated: {result['models_updated']}, "
            f"Deactivated: {result['models_deactivated']}"
        )
        api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])

        result["success"] = True
        result["message"] = api.last_test_message
        logger.info("API test successful: %s — %s", api.name, result["message"])

    except Exception as e:
        result["error"] = f"API test failed: {e}"
        api.last_tested_at = timezone.now()
        api.last_test_status = "failed"
        api.last_test_message = result["error"]
        api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
        logger.error("API test failed for %s: %s", api.name, e, exc_info=True)

    return result
|
|
|
|
|
|
def _discover_openai_models(api):
    """Return model metadata dicts discovered from an OpenAI-compatible API."""
    try:
        from openai import OpenAI
    except ImportError:
        raise ImportError("openai package required for model discovery — pip install openai")

    client = OpenAI(
        api_key=api.api_key or "dummy",
        base_url=api.base_url,
        timeout=api.timeout_seconds,
        max_retries=api.max_retries,
    )

    # Keyword markers used to guess capabilities from the model id.
    vision_markers = ("vision", "gpt-4-turbo", "gpt-4o")
    tool_markers = ("gpt-4", "gpt-3.5-turbo")

    discovered = []
    for model in client.models.list().data:
        model_id = model.id
        lowered = model_id.lower()
        discovered.append(
            {
                "name": model_id,
                "display_name": model_id,
                "model_type": _infer_model_type(model_id),
                "context_window": _infer_context_window(model_id),
                "max_output_tokens": None,
                "supports_cache": False,
                "supports_vision": any(kw in lowered for kw in vision_markers),
                "supports_function_calling": any(kw in lowered for kw in tool_markers),
                "supports_json_mode": any(kw in lowered for kw in tool_markers),
            }
        )
    return discovered
|
|
|
|
|
|
def _discover_ollama_models(api):
    """
    Discover models from an Ollama API via its ``/api/tags`` endpoint.

    :param api: LLMApi instance whose base_url points at the Ollama server.
    :returns: list of model metadata dicts. Ollama's tag listing does not
        expose capabilities, so conservative defaults are used for all flags.
    :raises requests.HTTPError: if the tags endpoint returns an error status.
    """
    import requests

    url = f"{api.base_url.rstrip('/')}/api/tags"
    discovered = []
    # Honor the API's configured timeout for consistency with the other
    # discovery helpers; fall back to the previous hard-coded 10 seconds.
    resp = requests.get(url, timeout=api.timeout_seconds or 10)
    resp.raise_for_status()
    for model in resp.json().get("models", []):
        name = model["name"]
        discovered.append(
            {
                "name": name,
                "display_name": name,
                "model_type": "chat",
                "context_window": 4096,
                "max_output_tokens": None,
                "supports_cache": False,
                "supports_vision": False,
                "supports_function_calling": False,
                "supports_json_mode": False,
            }
        )
    return discovered
|
|
|
|
|
|
def _discover_bedrock_models(api):
    """
    Discover models from Amazon Bedrock via the Mantle OpenAI-compatible endpoint.

    For Bedrock APIs, the base_url is the bedrock-runtime endpoint. We derive
    the Mantle endpoint from the region to list models.
    """
    import requests

    # Pull the region out of a bedrock-runtime URL such as
    # https://bedrock-runtime.us-east-1.amazonaws.com; default to us-east-1.
    base = api.base_url.rstrip("/")
    region = "us-east-1"
    if "bedrock-runtime." in base:
        segments = base.split("bedrock-runtime.")[1].split(".")
        if segments:
            region = segments[0]

    # The Mantle endpoint exposes an OpenAI-style model listing.
    mantle_url = f"https://bedrock-mantle.{region}.api.aws/v1/models"
    headers = {"Authorization": f"Bearer {api.api_key}"} if api.api_key else {}

    vision_markers = ("claude-3", "nova", "vision")
    discovered = []
    try:
        resp = requests.get(mantle_url, headers=headers, timeout=api.timeout_seconds or 30)
        resp.raise_for_status()

        for entry in resp.json().get("data", []):
            model_id = entry.get("id", "")
            lowered = model_id.lower()
            discovered.append(
                {
                    "name": model_id,
                    "display_name": model_id,
                    "model_type": _infer_model_type(model_id),
                    "context_window": _infer_context_window(model_id),
                    "max_output_tokens": None,
                    "supports_cache": False,
                    "supports_vision": any(kw in lowered for kw in vision_markers),
                    "supports_function_calling": False,
                    "supports_json_mode": False,
                }
            )
    except Exception as exc:
        logger.warning("Bedrock Mantle model discovery failed: %s", exc)
        # Fallback: return empty list (user can manually add models)

    return discovered
|
|
|
|
|
|
def _infer_model_type(model_id):
|
|
"""Infer model type from its identifier."""
|
|
lower = model_id.lower()
|
|
if any(kw in lower for kw in ("embed", "embedding")):
|
|
return "embedding"
|
|
if "rerank" in lower:
|
|
return "reranker"
|
|
return "chat"
|
|
|
|
|
|
def _infer_context_window(model_id):
|
|
"""Infer context window size from model identifier."""
|
|
m = model_id.lower()
|
|
if any(kw in m for kw in ("gpt-4-turbo", "gpt-4-1106", "gpt-4-0125", "gpt-4o")):
|
|
return 128000
|
|
if "gpt-4-32k" in m:
|
|
return 32768
|
|
if "gpt-4" in m:
|
|
return 8192
|
|
if "gpt-3.5-turbo-16k" in m:
|
|
return 16384
|
|
if "gpt-3.5-turbo" in m:
|
|
return 4096
|
|
if "claude-3" in m:
|
|
return 200000
|
|
if "claude-2" in m:
|
|
return 100000
|
|
if "32k" in m:
|
|
return 32768
|
|
if "16k" in m:
|
|
return 16384
|
|
return 8192
|