Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling. - Created utility functions for formatting dates, times, and numbers according to user preferences. - Developed views for profile settings, API key management, and notifications, including health check endpoints. - Added URL configurations for Themis tests and main application routes. - Established test cases for custom widgets to ensure proper functionality and integration. - Defined project metadata and dependencies in pyproject.toml for package management.
This commit is contained in:
275
mnemosyne/llm_manager/services.py
Normal file
275
mnemosyne/llm_manager/services.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Services for LLM API testing and model discovery.
|
||||
|
||||
Ported from Spelunker with Mnemosyne adaptations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def test_llm_api(api):
    """
    Test an LLM API connection and discover available models.

    Discovered models are upserted into ``LLMModel`` inside a single
    transaction; active models the API no longer reports are deactivated.
    The api's ``last_tested_at`` / ``last_test_status`` /
    ``last_test_message`` fields are persisted on both success and failure.

    :param api: LLMApi instance to test.
    :returns: dict with success, models_added/updated/deactivated, message/error.
    """
    # Hoisted out of the per-model loop: previously these were (re-)imported
    # inside the except branch on every newly-discovered model.
    from decimal import Decimal

    from .models import LLMModel

    result = {
        "success": False,
        "models_added": 0,
        "models_updated": 0,
        "models_deactivated": 0,
        "message": "",
        "error": "",
    }

    logger.info("Testing LLM API: %s (%s) at %s", api.name, api.api_type, api.base_url)

    try:
        if api.api_type in ("openai", "vllm"):
            discovered_models = _discover_openai_models(api)
        elif api.api_type == "ollama":
            discovered_models = _discover_ollama_models(api)
        elif api.api_type == "bedrock":
            discovered_models = _discover_bedrock_models(api)
        else:
            result["error"] = f"API type '{api.api_type}' is not yet supported for auto-discovery"
            logger.warning(result["error"])
            return result

        if not discovered_models:
            result["error"] = "No models discovered from API"
            logger.warning("No models found for API %s", api.name)
            return result

        logger.info("Discovered %d models from %s", len(discovered_models), api.name)
        discovered_model_names = {m["name"] for m in discovered_models}

        with transaction.atomic():
            for model_data in discovered_models:
                model_name = model_data["name"]
                # Shared between the update and create paths so the field
                # list cannot drift between the two (it was duplicated before).
                defaults = {
                    "display_name": model_data.get("display_name", model_name),
                    "model_type": model_data.get("model_type", "chat"),
                    "context_window": model_data.get("context_window", 8192),
                    "max_output_tokens": model_data.get("max_output_tokens"),
                    "supports_cache": model_data.get("supports_cache", False),
                    "supports_vision": model_data.get("supports_vision", False),
                    "supports_function_calling": model_data.get("supports_function_calling", False),
                    "supports_json_mode": model_data.get("supports_json_mode", False),
                    "is_active": True,
                }
                try:
                    existing = LLMModel.objects.get(api=api, name=model_name)
                    for field, value in defaults.items():
                        setattr(existing, field, value)
                    existing.save()
                    result["models_updated"] += 1
                except LLMModel.DoesNotExist:
                    # Costs default to zero on creation; the admin can fill
                    # in real pricing later.
                    LLMModel.objects.create(
                        api=api,
                        name=model_name,
                        input_cost_per_1k=Decimal("0"),
                        output_cost_per_1k=Decimal("0"),
                        **defaults,
                    )
                    result["models_added"] += 1
                    logger.info("Added new model: %s::%s", api.name, model_name)

            # Deactivate models no longer available
            for model in LLMModel.objects.filter(api=api, is_active=True):
                if model.name not in discovered_model_names:
                    model.is_active = False
                    model.save(update_fields=["is_active"])
                    result["models_deactivated"] += 1
                    logger.warning("Deactivated missing model: %s::%s", api.name, model.name)

        api.last_tested_at = timezone.now()
        api.last_test_status = "success"
        api.last_test_message = (
            f"Added: {result['models_added']}, "
            f"Updated: {result['models_updated']}, "
            f"Deactivated: {result['models_deactivated']}"
        )
        api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])

        result["success"] = True
        result["message"] = api.last_test_message
        logger.info("API test successful: %s — %s", api.name, result["message"])

    except Exception as e:
        # Record the failure on the api row as well as in the returned dict.
        result["error"] = f"API test failed: {e}"
        api.last_tested_at = timezone.now()
        api.last_test_status = "failed"
        api.last_test_message = result["error"]
        api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
        logger.error("API test failed for %s: %s", api.name, e, exc_info=True)

    return result
|
||||
|
||||
|
||||
def _discover_openai_models(api):
    """
    Discover models from an OpenAI-compatible API.

    Lists models via ``GET /models`` using the ``openai`` client and infers
    per-model capabilities from the model identifier (heuristics only).

    :param api: LLMApi instance providing base_url, api_key, timeout and retries.
    :returns: list of model-data dicts consumed by :func:`test_llm_api`.
    :raises ImportError: if the ``openai`` package is not installed.
    """
    try:
        from openai import OpenAI
    except ImportError:
        raise ImportError("openai package required for model discovery — pip install openai")

    client = OpenAI(
        # Some OpenAI-compatible servers (e.g. local vLLM) need no key but
        # the client requires a non-empty value.
        api_key=api.api_key or "dummy",
        base_url=api.base_url,
        timeout=api.timeout_seconds,
        max_retries=api.max_retries,
    )
    discovered = []
    models_response = client.models.list()

    for model in models_response.data:
        model_id = model.id
        # Hoisted: previously lowercased once per capability check.
        lower_id = model_id.lower()
        discovered.append(
            {
                "name": model_id,
                "display_name": model_id,
                "model_type": _infer_model_type(model_id),
                "context_window": _infer_context_window(model_id),
                "max_output_tokens": None,
                "supports_cache": False,
                # Capability flags are name-based heuristics.
                "supports_vision": any(
                    kw in lower_id for kw in ("vision", "gpt-4-turbo", "gpt-4o")
                ),
                "supports_function_calling": any(
                    kw in lower_id for kw in ("gpt-4", "gpt-3.5-turbo")
                ),
                "supports_json_mode": any(
                    kw in lower_id for kw in ("gpt-4", "gpt-3.5-turbo")
                ),
            }
        )
    return discovered
|
||||
|
||||
|
||||
def _discover_ollama_models(api):
    """
    Discover models from an Ollama API via ``GET /api/tags``.

    All capability flags default to conservative values since Ollama's tags
    endpoint does not report them.

    :param api: LLMApi instance providing base_url and timeout_seconds.
    :returns: list of model-data dicts consumed by :func:`test_llm_api`.
    :raises requests.HTTPError: on a non-2xx response.
    """
    import requests

    url = f"{api.base_url.rstrip('/')}/api/tags"
    discovered = []
    # Honor the configured timeout like the other discovery helpers do
    # (this was a hard-coded 10s before); 10s remains the fallback.
    resp = requests.get(url, timeout=api.timeout_seconds or 10)
    resp.raise_for_status()
    for model in resp.json().get("models", []):
        name = model["name"]
        discovered.append(
            {
                "name": name,
                "display_name": name,
                "model_type": "chat",
                "context_window": 4096,
                "max_output_tokens": None,
                "supports_cache": False,
                "supports_vision": False,
                "supports_function_calling": False,
                "supports_json_mode": False,
            }
        )
    return discovered
|
||||
|
||||
|
||||
def _discover_bedrock_models(api):
    """
    Discover models from Amazon Bedrock via the Mantle OpenAI-compatible endpoint.

    For Bedrock APIs, the base_url is the bedrock-runtime endpoint. We derive
    the Mantle endpoint from the region to list models.
    """
    import requests

    # Extract region from base_url (e.g. https://bedrock-runtime.us-east-1.amazonaws.com)
    endpoint = api.base_url.rstrip("/")
    region = "us-east-1"
    _, marker, remainder = endpoint.partition("bedrock-runtime.")
    if marker:
        # The region is the first dotted component after the marker.
        region = remainder.split(".")[0]

    # Use the Mantle endpoint for model listing (OpenAI-compatible)
    mantle_url = f"https://bedrock-mantle.{region}.api.aws/v1/models"
    headers = (
        {"Authorization": f"Bearer {api.api_key}"} if api.api_key else {}
    )

    models = []
    try:
        response = requests.get(
            mantle_url, headers=headers, timeout=api.timeout_seconds or 30
        )
        response.raise_for_status()
        payload = response.json()

        for entry in payload.get("data", []):
            ident = entry.get("id", "")
            lowered = ident.lower()
            models.append(
                {
                    "name": ident,
                    "display_name": ident,
                    "model_type": _infer_model_type(ident),
                    "context_window": _infer_context_window(ident),
                    "max_output_tokens": None,
                    "supports_cache": False,
                    "supports_vision": any(
                        kw in lowered for kw in ("claude-3", "nova", "vision")
                    ),
                    "supports_function_calling": False,
                    "supports_json_mode": False,
                }
            )
    except Exception as err:
        logger.warning("Bedrock Mantle model discovery failed: %s", err)
        # Fallback: return empty list (user can manually add models)

    return models
|
||||
|
||||
|
||||
def _infer_model_type(model_id):
|
||||
"""Infer model type from its identifier."""
|
||||
lower = model_id.lower()
|
||||
if any(kw in lower for kw in ("embed", "embedding")):
|
||||
return "embedding"
|
||||
if "rerank" in lower:
|
||||
return "reranker"
|
||||
return "chat"
|
||||
|
||||
|
||||
def _infer_context_window(model_id):
|
||||
"""Infer context window size from model identifier."""
|
||||
m = model_id.lower()
|
||||
if any(kw in m for kw in ("gpt-4-turbo", "gpt-4-1106", "gpt-4-0125", "gpt-4o")):
|
||||
return 128000
|
||||
if "gpt-4-32k" in m:
|
||||
return 32768
|
||||
if "gpt-4" in m:
|
||||
return 8192
|
||||
if "gpt-3.5-turbo-16k" in m:
|
||||
return 16384
|
||||
if "gpt-3.5-turbo" in m:
|
||||
return 4096
|
||||
if "claude-3" in m:
|
||||
return 200000
|
||||
if "claude-2" in m:
|
||||
return 100000
|
||||
if "32k" in m:
|
||||
return 32768
|
||||
if "16k" in m:
|
||||
return 16384
|
||||
return 8192
|
||||
Reference in New Issue
Block a user