def _check_openai_v1_convention(api):
    """Return an error message when ``api.base_url`` lacks the ``/v1`` suffix.

    Why: every runtime client in this repo (embedding_client, vision,
    concepts, reranker -- see reranker.py:82-87) expects base_url to be the
    OpenAI-style /v1 root and appends path-only segments like "/embeddings".
    A base_url without /v1 can therefore pass Test & Discover and then 404
    at embed time. This check surfaces that mismatch as an explicit failure.
    NOTE(review): the original rationale says the OpenAI SDK used for
    discovery tolerates a base_url without /v1 -- confirm against the SDK
    version pinned by this project.

    Args:
        api: Object exposing a ``base_url`` attribute (string, possibly
            empty or ``None``).

    Returns:
        ``None`` when no problem is detected: base_url is empty, already
        ends in ``/v1`` (trailing slashes ignored), the probe could not
        connect, or the probe returned a non-200 status. Otherwise a
        human-readable explanation telling the user to append ``/v1``.
    """
    # Tolerate a missing base_url instead of raising AttributeError here;
    # the discovery call that follows reports the real configuration error.
    base_url = (api.base_url or "").rstrip("/")
    if not base_url or base_url.endswith("/v1"):
        return None

    # Imported lazily so the common, well-formed path does no extra work.
    import requests

    try:
        probe = requests.get(f"{base_url}/v1/models", timeout=10)
    except requests.RequestException:
        return None  # Let the SDK call surface the real connectivity error.

    if probe.status_code != 200:
        return None

    # The server answers under /v1, so the configured root is one level too high.
    return (
        f"base_url '{api.base_url}' is missing the '/v1' suffix that the rest "
        f"of Mnemosyne expects. A probe of '{base_url}/v1/models' succeeded, "
        f"so the server is up — but at embed/chat/rerank time the runtime "
        f"clients build URLs like '{base_url}/embeddings' which will 404. "
        f"Set base_url to '{base_url}/v1' and re-test."
    )