From bc80d90b38143db87b01d5b0407e97421753855e Mon Sep 17 00:00:00 2001
From: Robert Helewka
Date: Sat, 23 May 2026 21:21:26 -0400
Subject: [PATCH] fix(llm_manager): fail Test & Discover when openai base_url is missing /v1

The OpenAI SDK used by _discover_openai_models tolerates a base_url
without /v1 (it auto-adds it for the probe), but every runtime client
(embedding_client, vision, concepts, reranker) treats base_url as the
/v1 root and appends path-only segments. A non-conforming base_url
silently passed Test & Discover and then 404'd at embed/chat/rerank
time.

Add _check_openai_v1_convention() which probes {base_url}/v1/models
when the URL doesn't end in /v1; on 200, fail the test with an explicit
"set base_url to .../v1 and re-test" message that points at the exact
bare-vs-/v1 mismatch.

Co-Authored-By: Claude Opus 4.7
---
 mnemosyne/llm_manager/services.py | 40 +++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/mnemosyne/llm_manager/services.py b/mnemosyne/llm_manager/services.py
index ada1b21..944a6ae 100644
--- a/mnemosyne/llm_manager/services.py
+++ b/mnemosyne/llm_manager/services.py
@@ -34,6 +34,15 @@ def test_llm_api(api):

     try:
         if api.api_type in ("openai", "vllm"):
+            convention_error = _check_openai_v1_convention(api)
+            if convention_error:
+                result["error"] = convention_error
+                logger.error("API %s base_url convention check failed: %s", api.name, convention_error)
+                api.last_tested_at = timezone.now()
+                api.last_test_status = "failed"
+                api.last_test_message = convention_error
+                api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
+                return result
             discovered_models = _discover_openai_models(api)
         elif api.api_type == "ollama":
             discovered_models = _discover_ollama_models(api)
@@ -121,6 +130,37 @@ def test_llm_api(api):
     return result


+def _check_openai_v1_convention(api):
+    # Why: every runtime client in this repo (embedding_client, vision,
+    # concepts, reranker — see reranker.py:82-87) expects base_url to be
+    # the OpenAI-style /v1 root and appends path-only segments like
+    # "/embeddings". The OpenAI SDK we use for discovery is tolerant of
+    # base_url without /v1 (it auto-adds it for the probe), so a malformed
+    # base_url silently passes Test & Discover and then 404s at embed time.
+    # Surface that mismatch here as a failure with a clear explanation.
+    import requests
+
+    base_url = api.base_url.rstrip("/")
+    if base_url.endswith("/v1"):
+        return None  # Already ends in /v1 (trailing slashes ignored); nothing to probe.
+
+    try:
+        probe = requests.get(f"{base_url}/v1/models", timeout=10)
+    except requests.RequestException:
+        return None  # Let the SDK call surface the real connectivity error.
+
+    if probe.status_code == 200:
+        return (
+            f"base_url '{api.base_url}' is missing the '/v1' suffix that the rest "
+            f"of Mnemosyne expects. A probe of '{base_url}/v1/models' succeeded, "
+            f"so the server is up — but at embed/chat/rerank time the runtime "
+            f"clients build URLs like '{base_url}/embeddings' which will 404. "
+            f"Set base_url to '{base_url}/v1' and re-test."
+        )
+
+    return None  # Non-200 probe: no error reported, so the caller proceeds to model discovery.
+
+
 def _discover_openai_models(api):
     """Discover models from an OpenAI-compatible API."""
     try: