fix(llm_manager): fail Test & Discover when openai base_url is missing /v1

The OpenAI SDK used by _discover_openai_models tolerates a base_url without /v1 (it auto-adds it for the probe), but every runtime client (embedding_client, vision, concepts, reranker) treats base_url as the /v1 root and appends path-only segments. A non-conforming base_url silently passed Test & Discover and then 404'd at embed/chat/rerank time. Add _check_openai_v1_convention() which probes {base_url}/v1/models when the URL doesn't end in /v1; on 200, fail the test with an explicit "set base_url to .../v1 and re-test" message that points at the exact bare-vs-/v1 mismatch. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 21:21:26 -04:00
parent 7d95133c74
commit bc80d90b38
1 changed files with 40 additions and 0 deletions
--- a/mnemosyne/llm_manager/services.py
+++ b/mnemosyne/llm_manager/services.py
@@ -34,6 +34,15 @@ def test_llm_api(api):

    try:
        if api.api_type in ("openai", "vllm"):
+            convention_error = _check_openai_v1_convention(api)
+            if convention_error:
+                result["error"] = convention_error
+                logger.error("API %s base_url convention check failed: %s", api.name, convention_error)
+                api.last_tested_at = timezone.now()
+                api.last_test_status = "failed"
+                api.last_test_message = convention_error
+                api.save(update_fields=["last_tested_at", "last_test_status", "last_test_message"])
+                return result
            discovered_models = _discover_openai_models(api)
        elif api.api_type == "ollama":
            discovered_models = _discover_ollama_models(api)
@@ -121,6 +130,37 @@ def test_llm_api(api):
    return result


+def _check_openai_v1_convention(api):
+    # Return an error message when base_url lacks the OpenAI-style "/v1"
+    # suffix but "{base_url}/v1/models" answers 200; return None otherwise.
+    # Why: every runtime client in this repo (embedding_client, vision,
+    # concepts, reranker — see reranker.py:82-87) appends path-only segments
+    # like "/embeddings" to base_url, while the OpenAI SDK used for discovery
+    # tolerates a missing /v1, so a malformed base_url silently passes
+    # Test & Discover and then 404s at embed time.
+    import requests
+
+    base_url = api.base_url.rstrip("/")  # ignore trailing slashes before the suffix test
+    if base_url.endswith("/v1"):
+        return None  # already conforms; no probe needed
+
+    try:  # NOTE(review): the probe below carries no auth headers — confirm that is intended
+        probe = requests.get(f"{base_url}/v1/models", timeout=10)
+    except requests.RequestException:
+        return None  # Let the SDK call surface the real connectivity error.
+
+    if probe.status_code == 200:  # /v1 root exists, so the configured base_url is one level too high
+        return (
+            f"base_url '{api.base_url}' is missing the '/v1' suffix that the rest "
+            f"of Mnemosyne expects. A probe of '{base_url}/v1/models' succeeded, "
+            f"so the server is up — but at embed/chat/rerank time the runtime "
+            f"clients build URLs like '{base_url}/embeddings' which will 404. "
+            f"Set base_url to '{base_url}/v1' and re-test."
+        )
+
+    return None  # any non-200 status (e.g. 401/403/404) is treated as "no mismatch detected"
+
+
 def _discover_openai_models(api):
    """Discover models from an OpenAI-compatible API."""
    try: