docs(pallas): expand LLM preflight docs and refactor health probes
This commit is contained in:
522
tests/test_health.py
Normal file
522
tests/test_health.py
Normal file
@@ -0,0 +1,522 @@
|
||||
"""Tests for pallas.health — per-provider preflight dispatch.
|
||||
|
||||
Covers the matrix documented in ``pallas/pallas/health.py``:
|
||||
|
||||
- ``anthropic`` (direct, Mantle)
|
||||
- ``openai``
|
||||
- ``generic``
|
||||
- ``bedrock`` (presence-only, no HTTP)
|
||||
- unknown / malformed provider name
|
||||
|
||||
All HTTP is faked with ``httpx.MockTransport`` so nothing touches the network.
|
||||
Tests use ``asyncio.run`` directly to match the existing convention in
|
||||
``tests/test_mantle_shims.py`` (pallas has no pytest-asyncio dependency).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from pallas import health
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _patch_httpx(monkeypatch: pytest.MonkeyPatch, handler) -> None:
    """Swap ``health.httpx.AsyncClient`` for a factory bound to a mock transport.

    Every client built through the patched name is a real
    ``httpx.AsyncClient`` whose ``transport`` keyword is forced to an
    ``httpx.MockTransport`` wrapping *handler*; any transport the caller
    supplied is overridden. The patch is undone by *monkeypatch* at teardown.
    """
    # Capture the genuine class before patching so the factory does not recurse.
    real_async_client = httpx.AsyncClient

    def _mocked_async_client(*positional, **options):
        mock_options = {**options, "transport": httpx.MockTransport(handler)}
        return real_async_client(*positional, **mock_options)

    monkeypatch.setattr(health.httpx, "AsyncClient", _mocked_async_client)
|
||||
|
||||
|
||||
def _patch_httpx_raising(monkeypatch: pytest.MonkeyPatch) -> None:
    """Install a mock transport whose handler fails on any request.

    Used by the bedrock / unknown-provider tests to show that those paths
    never issue an HTTP call.
    """
    # NOTE(review): the AssertionError is raised inside the transport; if the
    # code under test wraps its HTTP calls in a broad ``except``, it would show
    # up as an error status rather than a test failure — confirm against
    # pallas.health.

    def _reject(request: httpx.Request) -> httpx.Response:
        complaint = f"no HTTP call should be made, but got {request.method} {request.url}"
        raise AssertionError(complaint)

    _patch_httpx(monkeypatch, _reject)
|
||||
|
||||
|
||||
@pytest.fixture
def workspace(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Provide an empty working directory with provider env vars removed.

    ``validate_llm_providers`` reads ``fastagent.config.yaml`` /
    ``fastagent.secrets.yaml`` from the current directory and falls back to
    environment variables, so each test changes into ``tmp_path`` and drops
    the provider / AWS variables listed below before it runs.
    """
    monkeypatch.chdir(tmp_path)

    scrubbed_env = (
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_BASE_URL",
        "GENERIC_API_KEY",
        "GENERIC_BASE_URL",
        "AWS_BEARER_TOKEN_BEDROCK",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_PROFILE",
    )
    for name in scrubbed_env:
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv(name, raising=False)

    return tmp_path
|
||||
|
||||
|
||||
# ── _mantle_root_from_anthropic_base ────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "base,expected",
    [
        # A trailing ``/anthropic`` segment is removed.
        (
            "https://bedrock-mantle.us-east-1.api.aws/anthropic",
            "https://bedrock-mantle.us-east-1.api.aws",
        ),
        # Same, with a trailing slash after the segment.
        (
            "https://bedrock-mantle.us-east-1.api.aws/anthropic/",
            "https://bedrock-mantle.us-east-1.api.aws",
        ),
        # A bare root comes back unchanged.
        (
            "https://bedrock-mantle.us-east-1.api.aws",
            "https://bedrock-mantle.us-east-1.api.aws",
        ),
        # Only the final ``/anthropic`` is dropped; the proxy prefix is kept.
        (
            "https://example.com/proxy/anthropic",
            "https://example.com/proxy",
        ),
    ],
)
def test_mantle_root_from_anthropic_base(base: str, expected: str) -> None:
    """``_mantle_root_from_anthropic_base`` maps each base URL to its root."""
    actual = health._mantle_root_from_anthropic_base(base)
    assert actual == expected
|
||||
|
||||
|
||||
# ── _check_anthropic (direct + Mantle share this probe) ──────────────────────
|
||||
|
||||
|
||||
def test_check_anthropic_success_direct() -> None:
    """A 200 reply makes ``_check_anthropic`` return ``None``.

    Also pins the request it sends: the per-model URL under the given base
    plus the ``x-api-key`` and ``anthropic-version`` headers.
    """
    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        return httpx.Response(200, json={"id": "claude-sonnet-4-5"})

    async def probe() -> str | None:
        transport = httpx.MockTransport(respond)
        async with httpx.AsyncClient(transport=transport) as client:
            error = await health._check_anthropic(
                client,
                "sk-ant-real",
                "claude-sonnet-4-5",
                "https://api.anthropic.com/v1",
            )
        return error

    assert _run(probe()) is None

    first = seen[0]
    assert str(first.url) == "https://api.anthropic.com/v1/models/claude-sonnet-4-5"
    assert first.headers["x-api-key"] == "sk-ant-real"
    assert first.headers["anthropic-version"] == "2023-06-01"
|
||||
|
||||
|
||||
def test_check_anthropic_success_mantle_root() -> None:
    """With a Mantle ``/v1`` base, the probe requests ``/v1/models/<model>``."""
    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        return httpx.Response(200, json={"id": "anthropic.claude-opus-4-7"})

    async def probe() -> str | None:
        transport = httpx.MockTransport(respond)
        async with httpx.AsyncClient(transport=transport) as client:
            error = await health._check_anthropic(
                client,
                "sk-bedrock-fake",
                "anthropic.claude-opus-4-7",
                "https://bedrock-mantle.us-east-1.api.aws/v1",
            )
        return error

    assert _run(probe()) is None

    # The request must go to the Mantle region root, not
    # `/anthropic/v1/models/...`.
    expected_url = (
        "https://bedrock-mantle.us-east-1.api.aws/v1"
        "/models/anthropic.claude-opus-4-7"
    )
    assert str(seen[0].url) == expected_url
|
||||
|
||||
|
||||
def test_check_anthropic_401() -> None:
    """A 401 reply is reported as ``"API request failed (401)"``."""

    def respond(request: httpx.Request) -> httpx.Response:
        return httpx.Response(401, json={"error": "invalid_api_key"})

    async def probe() -> str | None:
        transport = httpx.MockTransport(respond)
        async with httpx.AsyncClient(transport=transport) as client:
            error = await health._check_anthropic(
                client,
                "bad-key",
                "claude-sonnet-4-5",
                "https://api.anthropic.com/v1",
            )
        return error

    message = _run(probe())
    assert message == "API request failed (401)"
|
||||
|
||||
|
||||
def test_check_anthropic_404_model_missing() -> None:
    """A 404 reply is reported as a missing-model message naming the model."""

    def respond(request: httpx.Request) -> httpx.Response:
        return httpx.Response(404, json={})

    async def probe() -> str | None:
        transport = httpx.MockTransport(respond)
        async with httpx.AsyncClient(transport=transport) as client:
            error = await health._check_anthropic(
                client,
                "key",
                "claude-foo",
                "https://api.anthropic.com/v1",
            )
        return error

    message = _run(probe())
    assert message == "model 'claude-foo' not found"
|
||||
|
||||
|
||||
# ── validate_llm_providers: anthropic direct ─────────────────────────────────
|
||||
|
||||
|
||||
def test_validate_anthropic_direct_ok(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Anthropic with no ``base_url`` and a valid key reports ``ok``.

    The probe is expected to hit ``api.anthropic.com`` with the provider
    prefix stripped from the model name.
    """
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: anthropic.claude-sonnet-4-5\n"
    )
    workspace.joinpath("fastagent.secrets.yaml").write_text(
        'anthropic:\n api_key: "sk-ant-real"\n'
    )

    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        return httpx.Response(200, json={"id": "claude-sonnet-4-5"})

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    assert report == {
        "anthropic": {"status": "ok", "model": "claude-sonnet-4-5"}
    }
    assert str(seen[0].url) == "https://api.anthropic.com/v1/models/claude-sonnet-4-5"
|
||||
|
||||
|
||||
def test_validate_anthropic_missing_key(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Without any API key, anthropic reports an error mentioning ``API key``."""
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: anthropic.claude-sonnet-4-5\n"
    )
    # No secrets file is written; per the original note, ProviderKeyManager
    # then raises ProviderKeyError.
    _patch_httpx_raising(monkeypatch)

    report = _run(health.validate_llm_providers(timeout=1.0))
    anthropic_entry = report["anthropic"]
    assert anthropic_entry["status"] == "error"
    assert "API key" in anthropic_entry["message"]
|
||||
|
||||
|
||||
# ── validate_llm_providers: anthropic via Mantle ─────────────────────────────
|
||||
|
||||
|
||||
def test_validate_anthropic_mantle_uses_region_root(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A Mantle ``base_url`` ending in ``/anthropic`` is probed at the region root."""
    config_text = (
        "default_model: anthropic.claude-opus-4-7\n"
        "anthropic:\n"
        ' base_url: "https://bedrock-mantle.us-east-1.api.aws/anthropic"\n'
    )
    secrets_text = 'anthropic:\n api_key: "sk-bedrock-fake"\n'
    workspace.joinpath("fastagent.config.yaml").write_text(config_text)
    workspace.joinpath("fastagent.secrets.yaml").write_text(secrets_text)

    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        return httpx.Response(200, json={"id": "anthropic.claude-opus-4-7"})

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    assert report == {
        "anthropic": {"status": "ok", "model": "anthropic.claude-opus-4-7"}
    }

    # The `/anthropic` suffix must be stripped AND the wire-name prefix applied.
    expected_url = (
        "https://bedrock-mantle.us-east-1.api.aws/v1"
        "/models/anthropic.claude-opus-4-7"
    )
    assert str(seen[0].url) == expected_url
|
||||
|
||||
|
||||
def test_validate_anthropic_mantle_401(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A 401 from the Mantle endpoint becomes an error entry with the status code."""
    config_text = (
        "default_model: anthropic.claude-opus-4-7\n"
        "anthropic:\n"
        ' base_url: "https://bedrock-mantle.us-east-1.api.aws/anthropic"\n'
    )
    secrets_text = 'anthropic:\n api_key: "sk-bogus"\n'
    workspace.joinpath("fastagent.config.yaml").write_text(config_text)
    workspace.joinpath("fastagent.secrets.yaml").write_text(secrets_text)

    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        return httpx.Response(401, json={"error": "unauthorized"})

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    expected_report = {
        "anthropic": {
            "status": "error",
            "model": "anthropic.claude-opus-4-7",
            "message": "API request failed (401)",
        }
    }
    assert report == expected_report
    # The failing request must still have been sent to the Mantle host.
    assert "bedrock-mantle" in str(seen[0].url)
|
||||
|
||||
|
||||
# ── validate_llm_providers: openai ───────────────────────────────────────────
|
||||
|
||||
|
||||
def test_validate_openai_model_in_list(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """OpenAI reports ``ok`` when the configured model is in the returned list."""
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: openai.gpt-4o-mini\n"
    )
    workspace.joinpath("fastagent.secrets.yaml").write_text(
        'openai:\n api_key: "sk-openai-real"\n'
    )

    def respond(request: httpx.Request) -> httpx.Response:
        listing = {"data": [{"id": "gpt-4o-mini"}, {"id": "gpt-4o"}]}
        return httpx.Response(200, json=listing)

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    openai_entry = report["openai"]
    assert openai_entry["status"] == "ok"
    assert openai_entry["model"] == "gpt-4o-mini"
|
||||
|
||||
|
||||
def test_validate_openai_model_missing_from_list(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """OpenAI reports an error when the configured model is not in the list.

    The message must name both the requested model and the available ones.
    """
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: openai.gpt-nonexistent\n"
    )
    workspace.joinpath("fastagent.secrets.yaml").write_text(
        'openai:\n api_key: "sk-openai-real"\n'
    )

    def respond(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json={"data": [{"id": "gpt-4o-mini"}]})

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    openai_entry = report["openai"]
    assert openai_entry["status"] == "error"

    message = openai_entry["message"]
    assert "gpt-nonexistent" in message
    assert "gpt-4o-mini" in message  # the available models are listed too
|
||||
|
||||
|
||||
# ── validate_llm_providers: generic ──────────────────────────────────────────
|
||||
|
||||
|
||||
def test_validate_generic_ok_regardless_of_body(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Generic provider is ``ok`` whenever ``/v1/models`` answers 200.

    llama.cpp returns a non-OpenAI-shaped ``/v1/models`` payload, so only the
    status code matters here.
    """
    config_text = (
        "default_model: generic.Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf\n"
        "generic:\n"
        ' base_url: "http://nyx.helu.ca:22079/v1"\n'
    )
    workspace.joinpath("fastagent.config.yaml").write_text(config_text)
    # generic needs no api_key, so no secrets file is written.

    seen: list[httpx.Request] = []

    def respond(request: httpx.Request) -> httpx.Response:
        seen.append(request)
        # llama.cpp's shape: Ollama-style `models` alongside OpenAI `data`.
        payload = {
            "models": [{"name": "Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf"}],
            "object": "list",
            "data": [{"id": "Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf"}],
        }
        return httpx.Response(200, json=payload)

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    assert report == {
        "generic": {
            "status": "ok",
            "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf",
        }
    }
    assert str(seen[0].url) == "http://nyx.helu.ca:22079/v1/models"
|
||||
|
||||
|
||||
def test_validate_generic_unreachable(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A connection failure is reported as an error mentioning ``unreachable``."""
    config_text = (
        "default_model: generic.Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf\n"
        "generic:\n"
        ' base_url: "http://nyx.helu.ca:22079/v1"\n'
    )
    workspace.joinpath("fastagent.config.yaml").write_text(config_text)

    def refuse(request: httpx.Request) -> httpx.Response:
        raise httpx.ConnectError("connection refused")

    _patch_httpx(monkeypatch, refuse)

    report = _run(health.validate_llm_providers(timeout=1.0))
    generic_entry = report["generic"]
    assert generic_entry["status"] == "error"
    assert "unreachable" in generic_entry["message"].lower()
|
||||
|
||||
|
||||
def test_validate_generic_503(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A 503 from the generic endpoint becomes an error entry with the status code."""
    config_text = (
        "default_model: generic.Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf\n"
        "generic:\n"
        ' base_url: "http://nyx.helu.ca:22079/v1"\n'
    )
    workspace.joinpath("fastagent.config.yaml").write_text(config_text)

    def respond(request: httpx.Request) -> httpx.Response:
        return httpx.Response(503)

    _patch_httpx(monkeypatch, respond)

    report = _run(health.validate_llm_providers(timeout=1.0))
    expected_entry = {
        "status": "error",
        "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf",
        "message": "API request failed (503)",
    }
    assert report["generic"] == expected_entry
|
||||
|
||||
|
||||
# ── validate_llm_providers: bedrock (no HTTP) ────────────────────────────────
|
||||
|
||||
|
||||
def test_validate_bedrock_ok_with_bearer(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Bedrock reports ``ok`` with a bearer token set, without any HTTP call."""
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: bedrock.anthropic.claude-sonnet-4-6\n"
    )
    monkeypatch.setenv("AWS_BEARER_TOKEN_BEDROCK", "abs-fake")
    # Any HTTP request from here on raises inside the mock transport.
    _patch_httpx_raising(monkeypatch)

    report = _run(health.validate_llm_providers(timeout=1.0))
    bedrock_entry = report["bedrock"]
    assert bedrock_entry["status"] == "ok"
    assert bedrock_entry["model"] == "anthropic.claude-sonnet-4-6"
|
||||
|
||||
|
||||
def test_validate_bedrock_no_credentials(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Bedrock reports an error mentioning ``AWS credentials`` when none exist.

    The ``workspace`` fixture removes the AWS environment variables; this test
    additionally points the home directory at the empty workspace so a
    developer's real ``~/.aws`` directory cannot satisfy the presence check.
    """
    (workspace / "fastagent.config.yaml").write_text(
        "default_model: bedrock.anthropic.claude-sonnet-4-6\n"
    )
    # The real user may have an ~/.aws/credentials file, which would cause a
    # false positive; redirect the home directory so Path.home() / ".aws" does
    # not exist. HOME covers POSIX, while on Windows Path.home() resolves via
    # USERPROFILE, so both are redirected.
    for home_var in ("HOME", "USERPROFILE"):
        monkeypatch.setenv(home_var, str(workspace))
    _patch_httpx_raising(monkeypatch)

    results = _run(health.validate_llm_providers(timeout=1.0))
    assert results["bedrock"]["status"] == "error"
    assert "AWS credentials" in results["bedrock"]["message"]
|
||||
|
||||
|
||||
# ── validate_llm_providers: malformed / unknown provider ─────────────────────
|
||||
|
||||
|
||||
def test_validate_default_model_missing_prefix(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A ``default_model`` with no provider prefix is reported under ``unknown``."""
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: just-a-name\n"
    )
    _patch_httpx_raising(monkeypatch)

    report = _run(health.validate_llm_providers(timeout=1.0))
    unknown_entry = report["unknown"]
    assert unknown_entry["status"] == "error"
    assert "provider prefix" in unknown_entry["message"]
|
||||
|
||||
|
||||
def test_validate_unknown_provider(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """An unrecognised provider prefix is reported as an error under its own name."""
    workspace.joinpath("fastagent.config.yaml").write_text(
        "default_model: imaginary.some-model\n"
    )
    _patch_httpx_raising(monkeypatch)

    report = _run(health.validate_llm_providers(timeout=1.0))
    imaginary_entry = report["imaginary"]
    assert imaginary_entry["status"] == "error"
    assert "unknown provider" in imaginary_entry["message"]
|
||||
|
||||
|
||||
# ── get_health() payload ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_get_health_reports_generic_ok(
    workspace: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """End-to-end: a successful generic preflight yields a healthy payload.

    After the preflight succeeds, the composed health result should have
    ``status == "ok"`` and no LLM error in its message. This is the exact
    regression case that was showing up as ``LLM: generic: error`` in Daedalus.
    """
    (workspace / "fastagent.config.yaml").write_text(
        "default_model: generic.Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf\n"
        "generic:\n"
        ' base_url: "http://nyx.helu.ca:22079/v1"\n'
    )

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json={"data": []})

    _patch_httpx(monkeypatch, handler)
    preflight = _run(health.validate_llm_providers(timeout=1.0))
    # Check the preflight outcome itself: if no provider status had been
    # recorded, the composition below would skip the LLM branch and the final
    # assertions would pass without exercising the regression.
    assert preflight["generic"]["status"] == "ok"

    # Simulate the MCP `get_health` tool by calling check_downstream_health
    # with an empty server map and composing the message the same way
    # register_health_tool does.
    async def call() -> dict:
        result = await health.check_downstream_health({}, timeout=1.0)
        active = health._llm_status.get(health._active_provider)
        if active is None or active.get("status") == "ok":
            return result

        result["status"] = "degraded"
        existing = result.get("message", "")
        msg = (
            f"LLM: {health._active_provider}: "
            f"{active.get('message', 'unknown error')}"
        )
        result["message"] = f"{existing}; {msg}" if existing else msg
        return result

    final = _run(call())
    assert final["status"] == "ok"
    assert "LLM" not in final.get("message", "")
|
||||
Reference in New Issue
Block a user