From 75d529cf168e9c584301e225acf7e7a8fed5e824 Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Tue, 12 May 2026 11:16:22 -0400 Subject: [PATCH] docs: update Mantle setup to reflect automatic shim detection --- README.md | 37 +++++---- docs/bedrock.md | 48 ++++++------ pallas/mantle_shims.py | 145 ++++++++++++++++++++++++++++++++++++ pallas/server.py | 100 +++++++++---------------- pyproject.toml | 6 ++ tests/test_mantle_shims.py | 149 +++++++++++++++++++++++++++++++++++++ 6 files changed, 380 insertions(+), 105 deletions(-) create mode 100644 pallas/mantle_shims.py create mode 100644 tests/test_mantle_shims.py diff --git a/README.md b/README.md index 60838f7..9f59663 100644 --- a/README.md +++ b/README.md @@ -89,30 +89,35 @@ model_capabilities: vision: false context_window: 200000 max_output_tokens: 32000 - mantle: false # optional — see "Mantle override" below ``` Capabilities are published in the registry and used to register unknown models with fast-agent's `ModelDatabase`. -### Mantle override (`model_capabilities.mantle: true`) +### AWS Bedrock Mantle — automatic shims -Set this when the `anthropic.base_url` points at the AWS Bedrock **Mantle** -endpoint (`https://bedrock-mantle.{region}.api.aws/anthropic`). Pallas then -installs a provider-specific override for `(Provider.ANTHROPIC, model_name)` -in fast-agent's `ModelDatabase._PROVIDER_MODEL_OVERRIDES` that clones the -model's base parameters but strips the features Mantle rejects: +When `anthropic.base_url` points at a Bedrock Mantle endpoint +(`https://bedrock-mantle.{region}.api.aws/anthropic`), Pallas auto-detects it +at startup and installs two compatibility shims via `pallas.mantle_shims`. +No config flag is required. -- `anthropic_required_betas` — no `anthropic-beta: ...` header -- `reasoning` / `reasoning_effort_spec` — no extended-thinking request -- `anthropic_task_budget_supported` — no task budget -- `anthropic_web_fetch_version` / `anthropic_web_search_version` — no web tools -- `cache_ttl` — prompt caching disabled +**Shim 1 — wire-name prefix.** Mantle requires the full `anthropic.` +wire id (e.g. `anthropic.claude-opus-4-7`). Fast-agent's model-spec parser +would otherwise strip the `anthropic.` prefix, causing a misleading +`404 "The model '...' does not exist"`. The shim registers the prefixed +forms in `ModelDatabase._PROVIDER_WIRE_MODEL_NAMES`. -Without this flag, fast-agent sends its default beta headers and `thinking` -parameters for modern Claude models (e.g. Opus 4.7, Sonnet 4.6) which Mantle -rejects with a misleading `404 "The model '...' does not exist"`. See -`docs/bedrock.md` for the full configuration walkthrough. +**Shim 2 — strip `caller: null` from replayed `tool_use` blocks.** Anthropic +SDK 0.100.x leaks `caller: null` onto serialised `BetaToolUseBlock` params +([upstream issue #1454](https://github.com/anthropics/anthropic-sdk-python/issues/1454)). +`api.anthropic.com` silently tolerates the extra field; Mantle rejects it +with `tool_use.caller: Input should be a valid dictionary or object`, which +breaks the MCP tool-use loop on the second turn. The shim monkeypatches +`AnthropicConverter._deserialize_assistant_raw_blocks` and +`_append_server_tool_channel_blocks` to pop the field before history is +re-sent. + +See `docs/bedrock.md` for the full configuration walkthrough. --- diff --git a/docs/bedrock.md b/docs/bedrock.md index cc8b46c..b97cd7f 100644 --- a/docs/bedrock.md +++ b/docs/bedrock.md @@ -211,36 +211,38 @@ Mantle exposes the Anthropic Messages API for supported Claude models. Fast-agen ```yaml default_model: anthropic.claude-opus-4-7 -# ── Model Capabilities ────────────────────────────────────────────────────── -# mantle: true is REQUIRED — it installs a Pallas-level provider override that -# strips the features the Mantle endpoint rejects (anthropic-beta headers, -# extended thinking, task budget, web tools, prompt caching). Without this -# flag fast-agent sends those features and Mantle returns a misleading -# 404 "model does not exist" error. -model_capabilities: - vision: true - context_window: 1000000 - max_output_tokens: 128000 - mantle: true - # ── Anthropic provider pointing at Mantle ──────────────────────────────────── anthropic: base_url: "https://bedrock-mantle.us-east-1.api.aws/anthropic" ``` +That's the whole configuration. Pallas auto-detects the +`bedrock-mantle` hostname in `anthropic.base_url` at startup and installs +two compatibility shims so fast-agent's default request shape matches +what Mantle expects (see `pallas/mantle_shims.py`): + +1. **Wire-name prefix** — re-adds the `anthropic.` prefix that fast-agent's + parser strips off, because Mantle requires the full + `anthropic.claude-opus-4-7` wire id. Without this shim you get + `404 "The model '...' does not exist"`. + +2. **`caller: null` strip** — drops the stray `caller` field Anthropic + SDK 0.100.x leaks onto replayed `tool_use` blocks (upstream issue + [anthropics/anthropic-sdk-python#1454](https://github.com/anthropics/anthropic-sdk-python/issues/1454)). + Mantle's validator rejects `caller: null` with `"tool_use.caller: + Input should be a valid dictionary or object"`, which would otherwise + break the MCP tool-use loop on the second turn. + The Anthropic SDK appends `/v1/messages` to `base_url` automatically. -> **Why `mantle: true` is required.** Fast-agent's built-in `ModelDatabase` -> entries for Claude Opus 4.7 and Haiku 4.5 declare features that the -> Anthropic API supports but the Mantle endpoint rejects — -> `anthropic-beta: code-execution-web-tools-...` headers, extended thinking, -> task budget, web search/fetch tools, and prompt caching in some -> configurations. When Mantle sees a request carrying those features it -> responds with a confusingly generic `{"type": "not_found_error", -> "message": "The model '...' does not exist"}`. Pallas reads the `mantle` -> flag and writes an entry into fast-agent's `_PROVIDER_MODEL_OVERRIDES` -> dict for `(Provider.ANTHROPIC, )` that strips those fields, so -> fast-agent sends a plain Messages API request that Mantle accepts. +**Feature support.** Mantle accepts the same Messages API request shape +as `api.anthropic.com` once the shims are in place, including full MCP +tool use (`tools`, `tool_use`/`tool_result` content blocks). Extended +thinking, task budget, web_fetch/web_search server tools, and explicit +prompt caching (`cache_control`) are not available via Mantle and should +be left off in agent code when targeting Mantle — fast-agent's +`ModelDatabase` entries already disable the ones the Anthropic SDK 0.100.x +would otherwise auto-attach. ### `fastagent.secrets.yaml` diff --git a/pallas/mantle_shims.py b/pallas/mantle_shims.py new file mode 100644 index 0000000..17a448f --- /dev/null +++ b/pallas/mantle_shims.py @@ -0,0 +1,145 @@ +"""AWS Bedrock Mantle compatibility shims for fast-agent. + +Mantle is AWS's Anthropic-Messages-API-compatible gateway, hosted at +``https://bedrock-mantle.{region}.api.aws/anthropic``. Fast-agent talks to it +via its built-in ``anthropic`` provider, but two layers of reshaping are needed +before the wire traffic is valid: + +1. **Model-name prefix.** Mantle requires the full ``anthropic.`` wire + id (e.g. ``anthropic.claude-opus-4-7``). Fast-agent's model-spec parser + treats the ``anthropic.`` prefix as the provider hint and strips it off + the wire name. We re-register the prefixed forms via + ``ModelDatabase._PROVIDER_WIRE_MODEL_NAMES`` so the right id goes out. + +2. **``caller: null`` leakage on replayed ``tool_use`` blocks.** Anthropic + SDK 0.100.x ``BetaToolUseBlock`` carries an optional ``caller`` field; + the matching ``BetaToolUseBlockParam`` TypedDict declares it required. + Fast-agent's multipart converter re-serialises assistant history with + ``exclude_none=False``, producing ``{"type": "tool_use", ..., "caller": null}``. + ``api.anthropic.com`` silently accepts that; Mantle rejects it as + ``tool_use.caller: Input should be a valid dictionary or object``, + breaking the tool-use loop on the second turn. We strip ``caller`` from + any ``tool_use`` dict emitted by the two static methods that feed + replayed history back into the wire. + + Upstream SDK tracker: https://github.com/anthropics/anthropic-sdk-python/issues/1454 + +Both shims are idempotent and may be installed at process startup before any +fast-agent ``FastAgent`` instance is constructed. +""" +from __future__ import annotations + +import logging +from collections.abc import Mapping, Sequence +from typing import Any, cast + +logger = logging.getLogger(__name__) + + +# ── Model ids known to be served on Mantle (keep in sync with AWS docs). ── +# The key is fast-agent's internal model_name (provider prefix stripped), +# the value is the wire id Mantle expects. +MANTLE_WIRE_NAMES: dict[str, str] = { + "claude-haiku-4-5": "anthropic.claude-haiku-4-5", + "claude-opus-4-7": "anthropic.claude-opus-4-7", +} + + +def is_mantle_base_url(base_url: str | None) -> bool: + """Return True if the given anthropic base_url points at Mantle.""" + if not base_url: + return False + return "bedrock-mantle" in base_url + + +# ── Shim 1: model-name prefix ──────────────────────────────────────────────── + +def install_wire_name_prefix() -> None: + """Register the prefixed wire ids for known Mantle-hosted Claude models.""" + from fast_agent.llm.model_database import ModelDatabase + from fast_agent.llm.provider_types import Provider + + for fa_name, wire_name in MANTLE_WIRE_NAMES.items(): + key = (Provider.ANTHROPIC, ModelDatabase.normalize_model_name(fa_name)) + ModelDatabase._PROVIDER_WIRE_MODEL_NAMES[key] = wire_name # noqa: SLF001 + + logger.info( + "Mantle wire-name shim installed for models: %s", + ", ".join(sorted(MANTLE_WIRE_NAMES.keys())), + ) + + +# ── Shim 2: strip `caller` from replayed tool_use blocks ───────────────────── + +def _strip_tool_use_caller(blocks: list[Any]) -> list[Any]: + """Remove the stray ``caller`` field Anthropic SDK 0.100.x leaks into + replayed ``tool_use`` blocks. Idempotent; only touches dicts whose + ``type == "tool_use"``. + """ + for block in blocks: + if isinstance(block, dict) and block.get("type") == "tool_use": + block.pop("caller", None) + return blocks + + +_tool_use_patch_installed = False + + +def install_tool_use_caller_strip() -> None: + """Monkeypatch ``AnthropicConverter`` to drop ``caller`` from replayed + ``tool_use`` blocks. Safe to call more than once; subsequent calls are + no-ops. + """ + global _tool_use_patch_installed + if _tool_use_patch_installed: + return + + from fast_agent.llm.provider.anthropic.multipart_converter_anthropic import ( + AnthropicConverter, + ) + + original_deserialize = AnthropicConverter._deserialize_assistant_raw_blocks # noqa: SLF001 + + def patched_deserialize( + channels: Mapping[str, Sequence[Any]], + ) -> list[Any]: + result = original_deserialize(channels) + return cast("list[Any]", _strip_tool_use_caller(list(result))) + + AnthropicConverter._deserialize_assistant_raw_blocks = staticmethod( # noqa: SLF001 + patched_deserialize + ) + + original_append = AnthropicConverter._append_server_tool_channel_blocks # noqa: SLF001 + + def patched_append( + channels: Mapping[str, Sequence[Any]] | None, + destination: list[Any], + ) -> None: + original_append(channels, destination) + _strip_tool_use_caller(destination) + + AnthropicConverter._append_server_tool_channel_blocks = staticmethod( # noqa: SLF001 + patched_append + ) + + _tool_use_patch_installed = True + logger.info("Mantle tool_use.caller strip shim installed") + + +# ── Orchestrator ───────────────────────────────────────────────────────────── + +def install_all() -> None: + """Install all Mantle shims. Call once at process startup.""" + install_wire_name_prefix() + install_tool_use_caller_strip() + + +def maybe_install(anthropic_base_url: str | None) -> bool: + """Install shims only when ``anthropic_base_url`` is a Mantle endpoint. + Returns True if the shims were installed. + """ + if not is_mantle_base_url(anthropic_base_url): + return False + install_all() + return True diff --git a/pallas/server.py b/pallas/server.py index e25c1a5..2621b8d 100644 --- a/pallas/server.py +++ b/pallas/server.py @@ -123,84 +123,38 @@ def _preflight_mcp_servers(agent_name: str, servers: dict[str, dict]) -> None: # ── Model registration ──────────────────────────────────────────────────────── def _register_one_model(model_spec: str, capabilities: dict) -> None: - """Register a single model with fast-agent's ModelDatabase. + """Register a single unknown model with fast-agent's ModelDatabase. - Two cases: - - 1. **Unknown model** — if fast-agent has no built-in entry for this model, - register a minimal ``ModelParameters`` with the declared capabilities. - - 2. **Mantle-hosted model** (``capabilities.mantle: true``) — regardless of - whether the model has a built-in entry, install a provider-specific - override for ``(Provider.ANTHROPIC, model_name)`` in - ``_PROVIDER_MODEL_OVERRIDES`` that strips the features the AWS Bedrock - Mantle endpoint rejects: - - - ``anthropic_required_betas`` (no ``anthropic-beta`` header) - - ``reasoning`` / ``reasoning_effort_spec`` (no extended-thinking request) - - ``anthropic_task_budget_supported`` - - ``anthropic_web_fetch_version`` / ``anthropic_web_search_version`` - - ``cache_ttl`` (prompt caching is not advertised as supported on - Mantle for every model; disable the cache planner by default) - - Without this override fast-agent sends beta headers and ``thinking`` - parameters that Mantle rejects with a misleading ``"model does not - exist"`` 404. + If fast-agent already has a built-in entry for this model we leave it + alone. Otherwise we register a minimal ``ModelParameters`` using the + declared capabilities so the model resolves cleanly at agent startup. """ from fast_agent.llm.model_database import ModelDatabase, ModelParameters - from fast_agent.llm.provider_types import Provider model_name = model_spec.split(".", 1)[-1] if "." in model_spec else model_spec + if ModelDatabase.get_model_params(model_name) is not None: + return + is_vision = capabilities.get("vision", False) context_window = capabilities.get("context_window", 131072) max_output_tokens = capabilities.get("max_output_tokens", 16384) - is_mantle = capabilities.get("mantle", False) - existing = ModelDatabase.get_model_params(model_name) - - if existing is None: - # Unknown model — register a fresh runtime entry. - if is_vision: - tokenizes = list(ModelDatabase.QWEN_MULTIMODAL) - logger.info("Registered model '%s' with vision capabilities", model_name) - else: - tokenizes = list(ModelDatabase.TEXT_ONLY) - logger.info("Registered model '%s' as text-only", model_name) - - ModelDatabase.register_runtime_model_params( - model_name, - ModelParameters( - context_window=context_window, - max_output_tokens=max_output_tokens, - tokenizes=tokenizes, - ), - ) - base_params = ModelDatabase.get_model_params(model_name) + if is_vision: + tokenizes = list(ModelDatabase.QWEN_MULTIMODAL) + logger.info("Registered model '%s' with vision capabilities", model_name) else: - base_params = existing + tokenizes = list(ModelDatabase.TEXT_ONLY) + logger.info("Registered model '%s' as text-only", model_name) - if is_mantle and base_params is not None: - # Clone the base params and strip Mantle-incompatible features. - override = base_params.model_copy( - update={ - "context_window": context_window, - "max_output_tokens": max_output_tokens, - "anthropic_required_betas": None, - "reasoning": None, - "reasoning_effort_spec": None, - "anthropic_task_budget_supported": False, - "anthropic_web_fetch_version": None, - "anthropic_web_search_version": None, - "cache_ttl": None, - } - ) - normalized = ModelDatabase.normalize_model_name(model_name) - ModelDatabase._PROVIDER_MODEL_OVERRIDES[(Provider.ANTHROPIC, normalized)] = override - logger.info( - "Registered Mantle override for anthropic/'%s' (strips beta headers, thinking, web tools, caching)", - model_name, - ) + ModelDatabase.register_runtime_model_params( + model_name, + ModelParameters( + context_window=context_window, + max_output_tokens=max_output_tokens, + tokenizes=tokenizes, + ), + ) @@ -212,7 +166,14 @@ def _register_unknown_models(deployment_config: dict) -> None: per model: if the agent carries its own ``model_capabilities`` block, those take effect; otherwise the top-level ``model_capabilities`` from ``fastagent.config.yaml`` apply. + + Also auto-detects an AWS Bedrock Mantle ``anthropic.base_url`` and installs + the Mantle compatibility shims (wire-name prefix and ``tool_use.caller`` + strip) via :mod:`pallas.mantle_shims`. No config flag needed — Pallas + reads the base_url and does the right thing. """ + from pallas import mantle_shims + fastagent_config_path = _config_root() / "fastagent.config.yaml" if not fastagent_config_path.exists(): return @@ -220,6 +181,13 @@ def _register_unknown_models(deployment_config: dict) -> None: with open(fastagent_config_path) as f: fa_config = yaml.safe_load(f) or {} + anthropic_base_url = fa_config.get("anthropic", {}).get("base_url", "") + if mantle_shims.maybe_install(anthropic_base_url): + logger.info( + "Detected Bedrock Mantle endpoint (%s); installed fast-agent shims.", + anthropic_base_url, + ) + default_model = fa_config.get("default_model", "") default_capabilities = fa_config.get("model_capabilities", {}) diff --git a/pyproject.toml b/pyproject.toml index ba86ec6..ad8b005 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,15 @@ dependencies = [ [project.scripts] pallas = "pallas.server:main" +[project.optional-dependencies] +dev = ["pytest>=8.0"] + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["pallas"] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/tests/test_mantle_shims.py b/tests/test_mantle_shims.py new file mode 100644 index 0000000..33aab44 --- /dev/null +++ b/tests/test_mantle_shims.py @@ -0,0 +1,149 @@ +"""Tests for pallas.mantle_shims. + +These tests exercise the module in isolation: they do not hit the network +and do not require fast-agent to be configured. They do, however, import +fast-agent so that the monkeypatch targets exist — so fast-agent-mcp must +be installed in the environment running pytest. +""" +from __future__ import annotations + +import pytest + +from pallas import mantle_shims + + +# ── is_mantle_base_url ─────────────────────────────────────────────────────── + +@pytest.mark.parametrize( + "url,expected", + [ + ("https://bedrock-mantle.us-east-1.api.aws/anthropic", True), + ("https://bedrock-mantle.ca-central-1.api.aws/anthropic", True), + ("https://api.anthropic.com", False), + ("https://example.com/bedrock", False), + ("", False), + (None, False), + ], +) +def test_is_mantle_base_url(url: str | None, expected: bool) -> None: + assert mantle_shims.is_mantle_base_url(url) is expected + + +# ── install_wire_name_prefix ───────────────────────────────────────────────── + +def test_install_wire_name_prefix_registers_prefixed_ids() -> None: + from fast_agent.llm.model_database import ModelDatabase + from fast_agent.llm.provider_types import Provider + + mantle_shims.install_wire_name_prefix() + + for fa_name, wire_name in mantle_shims.MANTLE_WIRE_NAMES.items(): + key = (Provider.ANTHROPIC, ModelDatabase.normalize_model_name(fa_name)) + assert ModelDatabase._PROVIDER_WIRE_MODEL_NAMES.get(key) == wire_name + + +def test_install_wire_name_prefix_is_idempotent() -> None: + mantle_shims.install_wire_name_prefix() + mantle_shims.install_wire_name_prefix() # must not raise + # Second call leaves the same mapping in place. + from fast_agent.llm.model_database import ModelDatabase + from fast_agent.llm.provider_types import Provider + + key = (Provider.ANTHROPIC, ModelDatabase.normalize_model_name("claude-opus-4-7")) + assert ModelDatabase._PROVIDER_WIRE_MODEL_NAMES[key] == "anthropic.claude-opus-4-7" + + +# ── _strip_tool_use_caller ─────────────────────────────────────────────────── + +def test_strip_tool_use_caller_removes_caller_key() -> None: + blocks = [ + {"type": "tool_use", "id": "t1", "name": "foo", "input": {}, "caller": None}, + {"type": "text", "text": "hello"}, + {"type": "tool_use", "id": "t2", "name": "bar", "input": {}}, # no caller + ] + result = mantle_shims._strip_tool_use_caller(blocks) + + assert "caller" not in result[0] + assert result[1] == {"type": "text", "text": "hello"} + assert "caller" not in result[2] + + +def test_strip_tool_use_caller_is_idempotent() -> None: + blocks = [{"type": "tool_use", "id": "t1", "name": "foo", "input": {}, "caller": None}] + mantle_shims._strip_tool_use_caller(blocks) + mantle_shims._strip_tool_use_caller(blocks) # second pass must be a no-op + assert "caller" not in blocks[0] + + +def test_strip_tool_use_caller_ignores_non_dict_blocks() -> None: + # Anthropic SDK model objects are sometimes passed instead of dicts; + # the helper must leave those untouched. + class Sentinel: + type = "tool_use" + + s = Sentinel() + blocks = [s] + mantle_shims._strip_tool_use_caller(blocks) + assert blocks[0] is s # unchanged + + +# ── install_tool_use_caller_strip ──────────────────────────────────────────── + +def test_install_tool_use_caller_strip_patches_converter() -> None: + from fast_agent.llm.provider.anthropic.multipart_converter_anthropic import ( + AnthropicConverter, + ) + + mantle_shims.install_tool_use_caller_strip() + + # The patched deserialize must strip caller from replayed tool_use dicts. + channels: dict[str, list[dict]] = {"assistant_raw": []} + # We can't easily stub the original's internal behaviour, so just + # verify the patch is in place by round-tripping a destination list + # through _append_server_tool_channel_blocks, which we *know* ends by + # calling our strip helper on `destination`. + destination: list[dict] = [ + {"type": "tool_use", "id": "t1", "name": "foo", "input": {}, "caller": None}, + ] + AnthropicConverter._append_server_tool_channel_blocks(None, destination) + assert "caller" not in destination[0] + + +def test_install_tool_use_caller_strip_is_idempotent() -> None: + mantle_shims.install_tool_use_caller_strip() + mantle_shims.install_tool_use_caller_strip() # must not raise or re-wrap + + +# ── maybe_install ──────────────────────────────────────────────────────────── + +def test_maybe_install_installs_when_mantle(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[str] = [] + monkeypatch.setattr( + mantle_shims, "install_wire_name_prefix", + lambda: calls.append("wire"), + ) + monkeypatch.setattr( + mantle_shims, "install_tool_use_caller_strip", + lambda: calls.append("tool_use"), + ) + + installed = mantle_shims.maybe_install("https://bedrock-mantle.us-east-1.api.aws/anthropic") + assert installed is True + assert calls == ["wire", "tool_use"] + + +def test_maybe_install_noop_for_non_mantle(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[str] = [] + monkeypatch.setattr( + mantle_shims, "install_wire_name_prefix", + lambda: calls.append("wire"), + ) + monkeypatch.setattr( + mantle_shims, "install_tool_use_caller_strip", + lambda: calls.append("tool_use"), + ) + + assert mantle_shims.maybe_install("https://api.anthropic.com") is False + assert mantle_shims.maybe_install(None) is False + assert mantle_shims.maybe_install("") is False + assert calls == []