From 440f7fb60cc47387f8f2dff79dcfd3a9563dfb9c Mon Sep 17 00:00:00 2001 From: Robert Helewka Date: Wed, 27 May 2026 05:41:08 -0400 Subject: [PATCH] feat: add per-agent loop safeguards for tool-call turns Introduce three optional per-agent config fields to bound tool-call loop execution: `max_iterations` (default 15), `streaming_timeout` (default 120s), and `turn_timeout` (default 300s wall-clock). - Plumb limits from agent config through `_build_agents_table` and `_start_agent` into `MultimodalAgentMCPServer` via `request_limits` - Apply `max_iterations` and `streaming_timeout` to `RequestParams` - Wrap turn dispatch in `asyncio.wait_for` to enforce `turn_timeout`, logging a warning on timeout - Document the new fields in README --- README.md | 22 ++++++++++++++++++++++ pallas/multimodal_server.py | 29 ++++++++++++++++++++++++----- pallas/server.py | 10 ++++++++++ pyproject.toml | 2 +- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9f59663..2c7b5bd 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,28 @@ agents: description: "Web search and knowledge graph" ``` +### Loop safeguards + +Three optional fields bound how long an agent's tool-call loop can run: + +| Field | Type | Default | Purpose | +|---|---|---|---| +| `max_iterations` | int | 15 | Maximum tool calls in a single agent turn | +| `streaming_timeout` | float | 120 | Max idle seconds between streaming events | +| `turn_timeout` | float | 300 | Hard wall-clock limit for a full turn (seconds) | + +All three are optional. Agents that omit them use the defaults shown above. + +```yaml +agents: + research: + module: agents.research + port: 8250 + max_iterations: 10 # this agent only needs a few search calls + streaming_timeout: 60 # fail fast on a slow search MCP + turn_timeout: 120 # research turns should not take more than 2 min +``` + --- ## `fastagent.config.yaml` extensions diff --git a/pallas/multimodal_server.py b/pallas/multimodal_server.py index c93c87f..a760ec7 100644 --- a/pallas/multimodal_server.py +++ b/pallas/multimodal_server.py @@ -19,6 +19,7 @@ fast-agent instance whose ``message_history`` is seeded from the caller's memory, no restart amnesia. """ +import asyncio import time from typing import Any @@ -125,8 +126,9 @@ def _history_payload_to_multipart( class MultimodalAgentMCPServer(AgentMCPServer): """AgentMCPServer with optional image + history support on send_message.""" - def __init__(self, *args, **kwargs) -> None: + def __init__(self, *args, request_limits: dict | None = None, **kwargs) -> None: super().__init__(*args, **kwargs) + self._request_limits = request_limits or {} self._register_health_routes() def _register_health_routes(self) -> None: @@ -210,6 +212,8 @@ class MultimodalAgentMCPServer(AgentMCPServer): request_params = RequestParams( tool_execution_handler=EnrichedMCPToolProgressManager(report_progress), emit_loop_progress=True, + max_iterations=self._request_limits.get("max_iterations", 15), + streaming_timeout=self._request_limits.get("streaming_timeout", 120.0), ) instance = await self._acquire_instance(ctx) agent = instance.app[agent_name] @@ -271,11 +275,26 @@ class MultimodalAgentMCPServer(AgentMCPServer): ) return response - if agent_context and ctx: - return await self.with_bridged_context( - agent_context, ctx, execute_send + turn_timeout = self._request_limits.get("turn_timeout", 300.0) + + async def _dispatch() -> str: + if agent_context and ctx: + return await self.with_bridged_context( + agent_context, ctx, execute_send + ) + return await execute_send() + + try: + return await asyncio.wait_for(_dispatch(), timeout=turn_timeout) + except asyncio.TimeoutError: + logger.warning( + f"Agent '{agent_name}' turn exceeded {turn_timeout}s wall-clock limit", + name="turn_timeout", + agent=agent_name, + turn_timeout=turn_timeout, + conversation_id=conversation_id, ) - return await execute_send() + raise except BaseException: metrics_outcome = "error" raise diff --git a/pallas/server.py b/pallas/server.py index cfe5254..255ec53 100644 --- a/pallas/server.py +++ b/pallas/server.py @@ -62,6 +62,9 @@ def _build_agents_table(config: dict) -> dict[str, dict]: "port": agent["port"], "model": agent.get("model"), "model_capabilities": agent.get("model_capabilities"), + "max_iterations": agent.get("max_iterations"), + "streaming_timeout": agent.get("streaming_timeout"), + "turn_timeout": agent.get("turn_timeout"), } for name, agent in config["agents"].items() } @@ -259,6 +262,12 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None: # breaking the "Pallas is ephemeral" contract. # With "request" the Pallas process holds no per-conversation state # and the LLM sees exactly what Daedalus asks it to see. + request_limits = { + k: entry[k] + for k in ("max_iterations", "streaming_timeout", "turn_timeout") + if entry.get(k) is not None + } + server = MultimodalAgentMCPServer( primary_instance=primary_instance, create_instance=fast_instance._server_instance_factory, @@ -267,6 +276,7 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None: server_name=f"{fast_instance.name}-MCP-Server", host="0.0.0.0", get_registry_version=fast_instance._get_registry_version, + request_limits=request_limits, ) downstream_servers = _resolve_downstream_servers(fast_instance) diff --git a/pyproject.toml b/pyproject.toml index 53a38fb..3c42465 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pallas-mcp" -version = "0.2.2" +version = "0.3.0" description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP" requires-python = ">=3.13.5" dependencies = [