feat: add per-agent loop safeguards for tool-call turns
Introduce three optional per-agent config fields to bound tool-call loop execution: `max_iterations` (default 15), `streaming_timeout` (default 120s), and `turn_timeout` (default 300s wall-clock).

- Plumb limits from agent config through `_build_agents_table` and `_start_agent` into `MultimodalAgentMCPServer` via `request_limits`
- Apply `max_iterations` and `streaming_timeout` to `RequestParams`
- Wrap turn dispatch in `asyncio.wait_for` to enforce `turn_timeout`, logging a warning on timeout
- Document the new fields in README
This commit is contained in:
22
README.md
22
README.md
@@ -75,6 +75,28 @@ agents:
|
||||
description: "Web search and knowledge graph"
|
||||
```
|
||||
|
||||
### Loop safeguards
|
||||
|
||||
Three optional fields bound how long an agent's tool-call loop can run:
|
||||
|
||||
| Field | Type | Default | Purpose |
|
||||
|---|---|---|---|
|
||||
| `max_iterations` | int | 15 | Maximum tool calls in a single agent turn |
|
||||
| `streaming_timeout` | float | 120 | Max idle seconds between streaming events |
|
||||
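| `turn_timeout` | float | 300 | Hard wall-clock limit for a full turn (seconds); on expiry the turn is cancelled, a warning is logged, and the timeout error is raised to the caller |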
|
||||
|
||||
All three are optional. Agents that omit them use the defaults shown above.
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
research:
|
||||
module: agents.research
|
||||
port: 8250
|
||||
max_iterations: 10 # this agent only needs a few search calls
|
||||
streaming_timeout: 60 # fail fast on a slow search MCP
|
||||
turn_timeout: 120 # research turns should not take more than 2 min
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `fastagent.config.yaml` extensions
|
||||
|
||||
@@ -19,6 +19,7 @@ fast-agent instance whose ``message_history`` is seeded from the caller's
|
||||
memory, no restart amnesia.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
@@ -125,8 +126,9 @@ def _history_payload_to_multipart(
|
||||
class MultimodalAgentMCPServer(AgentMCPServer):
|
||||
"""AgentMCPServer with optional image + history support on send_message."""
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
def __init__(self, *args, request_limits: dict | None = None, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self._request_limits = request_limits or {}
|
||||
self._register_health_routes()
|
||||
|
||||
def _register_health_routes(self) -> None:
|
||||
@@ -210,6 +212,8 @@ class MultimodalAgentMCPServer(AgentMCPServer):
|
||||
request_params = RequestParams(
|
||||
tool_execution_handler=EnrichedMCPToolProgressManager(report_progress),
|
||||
emit_loop_progress=True,
|
||||
max_iterations=self._request_limits.get("max_iterations", 15),
|
||||
streaming_timeout=self._request_limits.get("streaming_timeout", 120.0),
|
||||
)
|
||||
instance = await self._acquire_instance(ctx)
|
||||
agent = instance.app[agent_name]
|
||||
@@ -271,11 +275,26 @@ class MultimodalAgentMCPServer(AgentMCPServer):
|
||||
)
|
||||
return response
|
||||
|
||||
turn_timeout = self._request_limits.get("turn_timeout", 300.0)
|
||||
|
||||
async def _dispatch() -> str:
|
||||
if agent_context and ctx:
|
||||
return await self.with_bridged_context(
|
||||
agent_context, ctx, execute_send
|
||||
)
|
||||
return await execute_send()
|
||||
|
||||
try:
|
||||
return await asyncio.wait_for(_dispatch(), timeout=turn_timeout)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
f"Agent '{agent_name}' turn exceeded {turn_timeout}s wall-clock limit",
|
||||
name="turn_timeout",
|
||||
agent=agent_name,
|
||||
turn_timeout=turn_timeout,
|
||||
conversation_id=conversation_id,
|
||||
)
|
||||
raise
|
||||
except BaseException:
|
||||
metrics_outcome = "error"
|
||||
raise
|
||||
|
||||
@@ -62,6 +62,9 @@ def _build_agents_table(config: dict) -> dict[str, dict]:
|
||||
"port": agent["port"],
|
||||
"model": agent.get("model"),
|
||||
"model_capabilities": agent.get("model_capabilities"),
|
||||
"max_iterations": agent.get("max_iterations"),
|
||||
"streaming_timeout": agent.get("streaming_timeout"),
|
||||
"turn_timeout": agent.get("turn_timeout"),
|
||||
}
|
||||
for name, agent in config["agents"].items()
|
||||
}
|
||||
@@ -259,6 +262,12 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
||||
# breaking the "Pallas is ephemeral" contract.
|
||||
# With "request" the Pallas process holds no per-conversation state
|
||||
# and the LLM sees exactly what Daedalus asks it to see.
|
||||
request_limits = {
|
||||
k: entry[k]
|
||||
for k in ("max_iterations", "streaming_timeout", "turn_timeout")
|
||||
if entry.get(k) is not None
|
||||
}
|
||||
|
||||
server = MultimodalAgentMCPServer(
|
||||
primary_instance=primary_instance,
|
||||
create_instance=fast_instance._server_instance_factory,
|
||||
@@ -267,6 +276,7 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
||||
server_name=f"{fast_instance.name}-MCP-Server",
|
||||
host="0.0.0.0",
|
||||
get_registry_version=fast_instance._get_registry_version,
|
||||
request_limits=request_limits,
|
||||
)
|
||||
|
||||
downstream_servers = _resolve_downstream_servers(fast_instance)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "pallas-mcp"
|
||||
version = "0.2.2"
|
||||
version = "0.3.0"
|
||||
description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP"
|
||||
requires-python = ">=3.13.5"
|
||||
dependencies = [
|
||||
|
||||
Reference in New Issue
Block a user