feat: add per-agent loop safeguards for tool-call turns
Introduce three optional per-agent config fields to bound tool-call loop execution: `max_iterations` (default 15), `streaming_timeout` (default 120s), and `turn_timeout` (default 300s wall-clock).

- Plumb limits from agent config through `_build_agents_table` and `_start_agent` into `MultimodalAgentMCPServer` via `request_limits`
- Apply `max_iterations` and `streaming_timeout` to `RequestParams`
- Wrap turn dispatch in `asyncio.wait_for` to enforce `turn_timeout`, logging a warning on timeout
- Document the new fields in README
This commit is contained in:
22
README.md
22
README.md
@@ -75,6 +75,28 @@ agents:
|
|||||||
description: "Web search and knowledge graph"
|
description: "Web search and knowledge graph"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Loop safeguards
|
||||||
|
|
||||||
|
Three optional fields bound how long an agent's tool-call loop can run:
|
||||||
|
|
||||||
|
| Field | Type | Default | Purpose |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `max_iterations` | int | 15 | Maximum tool calls in a single agent turn |
|
||||||
|
| `streaming_timeout` | float | 120 | Max idle seconds between streaming events |
|
||||||
|
| `turn_timeout` | float | 300 | Hard wall-clock limit for a full turn (seconds) |
|
||||||
|
|
||||||
|
All three are optional. Agents that omit them use the defaults shown above.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
agents:
|
||||||
|
research:
|
||||||
|
module: agents.research
|
||||||
|
port: 8250
|
||||||
|
max_iterations: 10 # this agent only needs a few search calls
|
||||||
|
streaming_timeout: 60 # fail fast on a slow search MCP
|
||||||
|
turn_timeout: 120 # research turns should not take more than 2 min
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## `fastagent.config.yaml` extensions
|
## `fastagent.config.yaml` extensions
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ fast-agent instance whose ``message_history`` is seeded from the caller's
|
|||||||
memory, no restart amnesia.
|
memory, no restart amnesia.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import time
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -125,8 +126,9 @@ def _history_payload_to_multipart(
|
|||||||
class MultimodalAgentMCPServer(AgentMCPServer):
|
class MultimodalAgentMCPServer(AgentMCPServer):
|
||||||
"""AgentMCPServer with optional image + history support on send_message."""
|
"""AgentMCPServer with optional image + history support on send_message."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs) -> None:
|
def __init__(self, *args, request_limits: dict | None = None, **kwargs) -> None:
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
self._request_limits = request_limits or {}
|
||||||
self._register_health_routes()
|
self._register_health_routes()
|
||||||
|
|
||||||
def _register_health_routes(self) -> None:
|
def _register_health_routes(self) -> None:
|
||||||
@@ -210,6 +212,8 @@ class MultimodalAgentMCPServer(AgentMCPServer):
|
|||||||
request_params = RequestParams(
|
request_params = RequestParams(
|
||||||
tool_execution_handler=EnrichedMCPToolProgressManager(report_progress),
|
tool_execution_handler=EnrichedMCPToolProgressManager(report_progress),
|
||||||
emit_loop_progress=True,
|
emit_loop_progress=True,
|
||||||
|
max_iterations=self._request_limits.get("max_iterations", 15),
|
||||||
|
streaming_timeout=self._request_limits.get("streaming_timeout", 120.0),
|
||||||
)
|
)
|
||||||
instance = await self._acquire_instance(ctx)
|
instance = await self._acquire_instance(ctx)
|
||||||
agent = instance.app[agent_name]
|
agent = instance.app[agent_name]
|
||||||
@@ -271,11 +275,26 @@ class MultimodalAgentMCPServer(AgentMCPServer):
|
|||||||
)
|
)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
turn_timeout = self._request_limits.get("turn_timeout", 300.0)
|
||||||
|
|
||||||
|
async def _dispatch() -> str:
|
||||||
if agent_context and ctx:
|
if agent_context and ctx:
|
||||||
return await self.with_bridged_context(
|
return await self.with_bridged_context(
|
||||||
agent_context, ctx, execute_send
|
agent_context, ctx, execute_send
|
||||||
)
|
)
|
||||||
return await execute_send()
|
return await execute_send()
|
||||||
|
|
||||||
|
try:
|
||||||
|
return await asyncio.wait_for(_dispatch(), timeout=turn_timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning(
|
||||||
|
f"Agent '{agent_name}' turn exceeded {turn_timeout}s wall-clock limit",
|
||||||
|
name="turn_timeout",
|
||||||
|
agent=agent_name,
|
||||||
|
turn_timeout=turn_timeout,
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
)
|
||||||
|
raise
|
||||||
except BaseException:
|
except BaseException:
|
||||||
metrics_outcome = "error"
|
metrics_outcome = "error"
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -62,6 +62,9 @@ def _build_agents_table(config: dict) -> dict[str, dict]:
|
|||||||
"port": agent["port"],
|
"port": agent["port"],
|
||||||
"model": agent.get("model"),
|
"model": agent.get("model"),
|
||||||
"model_capabilities": agent.get("model_capabilities"),
|
"model_capabilities": agent.get("model_capabilities"),
|
||||||
|
"max_iterations": agent.get("max_iterations"),
|
||||||
|
"streaming_timeout": agent.get("streaming_timeout"),
|
||||||
|
"turn_timeout": agent.get("turn_timeout"),
|
||||||
}
|
}
|
||||||
for name, agent in config["agents"].items()
|
for name, agent in config["agents"].items()
|
||||||
}
|
}
|
||||||
@@ -259,6 +262,12 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
|||||||
# breaking the "Pallas is ephemeral" contract.
|
# breaking the "Pallas is ephemeral" contract.
|
||||||
# With "request" the Pallas process holds no per-conversation state
|
# With "request" the Pallas process holds no per-conversation state
|
||||||
# and the LLM sees exactly what Daedalus asks it to see.
|
# and the LLM sees exactly what Daedalus asks it to see.
|
||||||
|
request_limits = {
|
||||||
|
k: entry[k]
|
||||||
|
for k in ("max_iterations", "streaming_timeout", "turn_timeout")
|
||||||
|
if entry.get(k) is not None
|
||||||
|
}
|
||||||
|
|
||||||
server = MultimodalAgentMCPServer(
|
server = MultimodalAgentMCPServer(
|
||||||
primary_instance=primary_instance,
|
primary_instance=primary_instance,
|
||||||
create_instance=fast_instance._server_instance_factory,
|
create_instance=fast_instance._server_instance_factory,
|
||||||
@@ -267,6 +276,7 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
|
|||||||
server_name=f"{fast_instance.name}-MCP-Server",
|
server_name=f"{fast_instance.name}-MCP-Server",
|
||||||
host="0.0.0.0",
|
host="0.0.0.0",
|
||||||
get_registry_version=fast_instance._get_registry_version,
|
get_registry_version=fast_instance._get_registry_version,
|
||||||
|
request_limits=request_limits,
|
||||||
)
|
)
|
||||||
|
|
||||||
downstream_servers = _resolve_downstream_servers(fast_instance)
|
downstream_servers = _resolve_downstream_servers(fast_instance)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "pallas-mcp"
|
name = "pallas-mcp"
|
||||||
version = "0.2.2"
|
version = "0.3.0"
|
||||||
description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP"
|
description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP"
|
||||||
requires-python = ">=3.13.5"
|
requires-python = ">=3.13.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|||||||
Reference in New Issue
Block a user