From 440f7fb60cc47387f8f2dff79dcfd3a9563dfb9c Mon Sep 17 00:00:00 2001
From: Robert Helewka <r@helu.ca>
Date: Wed, 27 May 2026 05:41:08 -0400
Subject: [PATCH] feat: add per-agent loop safeguards for tool-call turns

Introduce three optional per-agent config fields to bound tool-call loop
execution: `max_iterations` (default 15), `streaming_timeout` (default
120s), and `turn_timeout` (default 300s wall-clock).

- Plumb limits from agent config through `_build_agents_table` and
  `_start_agent` into `MultimodalAgentMCPServer` via `request_limits`
- Apply `max_iterations` and `streaming_timeout` to `RequestParams`
- Wrap turn dispatch in `asyncio.wait_for` to enforce `turn_timeout`,
  logging a warning and re-raising the timeout error when it fires
- Document the new fields in README
- Bump package version from 0.2.2 to 0.3.0
---
 README.md                   | 22 ++++++++++++++++++++++
 pallas/multimodal_server.py | 29 ++++++++++++++++++++++++-----
 pallas/server.py            | 10 ++++++++++
 pyproject.toml              |  2 +-
 4 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 9f59663..2c7b5bd 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,28 @@ agents:
     description: "Web search and knowledge graph"
 ```
 
+### Loop safeguards
+
+Three optional fields bound how long an agent's tool-call loop can run:
+
+| Field | Type | Default | Purpose |
+|---|---|---|---|
+| `max_iterations` | int | 15 | Maximum tool calls in a single agent turn |
+| `streaming_timeout` | float | 120 | Max idle seconds between streaming events |
+| `turn_timeout` | float | 300 | Hard wall-clock limit for a full turn (seconds) |
+
+All three are optional and independent. An agent that omits any one of them uses that field's default shown above.
+
+```yaml
+agents:
+  research:
+    module: agents.research
+    port: 8250
+    max_iterations: 10      # this agent only needs a few search calls
+    streaming_timeout: 60   # fail fast on a slow search MCP
+    turn_timeout: 120       # research turns should not take more than 2 min
+```
+
 ---
 
 ## `fastagent.config.yaml` extensions
diff --git a/pallas/multimodal_server.py b/pallas/multimodal_server.py
index c93c87f..a760ec7 100644
--- a/pallas/multimodal_server.py
+++ b/pallas/multimodal_server.py
@@ -19,6 +19,7 @@ fast-agent instance whose ``message_history`` is seeded from the caller's
 memory, no restart amnesia.
 """
 
+import asyncio
 import time
 from typing import Any
 
@@ -125,8 +126,9 @@ def _history_payload_to_multipart(
 class MultimodalAgentMCPServer(AgentMCPServer):
     """AgentMCPServer with optional image + history support on send_message."""
 
-    def __init__(self, *args, **kwargs) -> None:
+    def __init__(self, *args, request_limits: dict | None = None, **kwargs) -> None:
         super().__init__(*args, **kwargs)
+        self._request_limits = request_limits or {}
         self._register_health_routes()
 
     def _register_health_routes(self) -> None:
@@ -210,6 +212,8 @@ class MultimodalAgentMCPServer(AgentMCPServer):
             request_params = RequestParams(
                 tool_execution_handler=EnrichedMCPToolProgressManager(report_progress),
                 emit_loop_progress=True,
+                max_iterations=self._request_limits.get("max_iterations", 15),
+                streaming_timeout=self._request_limits.get("streaming_timeout", 120.0),
             )
             instance = await self._acquire_instance(ctx)
             agent = instance.app[agent_name]
@@ -271,11 +275,26 @@ class MultimodalAgentMCPServer(AgentMCPServer):
                     )
                     return response
 
-                if agent_context and ctx:
-                    return await self.with_bridged_context(
-                        agent_context, ctx, execute_send
+                turn_timeout = self._request_limits.get("turn_timeout", 300.0)
+
+                async def _dispatch() -> str:
+                    if agent_context and ctx:
+                        return await self.with_bridged_context(
+                            agent_context, ctx, execute_send
+                        )
+                    return await execute_send()
+
+                try:
+                    return await asyncio.wait_for(_dispatch(), timeout=turn_timeout)
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        f"Agent '{agent_name}' turn exceeded {turn_timeout}s wall-clock limit",
+                        name="turn_timeout",
+                        agent=agent_name,
+                        turn_timeout=turn_timeout,
+                        conversation_id=conversation_id,
                     )
-                return await execute_send()
+                    raise
             except BaseException:
                 metrics_outcome = "error"
                 raise
diff --git a/pallas/server.py b/pallas/server.py
index cfe5254..255ec53 100644
--- a/pallas/server.py
+++ b/pallas/server.py
@@ -62,6 +62,9 @@ def _build_agents_table(config: dict) -> dict[str, dict]:
             "port": agent["port"],
             "model": agent.get("model"),
             "model_capabilities": agent.get("model_capabilities"),
+            "max_iterations": agent.get("max_iterations"),
+            "streaming_timeout": agent.get("streaming_timeout"),
+            "turn_timeout": agent.get("turn_timeout"),
         }
         for name, agent in config["agents"].items()
     }
@@ -259,6 +262,12 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
         #     breaking the "Pallas is ephemeral" contract.
         # With "request" the Pallas process holds no per-conversation state
         # and the LLM sees exactly what Daedalus asks it to see.
+        request_limits = {
+            k: entry[k]
+            for k in ("max_iterations", "streaming_timeout", "turn_timeout")
+            if entry.get(k) is not None
+        }
+
         server = MultimodalAgentMCPServer(
             primary_instance=primary_instance,
             create_instance=fast_instance._server_instance_factory,
@@ -267,6 +276,7 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None:
             server_name=f"{fast_instance.name}-MCP-Server",
             host="0.0.0.0",
             get_registry_version=fast_instance._get_registry_version,
+            request_limits=request_limits,
         )
 
         downstream_servers = _resolve_downstream_servers(fast_instance)
diff --git a/pyproject.toml b/pyproject.toml
index 53a38fb..3c42465 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pallas-mcp"
-version = "0.2.2"
+version = "0.3.0"
 description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP"
 requires-python = ">=3.13.5"
 dependencies = [