Add call_tool & _execute_on_server traceback-capture monkeypatches

The send_request wrapper (56a1cd0) never fires in pallas.log for the
NoneType-await failures, proving the offending await lives above
send_request in the call stack. Install two additional wrappers to
triangulate:

  * MCPAgentClientSession.call_tool — catches failures in the session's
    override (meta merge, params, send_request invocation itself, ...).
  * MCPAggregator._execute_on_server — catches the broadest surface:
    get_server, session factory, permission check, tracer span,
    progress callback, try_execute wrapper.

Both emit logger.exception(...) with full stack before re-raising;
control flow is otherwise untouched.  Removable once the offending
frame is identified from the resulting traceback.
This commit is contained in:
2026-05-06 18:54:44 -04:00
parent 56a1cd0a6c
commit 273b96b370

View File

@@ -47,6 +47,7 @@ import httpx
from fast_agent.mcp import mcp_connection_manager as _mcm
from fast_agent.mcp import mcp_agent_client_session as _macs
from fast_agent.mcp import mcp_aggregator as _magg
from fast_agent.mcp.auth.context import request_bearer_token
logger = logging.getLogger("pallas.forward")
@@ -372,6 +373,96 @@ def _patch_send_request() -> None:
logger.info("send_request traceback-capture patch installed")
# ── call_tool / _execute_on_server traceback capture ─────────────────────────
# The "object NoneType can't be used in 'await' expression" error surfaces
# via ``EnrichedMCPToolProgressManager.on_tool_complete`` (message=error),
# which is driven by ``MCPAggregator`` at line 2287 catching a generic
# ``Exception`` and passing ``str(e)`` downstream. The ``send_request``
# wrapper above proved — by its silence — that the exception is NOT raised
# inside ``send_request``, so the failing ``await X()`` (where X returns
# ``None``) must live in one of the frames between:
# * ``MCPAgentClientSession.call_tool`` (override, ~985)
# * ``MCPAggregator._execute_on_server.try_execute`` (~1612)
# * anything between call_tool and send_request (``_merge_experimental_session_meta``,
# the permission handler, the progress-callback factory, span creation, …)
#
# We install two outer wrappers to triangulate:
# 1. ``MCPAgentClientSession.call_tool`` — catches anything raised in the
# session's override (meta merge, params construction, send_request invocation
# itself, ...);
# 2. ``MCPAggregator._execute_on_server`` — catches everything the aggregator
# sets up around the client call (get_server, session factory, permission
# check, tracer span, progress callback, ``try_execute`` itself).
#
# Both emit ``logger.exception(...)`` (full stack) before re-raising; the
# original control flow is untouched. Once the offending frame is identified
# from the resulting traceback, these wrappers can be removed.
_original_session_call_tool = _macs.MCPAgentClientSession.call_tool
async def _session_call_tool_with_trace(self, *args, **kwargs):
try:
return await _original_session_call_tool(self, *args, **kwargs)
except BaseException as exc:
server = getattr(self, "session_server_name", None) or "unknown"
tool_name = args[0] if args else kwargs.get("name", "?")
_trace_logger.exception(
"session.call_tool failed server=%s name=%s exc_type=%s",
server,
tool_name,
type(exc).__name__,
)
raise
def _patch_session_call_tool() -> None:
if getattr(
_macs.MCPAgentClientSession.call_tool, "_pallas_trace_patched", False
):
return
_session_call_tool_with_trace._pallas_trace_patched = True # type: ignore[attr-defined]
_macs.MCPAgentClientSession.call_tool = _session_call_tool_with_trace # type: ignore[assignment]
logger.info("session.call_tool traceback-capture patch installed")
_original_execute_on_server = _magg.MCPAggregator._execute_on_server
async def _execute_on_server_with_trace(self, *args, **kwargs):
try:
return await _original_execute_on_server(self, *args, **kwargs)
except BaseException as exc:
server_name = args[0] if args else kwargs.get("server_name", "?")
operation_type = (
args[1] if len(args) > 1 else kwargs.get("operation_type", "?")
)
operation_name = (
args[2] if len(args) > 2 else kwargs.get("operation_name", "?")
)
method_name = (
args[3] if len(args) > 3 else kwargs.get("method_name", "?")
)
_trace_logger.exception(
"aggregator._execute_on_server failed server=%s op=%s name=%s method=%s exc_type=%s",
server_name,
operation_type,
operation_name,
method_name,
type(exc).__name__,
)
raise
def _patch_execute_on_server() -> None:
if getattr(
_magg.MCPAggregator._execute_on_server, "_pallas_trace_patched", False
):
return
_execute_on_server_with_trace._pallas_trace_patched = True # type: ignore[attr-defined]
_magg.MCPAggregator._execute_on_server = _execute_on_server_with_trace # type: ignore[assignment]
logger.info("aggregator._execute_on_server traceback-capture patch installed")
def install() -> None:
if getattr(_mcm._prepare_headers_and_auth, "_pallas_forward_patched", False):
return
@@ -379,6 +470,8 @@ def install() -> None:
_prepare_headers_and_auth_with_forward._pallas_forward_patched = True # type: ignore[attr-defined]
_mcm._prepare_headers_and_auth = _prepare_headers_and_auth_with_forward
_patch_send_request()
_patch_session_call_tool()
_patch_execute_on_server()
# INFO so it always appears in the journal at boot — greppable proof
# that the patch ran before any agent started.
logger.info(