forward: capture send_request tracebacks before fast-agent drops them

fast-agent's MCPAgentClientSession.send_request catches every downstream
transport exception, logs the one-line 'send_request failed: <str(e)>'
WITHOUT exc_info=True, then re-raises.  The exception then propagates
up to the agent loop where its message is serialised as the tool result
string ("object NoneType can't be used in 'await' expression" being
the canonical symptom) and the traceback is lost forever.

Wrap send_request so Pallas emits logger.exception() with the full
stack against the 'pallas.forward.trace' logger before re-raising.
No behavioural change — we re-raise the same exception; we just get
one extra log record with the frames attached, which pallas.log now
preserves thanks to the _JSONFormatter traceback field.

This will surface the real origin of the NoneType-await that's
currently being served as Harper's mnemosyne tool result even though
Mnemosyne itself returns 200 OK.
This commit is contained in:
2026-05-06 06:11:00 -04:00
parent ac4af942ab
commit 56a1cd0a6c

View File

@@ -46,9 +46,11 @@ from typing import Any
import httpx
from fast_agent.mcp import mcp_connection_manager as _mcm
from fast_agent.mcp import mcp_agent_client_session as _macs
from fast_agent.mcp.auth.context import request_bearer_token
logger = logging.getLogger("pallas.forward")
_trace_logger = logging.getLogger("pallas.forward.trace")
class _DynamicBearerAuth(httpx.Auth):
@@ -329,12 +331,54 @@ def _refresh_forward_servers() -> None:
logger.debug("forward.no_config_found searched=%s", _candidate_config_paths())
# ── send_request traceback capture ───────────────────────────────────────────
# fast-agent's ``MCPAgentClientSession.send_request`` catches every
# downstream-transport exception, logs ``"send_request failed: <str(e)>"``
# *without* ``exc_info=True``, and re-raises — which means the exception
# propagates up to the agent loop where it is serialised as a tool result
# string (``"object NoneType can't be used in 'await' expression"`` is the
# canonical symptom) with no traceback anywhere.
#
# We wrap ``send_request`` so Pallas can emit ``logger.exception(...)`` with
# the full stack before re-raising. The original logger still fires its
# one-line summary; our wrapper adds the frames next to it in pallas.log.
# No behavioural change — we re-raise the same exception.
_original_send_request = _macs.MCPAgentClientSession.send_request


async def _send_request_with_trace(self, *args, **kwargs):
    """Call the original ``send_request`` and log a full traceback on failure.

    All positional and keyword arguments are forwarded untouched to the
    ``MCPAgentClientSession.send_request`` captured in
    ``_original_send_request``, and its result is returned as-is.  If the
    call raises, one record carrying the active traceback is emitted on
    ``_trace_logger`` and the very same exception is re-raised, so callers
    see no behavioural difference.

    Raises:
        BaseException: whatever the wrapped call raised, unchanged.
    """
    try:
        return await _original_send_request(self, *args, **kwargs)
    except BaseException as exc:
        # BaseException rather than Exception: a BaseExceptionGroup is not an
        # Exception subclass and would otherwise slip past unlogged.  The
        # flip side is that task cancellation passing through is logged too.
        server = getattr(self, "session_server_name", None) or "unknown"
        # The request is normally the first positional argument; fall back to
        # a ``request=`` keyword so keyword-style callers are not logged as
        # ``method=?``.  (Assumes the parameter is named ``request`` as in the
        # MCP SDK; a mismatch merely degrades the field to "?".)
        request = args[0] if args else kwargs.get("request")
        root = getattr(request, "root", None)
        request_method = getattr(root, "method", None) or "?"
        _trace_logger.exception(
            "send_request failed server=%s method=%s exc_type=%s",
            server,
            request_method,
            type(exc).__name__,
        )
        raise
def _patch_send_request() -> None:
    """Swap ``MCPAgentClientSession.send_request`` for the tracing wrapper.

    Idempotent: the wrapper is tagged with ``_pallas_trace_patched`` and the
    function does nothing when the currently installed ``send_request``
    already carries that tag.
    """
    session_cls = _macs.MCPAgentClientSession
    already_patched = getattr(session_cls.send_request, "_pallas_trace_patched", False)
    if not already_patched:
        # Tag first, then install, so the check above recognises the wrapper
        # on any subsequent call.
        _send_request_with_trace._pallas_trace_patched = True  # type: ignore[attr-defined]
        session_cls.send_request = _send_request_with_trace  # type: ignore[assignment]
        logger.info("send_request traceback-capture patch installed")
def install() -> None:
if getattr(_mcm._prepare_headers_and_auth, "_pallas_forward_patched", False):
return
_refresh_forward_servers()
_prepare_headers_and_auth_with_forward._pallas_forward_patched = True # type: ignore[attr-defined]
_mcm._prepare_headers_and_auth = _prepare_headers_and_auth_with_forward
_patch_send_request()
# INFO so it always appears in the journal at boot — greppable proof
# that the patch ran before any agent started.
logger.info(