feat: relay sub-agent activity chunks to parent MCP context

2026-05-28 15:36:14 -04:00
parent d387650bf2
commit 0b0a8f37a4
6 changed files with 854 additions and 95 deletions
--- a/tests/test_assistant_stream.py
+++ b/tests/test_assistant_stream.py
@@ -1,9 +1,9 @@
 """Tests for ``pallas.assistant_stream``.

-Drives the ``after_llm_call`` hook with handcrafted ``PromptMessageExtended``
-objects and asserts the resulting MCP ``send_log_message`` payload shape.
-No fast-agent runtime is involved — the hook is a pure async function and
-the MCP context is faked.
+Drives the ``after_llm_call`` / ``before_tool_call`` / ``after_tool_call``
+hooks with handcrafted ``PromptMessageExtended`` objects and asserts the
+resulting MCP ``send_log_message`` payload shape.  No fast-agent runtime is
+involved — the hooks are pure async functions and the MCP context is faked.

 Tests use ``asyncio.run`` directly to match the convention in
 ``tests/test_health.py`` and ``tests/test_mantle_shims.py`` (pallas has no
@@ -20,12 +20,14 @@ from fast_agent.types.llm_stop_reason import LlmStopReason
 from mcp.types import (
    CallToolRequest,
    CallToolRequestParams,
+    CallToolResult,
    ImageContent,
    TextContent,
 )

 from pallas.assistant_stream import (
    KIND,
+    KIND_RESULTS,
    LOGGER_NAME,
    SCHEMA_VERSION,
    AssistantChunkEmitter,
@@ -80,13 +82,22 @@ class _FakeAgent:
        self.tool_runner_hooks: ToolRunnerHooks | None = None


-def _tool_call(call_id: str, name: str, arguments: dict | None = None) -> CallToolRequest:
+def _tool_call(name: str, arguments: dict | None = None) -> CallToolRequest:
    return CallToolRequest(
        method="tools/call",
        params=CallToolRequestParams(name=name, arguments=arguments or {}),
    )


+def _tool_result(
+    text: str | None = "ok", *, is_error: bool = False
+) -> CallToolResult:
+    content: list = []
+    if text is not None:
+        content.append(TextContent(type="text", text=text))
+    return CallToolResult(content=content, isError=is_error)
+
+
 # ── Tests ────────────────────────────────────────────────────────────────────


@@ -101,7 +112,7 @@ def test_emit_text_only_iteration() -> None:
        stop_reason=LlmStopReason.END_TURN,
    )

-    _run(emitter._emit(msg))
+    _run(emitter._emit_assistant_chunk(msg))

    assert len(ctx.session.calls) == 1
    call = ctx.session.calls[0]
@@ -120,8 +131,8 @@ def test_emit_text_only_iteration() -> None:
    assert data["tool_calls"] == []


-def test_emit_text_with_tool_call_iteration() -> None:
-    """An assistant turn that emits text and then calls a tool ships both."""
+def test_emit_text_with_tool_call_iteration_carries_args_preview() -> None:
+    """An iteration that requests a tool ships name, server prefix, and args preview."""
    ctx = _FakeContext()
    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="conv-1")

@@ -129,33 +140,38 @@ def test_emit_text_with_tool_call_iteration() -> None:
        role="assistant",
        content=[TextContent(type="text", text="Logging this…")],
        tool_calls={
-            "toolu_1": _tool_call("toolu_1", "time__get_current_time"),
+            "toolu_1": _tool_call(
+                "argos-search_web",
+                {"query": "ducati v2 vs v4 reliability"},
+            ),
        },
        stop_reason=LlmStopReason.TOOL_USE,
    )

-    _run(emitter._emit(msg))
+    _run(emitter._emit_assistant_chunk(msg))

    assert len(ctx.session.calls) == 1
    data = ctx.session.calls[0]["data"]
    assert data["stop_reason"] == "toolUse"
    assert data["content"] == [{"type": "text", "text": "Logging this…"}]
-    assert data["tool_calls"] == [{"id": "toolu_1", "name": "time__get_current_time"}]
+    assert data["tool_calls"] == [
+        {
+            "id": "toolu_1",
+            "name": "argos-search_web",
+            "server": "argos",
+            "arguments_preview": "ducati v2 vs v4 reliability",
+        }
+    ]


 def test_emit_skips_completely_empty_iteration() -> None:
-    """A turn with no text blocks and no tool calls emits nothing.
-
-    Tool-call lifecycle is already covered by notifications/progress.  An
-    empty assistant_chunk would just be noise on the wire and a no-op
-    update on the live bubble.
-    """
+    """A turn with no text blocks and no tool calls emits nothing."""
    ctx = _FakeContext()
    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="conv-1")

    msg = PromptMessageExtended(role="assistant", content=[])

-    _run(emitter._emit(msg))
+    _run(emitter._emit_assistant_chunk(msg))

    assert ctx.session.calls == []
    # Iteration counter still bumps so subsequent chunks aren't mis-numbered.
@@ -173,7 +189,7 @@ def test_emit_image_block_passes_through_with_mime_type_renamed() -> None:
        stop_reason=LlmStopReason.END_TURN,
    )

-    _run(emitter._emit(msg))
+    _run(emitter._emit_assistant_chunk(msg))

    data = ctx.session.calls[0]["data"]
    assert data["content"] == [
@@ -188,21 +204,21 @@ def test_emit_iterations_are_numbered_in_order() -> None:
    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="conv-1")

    async def drive() -> None:
-        await emitter._emit(
+        await emitter._emit_assistant_chunk(
            PromptMessageExtended(
                role="assistant",
                content=[TextContent(type="text", text="first")],
                stop_reason=LlmStopReason.TOOL_USE,
            )
        )
-        await emitter._emit(
+        await emitter._emit_assistant_chunk(
            PromptMessageExtended(
                role="assistant",
                content=[TextContent(type="text", text="second")],
                stop_reason=LlmStopReason.TOOL_USE,
            )
        )
-        await emitter._emit(
+        await emitter._emit_assistant_chunk(
            PromptMessageExtended(
                role="assistant",
                content=[TextContent(type="text", text="third — done")],
@@ -232,9 +248,96 @@ def test_emit_swallows_session_failure() -> None:
        stop_reason=LlmStopReason.END_TURN,
    )

-    # Must not raise.
-    _run(emitter._emit(msg))
-    # And no successful calls were recorded (fail_with raised before append).
+    _run(emitter._emit_assistant_chunk(msg))
+    assert ctx.session.calls == []
+
+
+def test_emit_tool_results_pairs_call_id_with_iteration() -> None:
+    """``after_tool_call`` ships a results payload keyed to the calling iteration."""
+    ctx = _FakeContext()
+    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="conv-1")
+
+    # Iteration 1: text + tool request.
+    iter1 = PromptMessageExtended(
+        role="assistant",
+        content=[TextContent(type="text", text="Searching…")],
+        tool_calls={"toolu_1": _tool_call("argos-search_web", {"query": "foo"})},
+        stop_reason=LlmStopReason.TOOL_USE,
+    )
+
+    async def drive() -> None:
+        # Simulate the runner: after_llm_call → before_tool_call → after_tool_call.
+        await emitter._emit_assistant_chunk(iter1)
+        await emitter.as_before_tool_call_hook()(None, iter1)
+        # The "user" message produced by the tool runner carries tool_results.
+        tool_msg = PromptMessageExtended(
+            role="user",
+            content=[],
+            tool_results={"toolu_1": _tool_result("12 results found")},
+        )
+        await emitter.as_after_tool_call_hook()(None, tool_msg)
+
+    _run(drive())
+
+    assert len(ctx.session.calls) == 2
+    chunk = ctx.session.calls[0]["data"]
+    results = ctx.session.calls[1]["data"]
+    assert chunk["kind"] == KIND
+    assert chunk["iteration"] == 1
+    assert results["kind"] == KIND_RESULTS
+    assert results["iteration"] == 1
+    assert results["agent"] == "alan"
+    assert results["conversation_id"] == "conv-1"
+    assert len(results["results"]) == 1
+    entry = results["results"][0]
+    assert entry["id"] == "toolu_1"
+    assert entry["ok"] is True
+    assert entry["result_preview"] == "12 results found"
+    assert isinstance(entry["duration_ms"], int)
+    assert entry["duration_ms"] >= 0
+
+
+def test_emit_tool_results_marks_error() -> None:
+    """A failing tool call surfaces ``ok: False`` in the results entry."""
+    ctx = _FakeContext()
+    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="c")
+
+    iter1 = PromptMessageExtended(
+        role="assistant",
+        content=[],
+        tool_calls={"toolu_1": _tool_call("argos-search_web", {"query": "foo"})},
+        stop_reason=LlmStopReason.TOOL_USE,
+    )
+
+    async def drive() -> None:
+        await emitter._emit_assistant_chunk(iter1)
+        await emitter.as_before_tool_call_hook()(None, iter1)
+        tool_msg = PromptMessageExtended(
+            role="user",
+            content=[],
+            tool_results={
+                "toolu_1": _tool_result("connection refused", is_error=True)
+            },
+        )
+        await emitter.as_after_tool_call_hook()(None, tool_msg)
+
+    _run(drive())
+
+    # iter1 is a pure-tool turn (no text + has tool_calls) → still emits an
+    # assistant_chunk because tool_calls is non-empty.
+    assert len(ctx.session.calls) == 2
+    results = ctx.session.calls[1]["data"]
+    assert results["results"][0]["ok"] is False
+    assert results["results"][0]["result_preview"] == "connection refused"
+
+
+def test_emit_tool_results_empty_when_no_results() -> None:
+    """An ``after_tool_call`` carrying no tool_results emits nothing."""
+    ctx = _FakeContext()
+    emitter = AssistantChunkEmitter(ctx, agent_name="alan", conversation_id="c")
+
+    msg = PromptMessageExtended(role="user", content=[], tool_results=None)
+    _run(emitter.as_after_tool_call_hook()(None, msg))
    assert ctx.session.calls == []


@@ -262,23 +365,22 @@ def test_install_for_request_merges_with_existing_after_llm_call() -> None:

    _run(agent.tool_runner_hooks.after_llm_call(None, msg))

-    # Base ran, and the assistant-stream emitter shipped the chunk.
    assert seen == ["base"]
    assert len(ctx.session.calls) == 1
    assert ctx.session.calls[0]["data"]["content"] == [
        {"type": "text", "text": "hi"}
    ]

-    # Other hook slots stay untouched.
-    assert agent.tool_runner_hooks.before_llm_call is None
-    assert agent.tool_runner_hooks.before_tool_call is None
-    assert agent.tool_runner_hooks.after_tool_call is None
-    assert agent.tool_runner_hooks.after_turn_complete is None
+    # The emitter now also binds the before/after tool-call slots; the
+    # restore call below puts the original hooks back exactly.
+    assert agent.tool_runner_hooks.before_tool_call is not None
+    assert agent.tool_runner_hooks.after_tool_call is not None

-    # Restore puts the original hooks back exactly.
    restore()
    assert agent.tool_runner_hooks is not None
    assert agent.tool_runner_hooks.after_llm_call is base_after
+    assert agent.tool_runner_hooks.before_tool_call is None
+    assert agent.tool_runner_hooks.after_tool_call is None


 def test_install_for_request_with_no_existing_hooks() -> None:
@@ -293,6 +395,8 @@ def test_install_for_request_with_no_existing_hooks() -> None:

    assert agent.tool_runner_hooks is not None
    assert agent.tool_runner_hooks.after_llm_call is not None
+    assert agent.tool_runner_hooks.before_tool_call is not None
+    assert agent.tool_runner_hooks.after_tool_call is not None

    restore()
    assert agent.tool_runner_hooks is None