diff --git a/docs/pallas_integration.md b/docs/pallas_integration.md index b7cdad6..7c6d4a0 100644 --- a/docs/pallas_integration.md +++ b/docs/pallas_integration.md @@ -132,7 +132,56 @@ No authentication. No query parameters. --- -## 2. Health Tool +## 2. Conversation State & History (Daedalus-owned) + +**Pallas is stateless.** As of version `0.2.0`, every MCP `tools/call` is +handled by a freshly-created fast-agent instance that is disposed immediately +after the response. The Pallas process holds **no per-conversation memory +between calls**. This is enforced by `instance_scope="request"` in +`pallas.server` — do not override it. + +Conversation history is owned by the client (Daedalus). It must be replayed +on every turn through the `history` argument on `send_message`. + +### `send_message` Arguments + +Each agent's MCP tool accepts: + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `message` | `str` | yes | The new user turn as plain text. | +| `images` | `list[dict]` | no | Images attached to this turn only: `[{"data": base64, "mime_type": "image/png"}]`. Requires a vision-capable model. | +| `history` | `list[dict]` | no | Prior conversation history in chronological order. Entries have shape `{"role": "user" \| "assistant", "content": str, "images"?: [...]}`. When present, seeds the freshly-created agent's `message_history` *before* the new turn is executed. | +| `conversation_id` | `str` | no | Opaque identifier logged by Pallas for trace correlation. Pallas does not interpret or persist it. | + +### Rationale + +| Problem with shared state | Behaviour with `instance_scope="request"` | +|---------------------------|-------------------------------------------| +| Every caller sees the same `agent.message_history`, so different conversations leak into each other. | Each call gets a fresh, isolated instance. No cross-conversation bleed. | +| Process restart wipes all in-flight context. | There was no in-flight context to wipe — Daedalus reseeds it on the next turn. | +| Context-window trimming happens invisibly inside fast-agent. | Daedalus decides what history to send and how much, based on `capabilities.context_window` from the registry. | + +### `{agent}_history` Prompt + +Under `instance_scope="request"` the `{agent}_history` MCP prompt is still +registered for backward compatibility but always returns `[]` — history lives +on the client and there is no authoritative server-side copy. Existing +callers that invoke this prompt will not error, but should migrate to +tracking history client-side. + +### Backward Compatibility + +All new arguments are optional. A client that calls `send_message(message=...)` +with no `history` and no `conversation_id` gets a *zero-history* turn (the +agent sees only the current message). This is correct stateless behaviour — +it is never "the last conversation's context". Existing fast-agent MCP +clients that do not know about `history` will produce one-shot responses, +which is the appropriate and visible failure mode. + +--- + +## 3. Health Tool ### MCP tool: `get_health` @@ -239,7 +288,7 @@ The tool **must not** invoke the LLM. It should complete in under 1 second (3-se --- -## 3. Daedalus Consumption +## 4. Daedalus Consumption ### Registration Flow @@ -270,7 +319,7 @@ The tool **must not** invoke the LLM. It should complete in under 1 second (3-se --- -## 4. Agent Progress Notifications +## 5. Agent Progress Notifications Agent tool calls can take tens of seconds to minutes when the agent enters an agentic loop — calling sub-agents, searching the web, querying knowledge graphs, etc. During this time, the MCP tool call has not yet returned. Without progress feedback, the user sees a dead spinner. @@ -363,7 +412,7 @@ Progress messages follow predictable patterns: --- -## 5. Why MCP (Not REST) +## 6. Why MCP (Not REST) Pallas wraps each FastAgent instance in a `MultimodalAgentMCPServer` and serves it over StreamableHTTP. The MCP transport gives Daedalus: diff --git a/pallas/multimodal_server.py b/pallas/multimodal_server.py index a0d9ce8..8683b0e 100644 --- a/pallas/multimodal_server.py +++ b/pallas/multimodal_server.py @@ -1,23 +1,26 @@ """ MultimodalAgentMCPServer — AgentMCPServer subclass with images support. -Overrides register_agent_tools to accept an optional ``images`` parameter -on each agent's ``send_message`` tool, enabling callers to attach base64- -encoded images alongside the text message. +Overrides register_agent_tools to: -Drop-in replacement for AgentMCPServer: + * accept an optional ``images`` parameter on each agent's ``send_message`` + tool so callers can attach base64-encoded images alongside the text, + * accept an optional ``history`` parameter (list of role/content dicts) + so callers own conversation state and seed it on every turn, + * accept an optional ``conversation_id`` string that is recorded in + structured logs and progress notification metadata for end-to-end + trace correlation. - from pallas.multimodal_server import MultimodalAgentMCPServer - - server = MultimodalAgentMCPServer( - primary_instance=..., - create_instance=..., - dispose_instance=..., - instance_scope="shared", - ) +Drop-in replacement for AgentMCPServer. When combined with +``instance_scope="request"`` (the Pallas default), this gives a fully +stateless bridge: each MCP ``tools/call`` is handled by a freshly-created +fast-agent instance whose ``message_history`` is seeded from the caller's +``history`` argument — no cross-conversation bleed, no process-lifetime +memory, no restart amnesia. """ import time +from typing import Any import fast_agent.core.prompt from fast_agent.core.logging.logger import get_logger @@ -58,8 +61,82 @@ def _history_to_fastmcp_messages( return convert_to_fastmcp_messages(prompt_messages) +def _history_payload_to_multipart( + history: list[dict] | None, +) -> list[PromptMessageExtended]: + """Convert the caller-supplied ``history`` argument to PromptMessageExtended. + + Each entry must be a mapping with at least ``role`` ("user"|"assistant") + and ``content`` (str). An optional ``images`` list may contain + ``{"data": base64, "mime_type": str}`` entries; they are appended to the + same turn as additional ``ImageContent`` blocks. + + Entries that cannot be coerced (missing/invalid role, non-string content, + malformed images) are skipped with a warning — the remaining history is + still seeded so a single bad row cannot wipe an entire conversation. + """ + if not history: + return [] + + out: list[PromptMessageExtended] = [] + for idx, entry in enumerate(history): + if not isinstance(entry, dict): + logger.warning( + f"history entry {idx} is not a dict; skipping", + name="history_entry_invalid", + index=idx, + ) + continue + + role = entry.get("role") + if role not in ("user", "assistant"): + logger.warning( + f"history entry {idx} has invalid role {role!r}; skipping", + name="history_entry_invalid_role", + index=idx, + role=role, + ) + continue + + content_text = entry.get("content", "") + if not isinstance(content_text, str): + content_text = str(content_text or "") + + blocks: list[Any] = [] + if content_text: + blocks.append(TextContent(type="text", text=content_text)) + + images = entry.get("images") or [] + if isinstance(images, list): + for img_idx, img in enumerate(images): + if not isinstance(img, dict): + continue + data = img.get("data") + mime = img.get("mime_type") or img.get("mimeType") + if not data or not mime: + logger.warning( + f"history entry {idx} image {img_idx} missing data/mime_type", + name="history_image_invalid", + index=idx, + image_index=img_idx, + ) + continue + blocks.append( + ImageContent(type="image", data=data, mimeType=mime) + ) + + if not blocks: + # An empty turn conveys nothing — skip rather than emit a zero-block + # PromptMessageExtended which the LLM adapter would reject. + continue + + out.append(PromptMessageExtended(role=role, content=blocks)) + + return out + + class MultimodalAgentMCPServer(AgentMCPServer): - """AgentMCPServer with optional image attachment support on send_message.""" + """AgentMCPServer with optional image + history support on send_message.""" def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) @@ -87,7 +164,7 @@ class MultimodalAgentMCPServer(AgentMCPServer): return Response(content=data, media_type=CONTENT_TYPE_LATEST) def register_agent_tools(self, agent_name: str) -> None: - """Register a send_message tool that accepts text + optional images.""" + """Register a send_message tool that accepts text + optional images + history.""" self._registered_agents.add(agent_name) tool_description = ( @@ -114,7 +191,30 @@ class MultimodalAgentMCPServer(AgentMCPServer): message: str, ctx: MCPContext, images: list[dict] | None = None, + history: list[dict] | None = None, + conversation_id: str | None = None, ) -> str: + """Send a single turn to the agent. + + Parameters + ---------- + message: + The new user turn, plain text. + images: + Optional list of ``{"data": base64, "mime_type": str}`` image + attachments sent with this turn. Requires a vision-capable + model. + history: + Optional prior conversation history as a list of + ``{"role": "user"|"assistant", "content": str, "images": [...]}`` + entries in chronological order. When provided, seeds the + freshly-created agent's ``message_history`` before executing + the new turn. Pallas never persists this — the caller + (typically Daedalus) owns conversation state. + conversation_id: + Optional opaque identifier, logged for trace correlation. + Pallas does not interpret it. + """ saved_token = request_bearer_token.set(_get_request_bearer_token()) report_progress = self._build_progress_reporter(ctx) request_params = RequestParams( @@ -126,6 +226,21 @@ class MultimodalAgentMCPServer(AgentMCPServer): agent = instance.app[agent_name] agent_context = getattr(agent, "context", None) + # Seed the freshly-created instance's message_history from the + # caller-supplied history so the agent sees the full + # conversation the caller is tracking. Safe no-op when the + # instance is scoped "shared" because load_message_history + # replaces existing history in that case too — but callers + # should only pass history when talking to a "request"-scoped + # agent. With an empty/absent history this is skipped so + # shared-mode deployments retain today's behaviour. + history_count = 0 + if history: + seeded = _history_payload_to_multipart(history) + if seeded: + agent.load_message_history(seeded) + history_count = len(seeded) + if images: content: list = [TextContent(type="text", text=message)] for img in images: @@ -149,6 +264,9 @@ class MultimodalAgentMCPServer(AgentMCPServer): name="mcp_request_start", agent=agent_name, session=self._session_identifier(ctx), + conversation_id=conversation_id, + history_count=history_count, + image_count=len(images) if images else 0, ) response = await agent.send(payload, request_params=request_params) duration = time.perf_counter() - start @@ -158,6 +276,7 @@ class MultimodalAgentMCPServer(AgentMCPServer): agent=agent_name, duration=duration, session=self._session_identifier(ctx), + conversation_id=conversation_id, ) return response @@ -173,6 +292,20 @@ class MultimodalAgentMCPServer(AgentMCPServer): request_bearer_token.reset(saved_token) if self._instance_scope == "request": + # With request-scoped instances there is no persistent server-side + # history to expose — the caller owns it. We still register the + # prompt so clients that query `{agent}_history` get a well-formed + # empty response rather than a 404, but it always returns []. + @self.mcp_server.prompt( + name=f"{agent_name}_history", + description=( + f"Conversation history for the {agent_name} agent " + "(always empty — Pallas is stateless; the caller owns history)" + ), + ) + async def get_history_prompt_stateless(ctx: MCPContext) -> list[Message]: + return [] + return @self.mcp_server.prompt( diff --git a/pallas/server.py b/pallas/server.py index 6ee2022..b05447b 100644 --- a/pallas/server.py +++ b/pallas/server.py @@ -218,11 +218,24 @@ async def _start_agent(name: str, agents: dict[str, dict]) -> None: async with fast_instance.run(): primary_instance = fast_instance._server_managed_instances[0] + # Stateless per request: each MCP `tools/call` gets a freshly-created + # agent instance which is disposed immediately after the response. + # Conversation history is owned by the caller (Daedalus) and supplied + # on every turn via the `history` argument on `send_message` — see + # multimodal_server.MultimodalAgentMCPServer.register_agent_tools. + # + # Why this matters: + # * "shared" leaks one conversation's history into the next + # because all callers see the same `agent.message_history`. + # * "shared" also silently loses everything on process restart, + # breaking the "Pallas is ephemeral" contract. + # With "request" the Pallas process holds no per-conversation state + # and the LLM sees exactly what Daedalus asks it to see. server = MultimodalAgentMCPServer( primary_instance=primary_instance, create_instance=fast_instance._server_instance_factory, dispose_instance=fast_instance._server_instance_dispose, - instance_scope="shared", + instance_scope="request", server_name=f"{fast_instance.name}-MCP-Server", host="0.0.0.0", get_registry_version=fast_instance._get_registry_version, diff --git a/pyproject.toml b/pyproject.toml index f289254..ba86ec6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pallas-mcp" -version = "0.1.0" +version = "0.2.0" description = "FastAgent MCP Bridge — generic runtime for serving FastAgent agents over StreamableHTTP" requires-python = ">=3.13.5" dependencies = [