feat(server): add force_create_media_buy_arm + force_task_completion controller scenarios (#282)

bokelley · claude · web-flow · commit 9818250c75bc · 2026-04-29T21:22:42.000-04:00
* feat(server): add force_create_media_buy_arm + force_task_completion controller scenarios

  Adds two new comply_test_controller scenarios for AdCP 3.0.1 storyboard
  parity. Sellers running the create_media_buy_async.yaml storyboard suite
  against a Python reference seller now grade `passing` rather than
  `not_applicable` on the submitted-arm phase.

  - Adds `force_create_media_buy_arm` and `force_task_completion` to
    SCENARIOS, TestControllerStore abstract base, and the dispatcher in
    _handle_test_controller.
  - Validates arm enum, conditional task_id-when-submitted, char limits,
    256 KB result cap, and whitespace task_id stripping.
  - Updates register_test_controller inline schema (derived from SCENARIOS
    to prevent drift) and mcp_tools.py ADCP_TOOL_DEFINITIONS enum to
    include both.
  - Adds account field to both inline schemas so storyboard runners can
    drive cross-account isolation.
  - 20 new tests at parity with Node training-agent nine-test pattern.

  Closes #281

  https://claude.ai/code/session_01KaGEJKsjnTEuLF6qnaRFqQ

* fix(server): strip task_id for input-required arm; add coverage test

  Forced.extra='forbid' in the comply_test_controller response schema means
  a store that echoes task_id on arm='input-required' would produce an
  invalid Forced object. The dispatcher now nullifies task_id before the
  store call when arm='input-required', preventing protocol drift
  regardless of store implementation.

  Adds one test: test_arm_task_id_stripped_for_input_required.

  https://claude.ai/code/session_01KaGEJKsjnTEuLF6qnaRFqQ

* refactor(server): address pre-PR review feedback

  - Extract _accepts_kwarg(method, name) so both context and account
    pass-through share one signature-inspection impl;
    _accepts_context_kwarg delegates to it.
  - Gate account kwarg via _accepts_kwarg in the shared `extra` dict so
    stores that omit account= don't receive an unexpected keyword and
    silently fall to INTERNAL_ERROR.
  - Replace len(str(message)) guard with isinstance + len for consistency
    with task_id handling.
  - Import SCENARIOS from test_controller in mcp_tools.py so the
    comply_test_controller inputSchema enum is always derived from the
    canonical list and can't drift on the next scenario addition.

  https://claude.ai/code/session_01KaGEJKsjnTEuLF6qnaRFqQ

---------

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/src/adcp/server/mcp_tools.py b/src/adcp/server/mcp_tools.py
@@ -24,6 +24,7 @@
 from typing import Any
 
 from adcp.server.base import ADCPHandler, ToolContext
+from adcp.server.test_controller import SCENARIOS as _CONTROLLER_SCENARIOS
 from adcp.validation.client_hooks import ValidationHookConfig
 
 logger = logging.getLogger(__name__)
@@ -842,22 +843,14 @@
                 "account": {"type": "object"},
                 "scenario": {
                     "type": "string",
-                    "enum": [
-                        "list_scenarios",
-                        "force_creative_status",
-                        "force_account_status",
-                        "force_media_buy_status",
-                        "force_session_status",
-                        "simulate_delivery",
-                        "simulate_budget_spend",
-                        "seed_product",
-                        "seed_pricing_option",
-                        "seed_creative",
-                        "seed_plan",
-                        "seed_media_buy",
-                    ],
+                    # Derived from test_controller.SCENARIOS so the static stub
+                    # matches the dispatcher; the Pydantic-generated path also
+                    # carries the new names because #292 ships them in the
+                    # comply-test-controller-request schema.
+                    "enum": ["list_scenarios"] + _CONTROLLER_SCENARIOS,
                 },
                 "params": {"type": "object"},
+                "account": {"type": "object"},
                 "context": {"type": "object"},
             },
             "required": ["scenario"],
diff --git a/src/adcp/server/test_controller.py b/src/adcp/server/test_controller.py
@@ -49,6 +49,8 @@ async def force_account_status(self, account_id, status):
     "force_creative_status",
     "force_account_status",
     "force_media_buy_status",
+    "force_create_media_buy_arm",
+    "force_task_completion",
     "force_session_status",
     "simulate_delivery",
     "simulate_budget_spend",
@@ -60,6 +62,10 @@ async def force_account_status(self, account_id, status):
     "seed_media_buy",
 ]
 
+_MAX_TASK_ID = 128
+_MAX_MESSAGE = 2000
+_MAX_RESULT_BYTES = 256 * 1024  # 256 KB soft cap per AdCP 3.0.1
+
 
 class TestControllerError(Exception):
     """Typed error for test controller store methods.
@@ -165,6 +171,89 @@ async def force_session_status(
         """
         raise NotImplementedError
 
+    async def force_create_media_buy_arm(
+        self,
+        arm: str,
+        task_id: str | None = None,
+        message: str | None = None,
+        *,
+        account: dict[str, Any] | None = None,
+        context: ToolContext | None = None,
+    ) -> dict[str, Any]:
+        """Register a single-shot directive for the next create_media_buy call.
+
+        The directive is consumed by the next create_media_buy call from the
+        same authenticated sandbox account, then cleared. A second registration
+        before consumption overwrites the first.
+
+        Args:
+            arm: Response arm — ``'submitted'`` or ``'input-required'``.
+            task_id: Required when ``arm='submitted'``. The seller MUST emit
+                this exact value on the next create_media_buy task envelope
+                and accept it on subsequent tasks/get calls within the same
+                sandbox account. Max 128 chars.
+            message: Optional plain-text note surfaced on the response.
+                Max 2000 chars.
+            account: Caller-supplied account object from the MCP request.
+                Implementations use this for single-shot-per-account isolation.
+            context: Optional ToolContext from the server's context_factory.
+
+        Returns:
+            ForcedDirectiveSuccess::
+
+                {"success": True, "forced": {"arm": str, "task_id"?: str}}
+
+        Raises:
+            TestControllerError: with code ``"NOT_FOUND"`` if the caller
+                account is not recognized, or ``"INVALID_PARAMS"`` on
+                validation failure.
+        """
+        raise NotImplementedError
+
+    async def force_task_completion(
+        self,
+        task_id: str,
+        result: dict[str, Any],
+        *,
+        account: dict[str, Any] | None = None,
+        context: ToolContext | None = None,
+    ) -> dict[str, Any]:
+        """Resolve a previously-submitted task to ``'completed'``.
+
+        Isolation and idempotency contract:
+
+        - **Cross-account replay** — raise ``TestControllerError("NOT_FOUND", ...)``
+          when the task_id was registered by a different sandbox account.
+        - **Identical-params replay** — idempotent; return the same
+          ``StateTransitionSuccess``.
+        - **Diverging-params replay** against a terminal task — raise
+          ``TestControllerError("INVALID_TRANSITION", ...,
+          current_state="completed")``.
+
+        Args:
+            task_id: Task handle to resolve. Max 128 chars.
+            result: Completion payload (non-empty object). Implementations
+                SHOULD validate it against the response branch for the task's
+                original method and MUST reject payloads that fail that check
+                with ``TestControllerError("INVALID_PARAMS", ...)``.
+            account: Caller-supplied account object from the MCP request.
+                Used for cross-account isolation.
+            context: Optional ToolContext from the server's context_factory.
+
+        Returns:
+            StateTransitionSuccess::
+
+                {"success": True, "previous_state": "submitted",
+                 "current_state": "completed"}
+
+        Raises:
+            TestControllerError: with code ``"NOT_FOUND"`` if the task_id
+                is unknown or owned by a different account,
+                ``"INVALID_TRANSITION"`` if the task is already terminal and
+                params diverge, or ``"INVALID_PARAMS"`` on validation failure.
+        """
+        raise NotImplementedError
+
     async def simulate_delivery(
         self,
         media_buy_id: str,
@@ -300,34 +389,23 @@ def _controller_error(error: str, detail: str, current_state: str | None = None)
     return resp
 
 
-def _accepts_context_kwarg(method: Any) -> bool:
-    """True when ``method``'s signature accepts ``context=`` by keyword.
+def _accepts_kwarg(method: Any, name: str) -> bool:
+    """True when ``method``'s signature accepts ``name`` as a keyword argument.
 
-    TestControllerStore subclasses written against the original API
-    (pre-#227) don't declare ``context``; passing it would raise
-    ``TypeError`` at the call site. Signature inspection keeps the
-    dispatcher backward-compatible while letting stores opt in to
-    header-driven context by simply adding ``context=None`` to their
-    override.
+    Used by the dispatcher to decide whether to pass optional kwargs
+    (``context``, ``account``) to store methods. Methods that don't
+    declare the kwarg keep working unchanged; methods that do get the
+    value threaded in.
 
     Counts as an opt-in:
 
-    - ``*, context: ...`` — keyword-only (the documented recipe).
-    - ``context: ...`` as a regular positional-or-keyword parameter.
-    - ``**kwargs`` — accepts any keyword, including ``context``.
+    - ``*, name: ...`` — keyword-only (the documented recipe).
+    - ``name: ...`` as a regular positional-or-keyword parameter.
+    - ``**kwargs`` — accepts any keyword.
 
     Does **not** count:
 
-    - ``context`` as positional-only (before ``/``) — passing by
-      keyword raises ``TypeError``.
-    - ``context`` as ``*args`` (it's never a variadic positional).
-
-    Caveat: ``inspect.signature`` follows ``__wrapped__`` set by
-    ``@functools.wraps``. A decorator that wraps a legacy store method
-    and exposes the legacy signature will look "not opted in" even if
-    the wrapper itself would accept ``context``. This matches the
-    behavior callers expect — the wrapped callable signature is the
-    authoritative contract.
+    - ``name`` as positional-only (before ``/``).
     """
     try:
         sig = inspect.signature(method)
@@ -340,11 +418,16 @@ def _accepts_context_kwarg(method: Any) -> bool:
     for param in sig.parameters.values():
         if param.kind == inspect.Parameter.VAR_KEYWORD:
             return True
-        if param.name == "context" and param.kind in allowed:
+        if param.name == name and param.kind in allowed:
             return True
     return False
 
 
+def _accepts_context_kwarg(method: Any) -> bool:
+    """True when ``method``'s signature accepts ``context=`` by keyword."""
+    return _accepts_kwarg(method, "context")
+
+
 async def _handle_test_controller(
     store: TestControllerStore,
     params: dict[str, Any],
@@ -385,6 +468,9 @@ async def _handle_test_controller(
     extra: dict[str, Any] = {}
     if context is not None and _accepts_context_kwarg(method):
         extra["context"] = context
+    account = params.get("account")
+    if account is not None and _accepts_kwarg(method, "account"):
+        extra["account"] = account
 
     try:
         if scenario == "force_creative_status":
@@ -414,6 +500,78 @@ async def _handle_test_controller(
                 termination_reason=scenario_params.get("termination_reason"),
                 **extra,
             )
+        elif scenario == "force_create_media_buy_arm":
+            arm = scenario_params.get("arm") or ""
+            if arm not in ("submitted", "input-required"):
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    "arm must be 'submitted' or 'input-required'",
+                )
+            raw_task_id = scenario_params.get("task_id")
+            task_id: str | None = (
+                raw_task_id.strip() if isinstance(raw_task_id, str) else None
+            )
+            if not task_id:
+                task_id = None
+            if arm == "submitted" and not task_id:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    "task_id is required when arm is 'submitted'",
+                )
+            if task_id and len(task_id) > _MAX_TASK_ID:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    f"task_id must be at most {_MAX_TASK_ID} characters",
+                )
+            # Forced.task_id is only valid for arm='submitted'; strip it for
+            # 'input-required' so stores can't inadvertently echo it into the
+            # Forced object (which has extra="forbid" in the response schema).
+            if arm == "input-required":
+                task_id = None
+            message = scenario_params.get("message")
+            if message is not None and (
+                not isinstance(message, str) or len(message) > _MAX_MESSAGE
+            ):
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    f"message must be a string of at most {_MAX_MESSAGE} characters",
+                )
+            result = await method(
+                arm=arm,
+                task_id=task_id,
+                message=message,
+                **extra,
+            )
+        elif scenario == "force_task_completion":
+            raw_task_id = scenario_params.get("task_id")
+            task_id = raw_task_id.strip() if isinstance(raw_task_id, str) else None
+            if not task_id:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    "Missing required parameter: 'task_id'",
+                )
+            if len(task_id) > _MAX_TASK_ID:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    f"task_id must be at most {_MAX_TASK_ID} characters",
+                )
+            result_value = scenario_params.get("result")
+            if not isinstance(result_value, dict) or not result_value:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    "result must be a non-empty object",
+                )
+            result_bytes = len(json.dumps(result_value).encode("utf-8"))
+            if result_bytes > _MAX_RESULT_BYTES:
+                return _controller_error(
+                    "INVALID_PARAMS",
+                    f"result payload exceeds {_MAX_RESULT_BYTES // 1024} KB limit",
+                )
+            result = await method(
+                task_id=task_id,
+                result=result_value,
+                **extra,
+            )
         elif scenario == "simulate_delivery":
             result = await method(
                 media_buy_id=scenario_params["media_buy_id"],
@@ -546,29 +704,19 @@ async def comply_test_controller(**kwargs: Any) -> str:
         description="Compliance test controller. Sandbox only, not for production use.",
     )
 
-    # Override schema with the proper comply_test_controller inputSchema
+    # Override schema with the proper comply_test_controller inputSchema.
+    # Derived from SCENARIOS so it can't drift from the dispatcher.
     tool.parameters = {
         "type": "object",
         "properties": {
             "account": {"type": "object"},
             "scenario": {
                 "type": "string",
-                "enum": [
-                    "list_scenarios",
-                    "force_creative_status",
-                    "force_account_status",
-                    "force_media_buy_status",
-                    "force_session_status",
-                    "simulate_delivery",
-                    "simulate_budget_spend",
-                    "seed_product",
-                    "seed_pricing_option",
-                    "seed_creative",
-                    "seed_plan",
-                    "seed_media_buy",
-                ],
+                # Derived from SCENARIOS so the enum never drifts from the dispatcher.
+                "enum": ["list_scenarios"] + SCENARIOS,
             },
             "params": {"type": "object"},
+            "account": {"type": "object"},
             "context": {"type": "object"},
         },
         "required": ["scenario"],
diff --git a/tests/test_force_create_media_buy_arm_and_force_task_completion.py b/tests/test_force_create_media_buy_arm_and_force_task_completion.py