Skip to content

Commit d3536c2

Browse files
bokelleyclaude
andcommitted
fix(signing): align brand-authz with ADCP #3690 security profile
Stage 4 spec research surfaced two conformance gaps in the just-landed brand_authz code. Both are fix-now-before-review issues, not stage-4 scope; landing on this PR keeps the security-relevant deltas in one review pass instead of shipping a known-divergent impl behind a "spec-conformant" label. 1. **PSL PRIVATE section must be in scope** (security) Per security.mdx §"Origin binding": "ICANN+PRIVATE sections both in scope so platforms like vercel.app, pages.dev, github.io are treated as suffixes". Without include_psl_private_domains=True, an attacker's ``attacker.vercel.app`` and a victim's ``victim.vercel.app`` would share an eTLD+1 of ``vercel.app`` — and the attacker's deployment would falsely satisfy the binding check against the victim's vercel-hosted brand.json. Fix: enable include_psl_private_domains on the singleton extractor. Regression test covers vercel.app / pages.dev / github.io. 2. **agents[] match must be byte-equal, not canonicalized** Per security.mdx §"Discovering an agent's signing keys": "Find the entry in agents[] whose url byte-equals A (no canonicalization at this step — the most common failure mode is a trailing-slash or scheme mismatch)." Canonicalizing silently authorizes URLs that drift from the operator's declaration. Also per spec: multiple matches → ``request_signature_brand_json_ambiguous``. Schema does not constrain agents[] to be unique-by-URL, so dupes from operator misconfig must fail closed rather than silently picking the first match. Fix: _find_listed_agent → _find_listed_agents (plural, returns full match list). Byte-equal comparison. New ``agent_ambiguous`` reason on the BrandAuthorizationReason taxonomy (maps to the spec's ``request_signature_brand_json_ambiguous`` at the framework boundary in stage 5). New tests cover trailing-slash mismatch, case mismatch, and duplicate entries. Removes the now-unused _canonicalize_agent_url helper and its imports. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4008f09 commit d3536c2

4 files changed

Lines changed: 159 additions & 53 deletions

File tree

src/adcp/signing/brand_authz.py

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
from collections.abc import Callable
3939
from dataclasses import dataclass
4040
from typing import Any, Literal, Protocol, runtime_checkable
41-
from urllib.parse import urlsplit
4241

4342
from adcp.signing.brand_jwks import (
4443
DEFAULT_BRAND_JSON_TIMEOUT_SECONDS,
@@ -51,7 +50,6 @@
5150
BrandJsonResolverError,
5251
_BrandJsonFetcher,
5352
_BrandJsonSnapshot,
54-
_canonicalize_url,
5553
_ClientFactory,
5654
)
5755
from adcp.signing.etld import host_from, registrable_domain, same_registrable_domain
@@ -64,6 +62,7 @@
6462
"etld1_match",
6563
"operator_delegation",
6664
"agent_not_listed",
65+
"agent_ambiguous",
6766
"agent_type_mismatch",
6867
"binding_failed",
6968
"brand_json_unavailable",
@@ -220,22 +219,39 @@ async def check(
220219
fetch_error=snap,
221220
)
222221

223-
matched = _find_listed_agent(
222+
listing = _find_listed_agents(
224223
snap.data,
225224
agent_url=agent_url,
226225
agent_type=agent_type,
227226
brand_id=brand_id,
228227
)
229228

230-
if matched is None:
229+
if len(listing) == 0:
231230
# Distinguish "not present at all" from "present but wrong type":
232-
# the latter is a stronger signal of misconfiguration.
231+
# the latter is a stronger signal of misconfiguration. Spec
232+
# folds both into ``request_signature_agent_not_in_brand_json``;
233+
# we keep the finer reason so the framework can choose to
234+
# surface it in diagnostics.
233235
if agent_type is not None and _has_listed_agent_at(
234236
snap.data, agent_url=agent_url, brand_id=brand_id
235237
):
236238
return BrandAuthorizationResult(False, reason="agent_type_mismatch")
237239
return BrandAuthorizationResult(False, reason="agent_not_listed")
238240

241+
if len(listing) > 1:
242+
# Multiple agents[] entries byte-equal the agent URL. Per
243+
# ADCP #3690 this maps to ``request_signature_brand_json_ambiguous``:
244+
# the brand.json schema does not constrain agents[] to be
245+
# unique-by-URL, so an operator misconfig can produce duplicates
246+
# — fail closed rather than silently picking one.
247+
return BrandAuthorizationResult(
248+
False,
249+
reason="agent_ambiguous",
250+
matched_agent_url=listing[0].url,
251+
)
252+
253+
matched = listing[0]
254+
239255
# Step 2a: eTLD+1 binding.
240256
if same_registrable_domain(agent_url, brand_host):
241257
return BrandAuthorizationResult(
@@ -363,38 +379,51 @@ class _ListedAgent:
363379
type: BrandAgentType | None
364380

365381

366-
def _find_listed_agent(
382+
def _find_listed_agents(
367383
data: dict[str, Any],
368384
*,
369385
agent_url: str,
370386
agent_type: BrandAgentType | None,
371387
brand_id: str | None,
372-
) -> _ListedAgent | None:
373-
"""Search ``agents[]`` arrays for an entry matching ``agent_url``.
374-
375-
Walks (in order) top-level ``agents``, ``house.agents``, and per-
376-
brand ``brands[].agents`` — bounded by ``brand_id`` when provided.
377-
Returns the first canonical-URL match (with ``type`` filter when
378-
set); ``None`` if no entry matches.
388+
) -> list[_ListedAgent]:
389+
"""Search ``agents[]`` arrays for entries matching ``agent_url``.
390+
391+
Walks top-level ``agents``, ``house.agents``, and per-brand
392+
``brands[].agents`` (bounded by ``brand_id`` when provided),
393+
returning every entry whose ``url`` **byte-equals** ``agent_url``.
394+
395+
**Byte-equal match by spec mandate.** Per ADCP #3690 security
396+
profile: "Find the entry in ``agents[]`` whose ``url`` byte-equals
397+
A (no canonicalization at this step). The most common failure
398+
mode is a trailing-slash or scheme mismatch (e.g.,
399+
``https://x.com/mcp`` ≠ ``https://x.com/mcp/``)." Canonicalizing
400+
would silently authorize agents whose URL is "close enough" to
401+
what the brand declared — operators must be deliberate about what
402+
they list.
403+
404+
Returning the full match list (rather than the first match) lets
405+
the caller distinguish ``agent_not_listed`` (0 matches),
406+
``agent_type_mismatch`` (0 type-filtered matches but the URL is
407+
listed), and ``agent_ambiguous`` (>1 matches — operator misconfig,
408+
spec maps to ``request_signature_brand_json_ambiguous``).
379409
"""
380-
target = _canonicalize_agent_url(agent_url)
381-
410+
matches: list[_ListedAgent] = []
382411
for entry in _walk_agents(data, brand_id=brand_id):
383412
if not isinstance(entry, dict):
384413
continue
385414
url = entry.get("url")
386-
if not isinstance(url, str):
387-
continue
388-
if _canonicalize_agent_url(url) != target:
415+
if not isinstance(url, str) or url != agent_url:
389416
continue
390417
if agent_type is not None and entry.get("type") != agent_type:
391418
continue
392419
listed_type = entry.get("type")
393-
return _ListedAgent(
394-
url=url,
395-
type=listed_type if isinstance(listed_type, str) else None, # type: ignore[arg-type]
420+
matches.append(
421+
_ListedAgent(
422+
url=url,
423+
type=listed_type if isinstance(listed_type, str) else None, # type: ignore[arg-type]
424+
)
396425
)
397-
return None
426+
return matches
398427

399428

400429
def _has_listed_agent_at(
@@ -403,15 +432,14 @@ def _has_listed_agent_at(
403432
agent_url: str,
404433
brand_id: str | None,
405434
) -> bool:
406-
"""Return True if ``agent_url`` appears in ``agents[]`` regardless
407-
of ``type`` — used to distinguish ``agent_type_mismatch`` from
408-
``agent_not_listed`` for caller diagnostics."""
409-
target = _canonicalize_agent_url(agent_url)
435+
"""Return True if ``agent_url`` byte-equals any listed ``agents[].url``
436+
regardless of ``type`` — used to distinguish ``agent_type_mismatch``
437+
from ``agent_not_listed`` for caller diagnostics."""
410438
for entry in _walk_agents(data, brand_id=brand_id):
411439
if not isinstance(entry, dict):
412440
continue
413441
url = entry.get("url")
414-
if isinstance(url, str) and _canonicalize_agent_url(url) == target:
442+
if isinstance(url, str) and url == agent_url:
415443
return True
416444
return False
417445

@@ -518,31 +546,6 @@ def _find_authorized_operator(
518546
return None
519547

520548

521-
def _canonicalize_agent_url(url: str) -> str:
522-
"""Canonicalize an agent URL for byte-equal comparison.
523-
524-
Reuses the brand.json URL canonicalizer (scheme/host lowercased,
525-
default port stripped, fragment stripped, userinfo rejected).
526-
Falls back to a basic ``urlsplit``-based lowercase on inputs the
527-
canonicalizer rejects (the comparison is best-effort here — a URL
528-
we cannot canonicalize will never match a properly-canonicalized
529-
target, which is the correct failure direction).
530-
"""
531-
try:
532-
return _canonicalize_url(url, allow_private=True)
533-
except BrandJsonResolverError:
534-
# Fall back to a permissive normalization so the comparison can
535-
# still proceed (the caller's verified agent_url has already
536-
# been validated upstream; the brand.json's listed url is the
537-
# one we can't structurally trust).
538-
parts = urlsplit(url)
539-
if not parts.scheme or not parts.netloc:
540-
return url.lower()
541-
netloc = parts.netloc.lower()
542-
path = parts.path or "/"
543-
return f"{parts.scheme.lower()}://{netloc}{path}"
544-
545-
546549
__all__ = [
547550
"BrandAuthorizationReason",
548551
"BrandAuthorizationResolver",

src/adcp/signing/etld.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,21 @@ def _extractor() -> tldextract.TLDExtract:
4040
First-call PSL parsing is non-trivial (~hundreds of ms on cold disk
4141
cache); subsequent calls are cheap. The singleton keeps that cost
4242
paid-once-per-process.
43+
44+
**Both ICANN and PRIVATE PSL sections are in scope.** Per ADCP
45+
spec #3690, the eTLD+1 binding must treat platform-shared suffixes
46+
like ``vercel.app``, ``pages.dev``, and ``github.io`` (in the PSL
47+
PRIVATE section) as suffixes — otherwise ``attacker.vercel.app``
48+
and ``victim.vercel.app`` would share an eTLD+1 of ``vercel.app``
49+
and an attacker's vercel deployment would falsely satisfy the
50+
binding against a vercel-hosted brand. ``include_psl_private_domains=True``
51+
closes that vector.
4352
"""
44-
return tldextract.TLDExtract(suffix_list_urls=(), fallback_to_snapshot=True)
53+
return tldextract.TLDExtract(
54+
suffix_list_urls=(),
55+
fallback_to_snapshot=True,
56+
include_psl_private_domains=True,
57+
)
4558

4659

4760
def host_from(value: str) -> str:

tests/test_brand_authz.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,82 @@ async def test_authz_brand_json_404_returns_brand_json_unavailable() -> None:
491491
assert result.fetch_error is not None
492492

493493

494+
# ----- byte-equal agents[] matching (spec mandate) -----
495+
496+
497+
@pytest.mark.asyncio
498+
async def test_authz_trailing_slash_mismatch_fails_byte_equal() -> None:
499+
# Per ADCP #3690: agents[].url match MUST be byte-equal. A trailing
500+
# slash on the request side vs no trailing slash on the brand.json
501+
# side is a mismatch — operators must list the exact URL.
502+
body = _brand_json(
503+
{"agents": [{"type": "signals", "id": "s", "url": "https://ads.brand.com/agent"}]}
504+
)
505+
transport = _MockTransport({"https://brand.com/.well-known/brand.json": {"body": body}})
506+
resolver = BrandJsonAuthorizationResolver(
507+
"https://brand.com/.well-known/brand.json",
508+
_client_factory=_factory(transport),
509+
)
510+
511+
result = await resolver.check(
512+
agent_url="https://ads.brand.com/agent/", # extra trailing slash
513+
brand_domain="brand.com",
514+
)
515+
assert result.authorized is False
516+
assert result.reason == "agent_not_listed"
517+
518+
519+
@pytest.mark.asyncio
520+
async def test_authz_case_mismatch_fails_byte_equal() -> None:
521+
# Scheme/host case differences are NOT canonicalized at this step.
522+
# The spec's rationale: operators must be deliberate about what
523+
# they list; a canonicalization-permissive match silently authorizes
524+
# URLs that drift from what the brand declared.
525+
body = _brand_json(
526+
{"agents": [{"type": "signals", "id": "s", "url": "https://ads.brand.com/agent"}]}
527+
)
528+
transport = _MockTransport({"https://brand.com/.well-known/brand.json": {"body": body}})
529+
resolver = BrandJsonAuthorizationResolver(
530+
"https://brand.com/.well-known/brand.json",
531+
_client_factory=_factory(transport),
532+
)
533+
534+
result = await resolver.check(
535+
agent_url="https://ADS.brand.com/agent", # uppercase host
536+
brand_domain="brand.com",
537+
)
538+
assert result.authorized is False
539+
assert result.reason == "agent_not_listed"
540+
541+
542+
@pytest.mark.asyncio
543+
async def test_authz_duplicate_agents_entry_returns_ambiguous() -> None:
544+
# brand.json schema does NOT constrain agents[] to be unique-by-URL.
545+
# If an operator misconfigures with duplicate entries for the same
546+
# URL, fail closed rather than silently picking one — maps to
547+
# ``request_signature_brand_json_ambiguous`` at the framework boundary.
548+
body = _brand_json(
549+
{
550+
"agents": [
551+
{"type": "signals", "id": "a", "url": "https://ads.brand.com/agent"},
552+
{"type": "signals", "id": "b", "url": "https://ads.brand.com/agent"},
553+
]
554+
}
555+
)
556+
transport = _MockTransport({"https://brand.com/.well-known/brand.json": {"body": body}})
557+
resolver = BrandJsonAuthorizationResolver(
558+
"https://brand.com/.well-known/brand.json",
559+
_client_factory=_factory(transport),
560+
)
561+
562+
result = await resolver.check(
563+
agent_url="https://ads.brand.com/agent",
564+
brand_domain="brand.com",
565+
)
566+
assert result.authorized is False
567+
assert result.reason == "agent_ambiguous"
568+
569+
494570
# ----- shared-fetcher builder -----
495571

496572

tests/test_etld.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,20 @@ def test_same_registrable_domain_cross_tld_with_shared_label() -> None:
143143
assert same_registrable_domain("brand.com", "brand.org") is False
144144

145145

146+
def test_registrable_domain_psl_private_section_in_scope() -> None:
147+
# Per ADCP #3690, the PSL PRIVATE section must be in scope so
148+
# platform-shared suffixes (``vercel.app``, ``pages.dev``,
149+
# ``github.io``) are treated as suffixes. Without this,
150+
# ``attacker.vercel.app`` and ``victim.vercel.app`` would share an
151+
# eTLD+1 and the binding check would authorize an attacker's
152+
# vercel deployment for a victim's vercel-hosted brand.
153+
assert registrable_domain("attacker.vercel.app") == "attacker.vercel.app"
154+
assert registrable_domain("victim.vercel.app") == "victim.vercel.app"
155+
assert same_registrable_domain("attacker.vercel.app", "victim.vercel.app") is False
156+
assert registrable_domain("brand.github.io") == "brand.github.io"
157+
assert registrable_domain("brand.pages.dev") == "brand.pages.dev"
158+
159+
146160
def test_registrable_domain_reserved_tld_returns_none() -> None:
147161
# ``.example``, ``.test``, ``.invalid``, ``.localhost`` are RFC 2606
148162
# reserved names — NOT in the PSL — so they fail closed. The spec's

0 commit comments

Comments
 (0)