Skip to content

Commit d3a9ce2

Browse files
committed
feat(adagents): inline-resolution path for publisher_properties selectors (#749 Part 1)
Layers inline-resolution on top of PR #753's publisher_domains[] fan-out and revoked_publisher_domains[] support. _resolve_agent_properties now returns resolved property dicts (not selector dicts) for the publisher_properties authorization_type, sourced from the parent file's top-level properties[] indexed by publisher_domain.

- Pre-builds domain → properties index once per call so per-domain lookups are O(1) — avoids O(N×M) at cafemedia scale (6,843 properties × 6,800 domains = ~46M comparisons under the old linear scan).
- Inline resolution honors revoked_publisher_domains[] transparently via the existing revoked_top_level pre-filter (revoked domains never enter the per-domain index, so they're skipped by construction).
- Fail-closed on unknown selection_type and empty selector criteria (property_tags=[] / property_ids=[]) per CLAUDE.md "no fallbacks" in authorization-decision paths. Fail-fast on properties missing the required property_id field.
- Cafemedia/interchange.io scale test (6,843 properties × 6,800 domains) is wall-clock-bounded to catch O(N×M) regressions.
- Two pre-existing TestRevokedPublisherDomains tests + the TestPublisherDomainsCompactForm resolution test updated to assert on resolved property dicts (the new contract) instead of selector dicts.
- Dead filter_revoked_selectors post-pass removed from get_properties_by_agent and get_all_properties — revocation is enforced upstream via the index, so the post-filter was structurally redundant.

Closes #749 Part 1. Federated fallback (when inline yields no match for a domain) lives in companion PR #752's async helpers.
1 parent 1c4e57d commit d3a9ce2

2 files changed

Lines changed: 440 additions & 23 deletions

File tree

src/adcp/adagents.py

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,16 +1250,109 @@ def _resolve_agent_properties(
12501250
# Handle publisher_properties (cross-domain references).
12511251
# Each entry with publisher_domains[a,b,c] fans out to one selector per
12521252
# listed domain — the compact form is exactly equivalent to repeating
1253-
# the entry once per publisher per adcp#4504.
1253+
# the entry once per publisher per adcp#4504. Selectors are then
1254+
# resolved inline against the parent file's top-level properties[]
1255+
# array, indexed by publisher_domain, per adcp#4827.
12541256
if authorization_type == "publisher_properties":
12551257
publisher_props = agent.get("publisher_properties", [])
12561258
if not isinstance(publisher_props, list):
12571259
return []
1258-
return _fanout_publisher_properties([p for p in publisher_props if isinstance(p, dict)])
1260+
selectors = _fanout_publisher_properties(
1261+
[p for p in publisher_props if isinstance(p, dict)]
1262+
)
1263+
return _resolve_publisher_property_selectors(selectors, top_level_properties)
12591264

12601265
return []
12611266

12621267

1268+
def _resolve_publisher_property_selectors(
1269+
selectors: list[dict[str, Any]],
1270+
top_level_properties: list[dict[str, Any]],
1271+
) -> list[dict[str, Any]]:
1272+
"""Resolve fanned-out publisher_properties selectors against inline data.
1273+
1274+
For each selector (one per publisher_domain), look up the matching
1275+
properties in ``top_level_properties`` by ``publisher_domain`` and
1276+
apply the selector's ``selection_type``:
1277+
1278+
- ``"all"``: every property under that domain
1279+
- ``"by_tag"``: properties whose ``tags`` intersect ``property_tags``
1280+
(empty ``property_tags`` resolves to ``[]`` — fail-closed, no
1281+
"tag list omitted means everything")
1282+
- ``"by_id"``: properties whose ``property_id`` is in ``property_ids``
1283+
(empty ``property_ids`` resolves to ``[]`` — same fail-closed rule)
1284+
- Anything else: ``[]`` (fail-closed; unknown selection_type does
1285+
not authorize anything — see CLAUDE.md "no fallbacks" on
1286+
authorization decisions)
1287+
1288+
Selectors whose domain has no entries in the index are skipped —
1289+
federated fallback (fetching the publisher's own adagents.json) is
1290+
out of scope for this resolver and lives in companion helpers.
1291+
1292+
Results are deduplicated by ``(publisher_domain, property_id)``.
1293+
Raises :class:`AdagentsValidationError` if any matching property is
1294+
missing the required ``property_id`` field (fail-fast per CLAUDE.md).
1295+
"""
1296+
if not selectors:
1297+
return []
1298+
1299+
# Build domain → [property, ...] index once. O(N) up-front trades
1300+
# against O(selectors × properties) per-domain scans, which blows
1301+
# up at cafemedia scale (6,843 properties × thousands of selectors).
1302+
domain_index: dict[str, list[dict[str, Any]]] = {}
1303+
for prop in top_level_properties:
1304+
if not isinstance(prop, dict):
1305+
continue
1306+
domain = prop.get("publisher_domain")
1307+
if not isinstance(domain, str) or not domain:
1308+
continue
1309+
domain_index.setdefault(domain, []).append(prop)
1310+
1311+
resolved: list[dict[str, Any]] = []
1312+
seen: set[tuple[str, str]] = set()
1313+
for selector in selectors:
1314+
domain = selector.get("publisher_domain")
1315+
if not isinstance(domain, str) or not domain:
1316+
continue
1317+
candidates = domain_index.get(domain)
1318+
if not candidates:
1319+
continue
1320+
1321+
selection_type = selector.get("selection_type")
1322+
matched: list[dict[str, Any]]
1323+
if selection_type == "all":
1324+
matched = list(candidates)
1325+
elif selection_type == "by_tag":
1326+
wanted_tags = {t for t in selector.get("property_tags", []) or [] if isinstance(t, str)}
1327+
if not wanted_tags:
1328+
continue
1329+
matched = [
1330+
p
1331+
for p in candidates
1332+
if {t for t in p.get("tags", []) or [] if isinstance(t, str)} & wanted_tags
1333+
]
1334+
elif selection_type == "by_id":
1335+
wanted_ids = {i for i in selector.get("property_ids", []) or [] if isinstance(i, str)}
1336+
if not wanted_ids:
1337+
continue
1338+
matched = [p for p in candidates if p.get("property_id") in wanted_ids]
1339+
else:
1340+
continue
1341+
1342+
for prop in matched:
1343+
prop_id = prop.get("property_id")
1344+
if not isinstance(prop_id, str) or not prop_id:
1345+
raise AdagentsValidationError(
1346+
f"property under domain={domain!r} missing required 'property_id'"
1347+
)
1348+
key = (domain, prop_id)
1349+
if key in seen:
1350+
continue
1351+
seen.add(key)
1352+
resolved.append(prop)
1353+
return resolved
1354+
1355+
12631356
def _fanout_publisher_properties(
12641357
publisher_props: list[dict[str, Any]],
12651358
) -> list[dict[str, Any]]:
@@ -1380,9 +1473,9 @@ def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:
13801473
if not agent_url:
13811474
continue
13821475

1476+
# revoked_top_level pre-filters revoked domains from the per-domain
1477+
# index, so inline resolution honors revocation transparently.
13831478
agent_properties = _resolve_agent_properties(agent, revoked_top_level)
1384-
if revoked and agent.get("authorization_type") == "publisher_properties":
1385-
agent_properties = filter_revoked_selectors(agent_properties, revoked)
13861479

13871480
for prop in agent_properties:
13881481
prop_with_agent = {**prop, "agent_url": agent_url}
@@ -1423,8 +1516,9 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
14231516
- inline_properties: Properties defined directly in the agent's properties array
14241517
- property_ids: Filter top-level properties by property_id
14251518
- property_tags: Filter top-level properties by tags
1426-
- publisher_properties: References properties from other publisher domains
1427-
(returns the selector objects, not resolved properties)
1519+
- publisher_properties: Inline-resolved properties from cross-publisher
1520+
selectors (resolved from the parent file's top-level properties[]
1521+
array per adcp#4827)
14281522
14291523
Args:
14301524
adagents_data: Parsed adagents.json data
@@ -1471,9 +1565,9 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
14711565
if normalize_url(agent_url_from_json) != normalized_agent_url:
14721566
continue
14731567

1568+
# revoked_top_level pre-filters revoked domains from the per-domain
1569+
# index, so inline resolution honors revocation transparently.
14741570
resolved = _resolve_agent_properties(agent, revoked_top_level)
1475-
if revoked and agent.get("authorization_type") == "publisher_properties":
1476-
resolved = filter_revoked_selectors(resolved, revoked)
14771571
return resolved
14781572

14791573
return []

0 commit comments

Comments
 (0)