Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .changeset/probe-managerdomain-fallback-script.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
---

chore(scripts): add real-world managerdomain fallback probe

Ad-hoc developer tool (`server/scripts/probe-managerdomain-fallback.ts`)
that hits live DNS / public web against a small fixture of known
publisher-manager pairs and asserts the AdAgentsValidationResult
envelope.

Not for CI — meant as a manual probe a developer runs after touching
adagents-manager.ts to confirm the path still does what we think
against real managed-network publishers. Fixtures cover the direct
path (craftgossip), Mediavine fallback (homestratosphere), Freestar
fallback (momtastic, expected to fail closed since freestar.com does
not yet serve a manifest), and a non-404 control (raptive.com 403).

Surfaced three real-world divergences to follow up on, captured in
the fixture rationales:

1. craftgossip's adagents.json fails JSON validation.
2. homestratosphere's fallback to mediavine.com does not reach the
scope gate — likely a validator schema delta against Mediavine's
`agent_url` field naming.
3. freestar.com 404s on /.well-known so its delegating publishers
(momtastic) fail closed today.

Usage:
npx tsx server/scripts/probe-managerdomain-fallback.ts --verbose

Existed as a manual `curl` exercise before — moving the fixture set
into version control so the next regression is caught on demand
rather than after merge. The original gap (the explicit-publisher-
scoping bug fixed in #4283) is exactly what this would have caught.
224 changes: 224 additions & 0 deletions server/scripts/probe-managerdomain-fallback.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/**
* Real-world smoke test for the ads.txt MANAGERDOMAIN fallback path
* (#4173 / #4204 / #4210 / #4283).
*
* Hits live DNS and the public web. NOT for CI — meant as an ad-hoc
* probe a developer runs after touching adagents-manager.ts to confirm
* the path still does what we think against real managed-network
* publishers.
*
* Why this exists:
* The full PR stack for #4200 shipped against `*.example.com`
* fixtures and zero real-world verification. The first time we
* probed real domains (post-merge) we found the explicit-publisher-
* scoping gate rejected every production manager manifest — Mediavine,
* the only manager actually serving an adagents.json against a
* publisher with a managerdomain pointer, used a property-level
* scoping shape the gate didn't recognize. That cost a follow-up PR
* (#4283) to fix.
*
* This script makes that probe repeatable so the next regression is
* caught immediately rather than after merge.
*
* What it does:
* 1. For each fixture, run `AdAgentsManager.validateDomain(domain)`
* using the same code path the live crawler uses (no parallel
* implementation).
* 2. Assert the result against an expected envelope. Expected
* envelopes are loose ("any of these outcomes is acceptable")
* because real-world manifests change without warning.
* 3. Report per-domain pass/fail + an aggregate.
*
* Usage:
* npx tsx server/scripts/probe-managerdomain-fallback.ts
* npx tsx server/scripts/probe-managerdomain-fallback.ts --verbose
*
* Exit code:
* 0 — every fixture met its expected envelope.
* 1 — at least one fixture failed; details printed to stderr.
*/

import { AdAgentsManager, type AdAgentsValidationResult, type DiscoveryMethod } from '../src/adagents-manager.js';

interface ExpectedEnvelope {
// Allowed terminal states for this fixture. Real-world managers
// toggle their manifests, so we accept any of these outcomes; the
// assertion fails only when the actual result falls outside the set.
allowedDiscoveryMethods?: DiscoveryMethod[];
// When discovery_method falls back, allowed manager domains. Empty
// set means "any manager" (don't constrain).
allowedManagerDomains?: string[];
// Whether `valid` should be true. `null` means either is acceptable
// (e.g. a manager whose manifest may or may not yet scope this
// publisher in properties[]).
expectedValid: boolean | null;
// Free-form note explaining what the fixture is supposed to exercise.
rationale: string;
}

interface Fixture {
domain: string;
expected: ExpectedEnvelope;
}

const FIXTURES: Fixture[] = [
{
domain: 'craftgossip.com',
expected: {
allowedDiscoveryMethods: ['direct', 'authoritative_location'],
expectedValid: null, // depends on craftgossip's current manifest
rationale: 'Direct path control: serves /.well-known/adagents.json itself.',
},
},
{
domain: 'homestratosphere.com',
expected: {
// CURRENT REALITY: probe falls through to discovery_method='direct'
// with an http_status error rather than reaching managerdomain
// fallback, even though the publisher 404s and ads.txt declares
// MANAGERDOMAIN=mediavine.com. Hypothesis: Mediavine's live
// manifest uses field names (e.g. `agent_url`) that fail
// AdAgentsValidationResult validation before the scope gate ever
// runs, leaving managerResult.valid=false.
//
// TODO: investigate the validator delta against
// https://mediavine.com/.well-known/adagents.json
// If their shape is conformant, our validator is stale; if not,
// file upstream so they migrate. Once resolved, tighten this
// fixture to `allowedDiscoveryMethods: ['ads_txt_managerdomain']`.
allowedDiscoveryMethods: ['direct', 'ads_txt_managerdomain'],
allowedManagerDomains: ['mediavine.com'],
expectedValid: null,
rationale:
'Mediavine-managed publisher (homestratosphere → mediavine). Should reach the fallback path; investigation pending on why current validator does not.',
},
},
{
domain: 'momtastic.com',
expected: {
allowedDiscoveryMethods: ['direct', 'authoritative_location', 'ads_txt_managerdomain'],
// freestar.com currently 404s on /.well-known. Fallback should
// attempt it and the manager fetch fails-closed. discovery_method
// stays 'direct' on the failure path because the fallback
// override only fires when the manager validates.
expectedValid: false,
rationale:
'Freestar-managed publisher: ads.txt MANAGERDOMAIN=freestar.com but freestar.com does not yet serve a manifest (404). Fallback should attempt and fail closed.',
},
},
{
domain: 'raptive.com',
expected: {
// Origin returns 403, not 404 — fallback only fires on 404, so
// discovery_method stays 'direct' and we surface an HTTP error.
allowedDiscoveryMethods: ['direct'],
expectedValid: false,
rationale:
'Manager domain itself: 403 on /.well-known/adagents.json. Verifies fallback does NOT trigger on non-404.',
},
},
];

interface ProbeResult {
fixture: Fixture;
result: AdAgentsValidationResult;
pass: boolean;
failures: string[];
}

function checkEnvelope(fixture: Fixture, result: AdAgentsValidationResult): string[] {
const failures: string[] = [];
const { expected } = fixture;

if (expected.allowedDiscoveryMethods && !expected.allowedDiscoveryMethods.includes(result.discovery_method)) {
failures.push(
`discovery_method=${result.discovery_method}, expected one of [${expected.allowedDiscoveryMethods.join(', ')}]`,
);
}

if (expected.allowedManagerDomains && expected.allowedManagerDomains.length > 0 && result.manager_domain) {
if (!expected.allowedManagerDomains.includes(result.manager_domain)) {
failures.push(
`manager_domain=${result.manager_domain}, expected one of [${expected.allowedManagerDomains.join(', ')}]`,
);
}
}

if (expected.expectedValid !== null && result.valid !== expected.expectedValid) {
failures.push(`valid=${result.valid}, expected ${expected.expectedValid}`);
}

return failures;
}

async function probe(verbose: boolean): Promise<ProbeResult[]> {
const manager = new AdAgentsManager();
const out: ProbeResult[] = [];

for (const fixture of FIXTURES) {
process.stderr.write(`[probe] ${fixture.domain} ... `);
try {
const result = await manager.validateDomain(fixture.domain);
const failures = checkEnvelope(fixture, result);
const pass = failures.length === 0;
out.push({ fixture, result, pass, failures });
process.stderr.write(pass ? 'OK\n' : 'FAIL\n');
if (verbose || !pass) {
process.stderr.write(
` discovery_method=${result.discovery_method}` +
(result.manager_domain ? ` manager_domain=${result.manager_domain}` : '') +
` valid=${result.valid}` +
(result.errors.length > 0 ? ` errors=${JSON.stringify(result.errors.map(e => e.field))}` : '') +
'\n',
);
if (failures.length > 0) {
for (const f of failures) process.stderr.write(` ✗ ${f}\n`);
}
}
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
process.stderr.write(`ERROR: ${message}\n`);
out.push({
fixture,
result: {
valid: false,
errors: [{ field: 'probe_script', message, severity: 'error' }],
warnings: [],
domain: fixture.domain,
url: '',
discovery_method: 'direct',
},
pass: false,
failures: [`probe threw: ${message}`],
});
}
}

return out;
}

async function main() {
const verbose = process.argv.includes('--verbose');
const results = await probe(verbose);

const passed = results.filter(r => r.pass).length;
const failed = results.length - passed;

process.stderr.write(`\n${passed}/${results.length} fixtures passed\n`);

if (failed > 0) {
process.stderr.write('\nFailed fixtures:\n');
for (const r of results.filter(x => !x.pass)) {
process.stderr.write(` ${r.fixture.domain}: ${r.fixture.expected.rationale}\n`);
for (const f of r.failures) process.stderr.write(` ✗ ${f}\n`);
}
process.exit(1);
}

process.exit(0);
}

main().catch(err => {
console.error('probe failed:', err);
process.exit(1);
});
Loading