From 923fd7be7dbf8189c3f0dbe3dd4710c05061c7d6 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Fri, 8 May 2026 15:08:30 -0400 Subject: [PATCH 1/3] feat(registry): manager fan-out re-validation endpoint Closes #4200 item 5. POST /api/registry/manager-revalidation-request short-circuits the 60-minute organic crawl cycle: when a manager rotates its adagents.json, ops can hit this endpoint and have every delegating publisher enqueued immediately rather than waiting for a routine sweep to detect drift. Thin wrapper around enqueueManagerRevalidation (#4210). The crawler's worker tick (processManagerRevalidationQueue) drains the queue at a bounded rate, so a Raptive-scale rotation propagates within ~10 hours of this request. - Body: { manager_domain }. Lower-cased and trimmed. - Returns 202 with publishers_enqueued (rows touched in the queue). - Rate-limited via the shared validateAndRateLimitCrawl machinery. Key is namespaced ("manager:" prefix) so a manager request doesn't bypass an in-window publisher recrawl on the same domain or vice-versa. Hourly per-member limit is shared with other crawl endpoints. Per-agent source enum extension to 'adagents_json_via_manager' is NOT shipping in this PR. Re-evaluating: publishers.discovery_method (landed in #4204) already lets consumers join through and discriminate direct vs. managerdomain-discovered authorizations. A separate per-agent enum value would be denormalization and would silently exclude managerdomain rows from every existing reader filtering on source='adagents_json'. Closing that follow-up as won't-fix on the issue. 
--- .changeset/manager-revalidation-endpoint.md | 23 +++ server/src/routes/registry-api.ts | 99 ++++++++++- .../manager-revalidation-endpoint.test.ts | 160 ++++++++++++++++++ 3 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 .changeset/manager-revalidation-endpoint.md create mode 100644 server/tests/integration/manager-revalidation-endpoint.test.ts diff --git a/.changeset/manager-revalidation-endpoint.md b/.changeset/manager-revalidation-endpoint.md new file mode 100644 index 0000000000..71dd6d3f04 --- /dev/null +++ b/.changeset/manager-revalidation-endpoint.md @@ -0,0 +1,23 @@ +--- +--- + +feat(registry): manager fan-out re-validation endpoint + +Closes #4200 item 5. New `POST /api/registry/manager-revalidation-request` +short-circuits the 60-minute organic crawl cycle: when a manager rotates +its `adagents.json`, ops can hit this endpoint and have every delegating +publisher enqueued immediately. Thin wrapper around +`enqueueManagerRevalidation` (which lands in #4210). + +- Body: `{ manager_domain }`. Validated, lower-cased, trimmed. +- Returns `202` with `publishers_enqueued` (count of delegating publishers + added to or refreshed in the queue). Zero when nobody delegates. +- Rate-limited via the shared `validateAndRateLimitCrawl` machinery used + by `crawl-request` and `brand-crawl-request`. Key is namespaced + (`manager:` prefix) so a manager-recrawl request doesn't bypass an + in-window publisher recrawl on the same domain or vice-versa. Hourly + per-member limit is shared. + +Tests: enqueue happy path with multiple delegating publishers, zero-count +when none delegate, 400 on missing field, lower-case + trim normalization, +per-domain rate limit window. 
diff --git a/server/src/routes/registry-api.ts b/server/src/routes/registry-api.ts index 925aec6cda..8763172379 100644 --- a/server/src/routes/registry-api.ts +++ b/server/src/routes/registry-api.ts @@ -102,7 +102,7 @@ import { isWebUserAAOAdmin } from "../addie/admin-status-lookup.js"; import { getDevUser, isDevModeEnabled } from "../middleware/auth.js"; import { OrganizationDatabase, hasApiAccess, resolveMembershipTier } from "../db/organization-db.js"; import { resolveCallerOrgId } from "./helpers/resolve-caller-org.js"; -import { canonicalizeAgentUrl } from "../db/publisher-db.js"; +import { canonicalizeAgentUrl, PublisherDatabase } from "../db/publisher-db.js"; import { AuthorizationSnapshotDatabase, EvidenceValidationError, @@ -1517,6 +1517,60 @@ registry.registerPath({ }, }); +registry.registerPath({ + method: "post", + path: "/api/registry/manager-revalidation-request", + operationId: "requestManagerRevalidation", + summary: "Request manager fan-out re-validation", + description: + "Trigger re-validation of every publisher delegating to a manager domain via ads.txt `MANAGERDOMAIN`. Use after rotating the manager's `adagents.json` so the change propagates to delegating publishers without waiting for the next routine crawl cycle. Work is queued and drained at a bounded rate (≈50 publishers per 5-minute tick). Returns 202 immediately with the number of publishers enqueued.\n\n**Rate limits:** 5 minutes per manager domain, 30 requests per user per hour (shared with other crawl-request endpoints).", + tags: ["Agent Discovery"], + security: [{ bearerAuth: [] }, { oauth2: [] }], + request: { + body: { + content: { + "application/json": { + schema: z.object({ + manager_domain: z.string().openapi({ + example: "raptive.com", + description: "Manager domain whose delegating publishers should be queued for re-validation. 
It does not need to have delegating publishers yet: when no publisher row lists it as `manager_domain`, the request is still accepted with `publishers_enqueued: 0`.", + }), + }), + }, + }, + }, + }, + responses: { + 202: { + description: "Re-validation queue request accepted", + content: { + "application/json": { + schema: z.object({ + message: z.literal("Manager re-validation enqueued"), + manager_domain: z.string(), + publishers_enqueued: z.number().int().openapi({ + description: "Number of delegating publisher rows added to or refreshed in the manager_revalidation_queue. Zero if no publisher delegates to this manager.", + }), + }), + }, + }, + }, + 400: { description: "Missing `manager_domain`, invalid domain format, private IP, or unresolvable domain", content: { "application/json": { schema: ErrorSchema } } }, + 401: { description: "Authentication required", content: { "application/json": { schema: ErrorSchema } } }, + 429: { + description: "Rate limit exceeded", + content: { + "application/json": { + schema: z.object({ + error: z.string(), + retry_after: z.number().int().openapi({ description: "Seconds to wait before retrying" }), + }), + }, + }, + }, + }, +}); + registry.registerPath({ method: "post", path: "/api/registry/brand-crawl-request", @@ -2741,6 +2795,7 @@ export function createRegistryApiRouter(config: RegistryApiConfig): Router { const noopMiddleware: RequestHandler = (_req, _res, next) => next(); const optAuth: RequestHandler = optionalAuthMiddleware ?? noopMiddleware; const orgDb = new OrganizationDatabase(); + const publisherDb = new PublisherDatabase(); const catalogDb = new CatalogDatabase(); @@ -7583,5 +7638,47 @@ export function createRegistryApiRouter(config: RegistryApiConfig): Router { } }); + // Manager fan-out re-validation: when a manager rotates its + // adagents.json, this endpoint short-circuits the 60-minute organic + // crawl cycle by enqueueing every delegating publisher directly into + // manager_revalidation_queue. 
The crawler worker drains the queue at + // a bounded rate; each per-publisher validation re-fetches the + // manager's file via the ads.txt MANAGERDOMAIN fallback, so the + // publishers see the rotated content without us needing to re-crawl + // the manager itself first. + // + // Rate-limit key is namespaced ("manager:") so a manager-recrawl + // request doesn't bypass an in-window publisher recrawl on the same + // domain (or vice-versa). Hourly per-member limit is shared. + router.post("/registry/manager-revalidation-request", authMiddleware, async (req, res) => { + try { + // Mirror manager_domain into body.domain for the shared validator. Type-check + // first (non-string → 400, not a thrown TypeError → 500) and wrap req via its prototype so inherited getters survive. + const managerInput = typeof req.body?.manager_domain === 'string' ? req.body.manager_domain.trim().toLowerCase() : ''; + if (!managerInput) { + return res.status(400).json({ error: "manager_domain is required" }); + } + const reqWithDomain: typeof req = Object.create(req, { + body: { value: { ...req.body, domain: managerInput }, writable: true, enumerable: true, configurable: true }, + }); + const normalizedDomain = await validateAndRateLimitCrawl( + reqWithDomain, + res, + `manager:${managerInput}`, + ); + if (!normalizedDomain) return; + + const enqueued = await publisherDb.enqueueManagerRevalidation(normalizedDomain); + return res.status(202).json({ + message: "Manager re-validation enqueued", + manager_domain: normalizedDomain, + publishers_enqueued: enqueued, + }); + } catch (error) { + logger.error({ error }, "Failed to enqueue manager revalidation"); + return res.status(500).json({ error: "Failed to enqueue manager revalidation" }); + } + }); + return router; } diff --git a/server/tests/integration/manager-revalidation-endpoint.test.ts b/server/tests/integration/manager-revalidation-endpoint.test.ts new file mode 100644 index 0000000000..1c0ce696cb --- /dev/null +++ 
b/server/tests/integration/manager-revalidation-endpoint.test.ts @@ -0,0 +1,160 @@ +/** + * HTTP endpoint test for /api/registry/manager-revalidation-request + * (#4200 item 5). 
+ * + * The endpoint short-circuits the 60-minute organic crawl cycle: when a + * manager rotates its adagents.json, ops can hit this endpoint and have + * every delegating publisher enqueued for re-validation immediately. + * Body of the request is just `{ manager_domain }`; the handler calls + * `enqueueManagerRevalidation` and returns the count. + */ +import { describe, it, expect, beforeAll, beforeEach, afterAll } from 'vitest'; +import request from 'supertest'; +import express from 'express'; +import type { Pool } from 'pg'; +import { initializeDatabase, closeDatabase } from '../../src/db/client.js'; +import { runMigrations } from '../../src/db/migrate.js'; +import { createRegistryApiRouter, type RegistryApiConfig } from '../../src/routes/registry-api.js'; + +const MANAGER = 'mgr-revalidation-endpoint.example.com'; +const PUB_A = 'pub-a.mgr-revalidation-endpoint.example.com'; +const PUB_B = 'pub-b.mgr-revalidation-endpoint.example.com'; + +function buildTestApp() { + const app = express(); + app.use(express.json()); + + const passAuth: import('express').RequestHandler = (req, _res, next) => { + (req as import('express').Request & { user?: { id: string } }).user = { id: 'test-member' }; + next(); + }; + + const config: RegistryApiConfig = { + brandManager: {} as unknown as RegistryApiConfig['brandManager'], + brandDb: {} as unknown as RegistryApiConfig['brandDb'], + propertyDb: {} as unknown as RegistryApiConfig['propertyDb'], + adagentsManager: {} as unknown as RegistryApiConfig['adagentsManager'], + healthChecker: {} as unknown as RegistryApiConfig['healthChecker'], + crawler: {} as unknown as RegistryApiConfig['crawler'], + capabilityDiscovery: {} as unknown as RegistryApiConfig['capabilityDiscovery'], + registryRequestsDb: { + trackRequest: async () => {}, + markResolved: async () => true, + }, + requireAuth: passAuth, + optionalAuth: passAuth, + }; + + const router = createRegistryApiRouter(config); + app.use('/api', router); + return app; +} + 
+describe('POST /api/registry/manager-revalidation-request', () => { + let pool: Pool; + let app: express.Express; + + beforeAll(async () => { + pool = initializeDatabase({ + connectionString: process.env.DATABASE_URL || 'postgresql://adcp:localdev@localhost:5432/adcp_test', + }); + await runMigrations(); + app = buildTestApp(); + }); + + async function clearFixtures() { + await pool.query( + `DELETE FROM manager_revalidation_queue WHERE publisher_domain = ANY($1::text[])`, + [[PUB_A, PUB_B]], + ); + await pool.query( + `DELETE FROM publishers WHERE domain = ANY($1::text[])`, + [[PUB_A, PUB_B, MANAGER]], + ); + } + + async function seedDelegatingPublisher(domain: string, manager: string): Promise<void> { + await pool.query( + `INSERT INTO publishers (domain, source_type, manager_domain, discovery_method, last_validated) + VALUES ($1, 'adagents_json', $2, 'ads_txt_managerdomain', NOW())`, + [domain, manager], + ); + } + + beforeEach(async () => { + await clearFixtures(); + }); + + afterAll(async () => { + await clearFixtures(); + await closeDatabase(); + }); + + it('enqueues every delegating publisher and returns the count', async () => { + await seedDelegatingPublisher(PUB_A, MANAGER); + await seedDelegatingPublisher(PUB_B, MANAGER); + + const res = await request(app) + .post('/api/registry/manager-revalidation-request') + .send({ manager_domain: MANAGER }); + + expect(res.status).toBe(202); + expect(res.body).toMatchObject({ + message: 'Manager re-validation enqueued', + manager_domain: MANAGER, + publishers_enqueued: 2, + }); + + const queued = await pool.query( + `SELECT publisher_domain FROM manager_revalidation_queue + WHERE manager_domain = $1 ORDER BY publisher_domain ASC`, + [MANAGER], + ); + expect(queued.rows.map(r => r.publisher_domain)).toEqual([PUB_A, PUB_B]); + }); + + it('returns 0 publishers_enqueued when no publisher delegates to the manager', async () => { + const res = await request(app) + 
.post('/api/registry/manager-revalidation-request') + .send({ 
manager_domain: 'nobody-delegates-here.example.com' }); + + expect(res.status).toBe(202); + expect(res.body.publishers_enqueued).toBe(0); + }); + + it('400s when manager_domain is missing', async () => { + const res = await request(app) + .post('/api/registry/manager-revalidation-request') + .send({}); + + expect(res.status).toBe(400); + expect(res.body.error).toContain('manager_domain'); + }); + + it('lower-cases and trims the manager_domain before lookup', async () => { + await seedDelegatingPublisher(PUB_A, MANAGER); + + const res = await request(app) + .post('/api/registry/manager-revalidation-request') + .send({ manager_domain: ` ${MANAGER.toUpperCase()} ` }); + + expect(res.status).toBe(202); + expect(res.body.manager_domain).toBe(MANAGER); + expect(res.body.publishers_enqueued).toBe(1); + }); + + it('rate-limits a second request to the same manager within the window', async () => { + await seedDelegatingPublisher(PUB_A, MANAGER); + + const first = await request(app) + .post('/api/registry/manager-revalidation-request') + .send({ manager_domain: MANAGER }); + expect(first.status).toBe(202); + + const second = await request(app) + .post('/api/registry/manager-revalidation-request') + .send({ manager_domain: MANAGER }); + expect(second.status).toBe(429); + expect(second.body.retry_after).toBeGreaterThan(0); + }); +}); From 788fa4a9b900666fdbb3ddac73ab1a4a899c25e1 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Fri, 8 May 2026 15:15:16 -0400 Subject: [PATCH 2/3] test: bypass DNS validation for manager-revalidation-endpoint test .example.com subdomains don't resolve in CI; mock validateCrawlDomain to a pass-through to exercise the handler logic. Mirrors the pattern used in registry-publisher-brand-json-hydration.test.ts. 
--- .../manager-revalidation-endpoint.test.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/server/tests/integration/manager-revalidation-endpoint.test.ts b/server/tests/integration/manager-revalidation-endpoint.test.ts index 1c0ce696cb..b47d59a3e5 100644 --- a/server/tests/integration/manager-revalidation-endpoint.test.ts +++ b/server/tests/integration/manager-revalidation-endpoint.test.ts @@ -8,7 +8,19 @@ * Body of the request is just `{ manager_domain }`; the handler calls * `enqueueManagerRevalidation` and returns the count. */ -import { describe, it, expect, beforeAll, beforeEach, afterAll } from 'vitest'; +import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from 'vitest'; + +// Bypass DNS-based domain validation: test fixtures use `.example.com` +// subdomains that don't resolve in CI. The real validation surface is +// exercised by the publisher crawl-request endpoint tests. +vi.mock('../../src/utils/url-security.js', async () => { + const actual = await vi.importActual<typeof import('../../src/utils/url-security.js')>('../../src/utils/url-security.js'); + return { + ...actual, + validateCrawlDomain: async (domain: string) => domain.toLowerCase().trim(), + }; +}); + import request from 'supertest'; import express from 'express'; import type { Pool } from 'pg'; From a0db4fa7e88f09bd464390876f6d39a91a05799b Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Fri, 8 May 2026 15:19:49 -0400 Subject: [PATCH 3/3] test(manager-revalidation): rebuild app per test for isolated rate-limit state The crawl-rate-limit Map lives in createRegistryApiRouter's closure; sharing one app across cases meant the second test on the same manager domain hit the 5-minute window from the first test's request. Rebuild in beforeEach so each test gets a fresh limit state. 
--- .../tests/integration/manager-revalidation-endpoint.test.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/tests/integration/manager-revalidation-endpoint.test.ts b/server/tests/integration/manager-revalidation-endpoint.test.ts index b47d59a3e5..5ba20892bd 100644 --- a/server/tests/integration/manager-revalidation-endpoint.test.ts +++ b/server/tests/integration/manager-revalidation-endpoint.test.ts @@ -71,7 +71,6 @@ describe('POST /api/registry/manager-revalidation-request', () => { connectionString: process.env.DATABASE_URL || 'postgresql://adcp:localdev@localhost:5432/adcp_test', }); await runMigrations(); - app = buildTestApp(); }); async function clearFixtures() { @@ -95,6 +94,9 @@ describe('POST /api/registry/manager-revalidation-request', () => { beforeEach(async () => { await clearFixtures(); + // Fresh app per test so the in-memory rate-limit Map (closure-scoped + // inside createRegistryApiRouter) doesn't bleed between cases. + app = buildTestApp(); }); afterAll(async () => {