Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ describe('verifyEnterpriseMultiTenantWindows script', () => {
await fs.rm(tempRoot, { recursive: true, force: true });
});

test('covers current D1/D2/D4/D5/D6/D7/D8 isolation invariants without invoking a real provider', async () => {
test('covers current D1/D2/D4/D5/D6/D7/D8/D9 isolation invariants without invoking a real provider', async () => {
const tracePath = path.join(tempRoot, 'fixture.pftrace');
const uploadRoot = path.join(tempRoot, 'uploads');
const outputPath = path.join(tempRoot, 'report.json');
Expand All @@ -53,6 +53,7 @@ describe('verifyEnterpriseMultiTenantWindows script', () => {
D6: true,
D7: true,
D8: true,
D9: true,
});
expect(Object.values(report.scenarios.D1.checks)).toEqual(expect.arrayContaining([true]));
expect(Object.values(report.scenarios.D1.checks).every(Boolean)).toBe(true);
Expand All @@ -62,6 +63,7 @@ describe('verifyEnterpriseMultiTenantWindows script', () => {
expect(Object.values(report.scenarios.D6.checks).every(Boolean)).toBe(true);
expect(Object.values(report.scenarios.D7.checks).every(Boolean)).toBe(true);
expect(Object.values(report.scenarios.D8.checks).every(Boolean)).toBe(true);
expect(Object.values(report.scenarios.D9.checks).every(Boolean)).toBe(true);
expect(report.coverageLimitations.join('\n')).toContain('TraceProcessorLease');
expect(report.scenarios.D6.details).toEqual(expect.objectContaining({
reportUrl: expect.stringMatching(/^\/api\/reports\//),
Expand All @@ -71,6 +73,12 @@ describe('verifyEnterpriseMultiTenantWindows script', () => {
providerSnapshotChanged: true,
sdkSessionReusable: false,
}));
expect(report.scenarios.D9.details).toEqual(expect.objectContaining({
recoveredPreviousStatuses: expect.arrayContaining([
expect.arrayContaining([expect.stringMatching(/^run-/), 'pending']),
expect.arrayContaining([expect.stringMatching(/^run-/), 'running']),
]),
}));
expect(process.env.UPLOAD_DIR).toBe(previousUploadDir);

const written = JSON.parse(await fs.readFile(outputPath, 'utf8')) as EnterpriseWindowRegressionReport;
Expand All @@ -79,6 +87,6 @@ describe('verifyEnterpriseMultiTenantWindows script', () => {

const traceFiles = (await fs.readdir(path.join(uploadRoot, 'traces')))
.filter(file => file.endsWith('.trace'));
expect(traceFiles).toHaveLength(11);
expect(traceFiles).toHaveLength(12);
});
});
186 changes: 184 additions & 2 deletions backend/src/scripts/verifyEnterpriseMultiTenantWindows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { TraceProcessorLeaseStore } from '../services/traceProcessorLeaseStore';
import { ownerFieldsFromContext } from '../services/resourceOwnership';
import type { RequestContext } from '../middleware/auth';

type ScenarioName = 'D1' | 'D2' | 'D4' | 'D5' | 'D6' | 'D7' | 'D8';
type ScenarioName = 'D1' | 'D2' | 'D4' | 'D5' | 'D6' | 'D7' | 'D8' | 'D9';

interface VerifyOptions {
tracePath?: string;
Expand Down Expand Up @@ -1182,6 +1182,185 @@ async function scenarioD8(
};
}

async function scenarioD9(
tracePath: string,
userAWindow1: WindowContext,
): Promise<ScenarioReport> {
const restartDbPath = path.join(path.dirname(getTracesDir()), `d9-restart-${crypto.randomUUID()}.sqlite`);
let restartDb = new Database(restartDbPath);
try {
applyEnterpriseMinimalSchema(restartDb);
const upload = await simulateUpload(restartDb, userAWindow1, tracePath, 'd9-backend-restart.pftrace');
const pendingRun = createAnalysisRun(restartDb, userAWindow1, upload, 'pending');
const runningRun = createAnalysisRun(restartDb, userAWindow1, upload, 'running');
const completedRun = createAnalysisRun(restartDb, userAWindow1, upload, 'completed');
const recoveryAt = 1_777_200_100_000;
const recoveryErrorJson = JSON.stringify({
message: 'backend restart during D9 regression',
source: 'backend_startup_recovery',
});

appendAgentEvent(restartDb, userAWindow1, pendingRun.runId, 1, 'progress', {
type: 'progress',
data: { phase: 'queued-before-restart' },
});
appendAgentEvent(restartDb, userAWindow1, runningRun.runId, 1, 'progress', {
type: 'progress',
data: { phase: 'running-before-restart' },
});
appendAgentEvent(restartDb, userAWindow1, runningRun.runId, 2, 'thought', {
type: 'thought',
data: { summary: 'partial thought before restart' },
});
appendAgentEvent(restartDb, userAWindow1, completedRun.runId, 1, 'conclusion', {
type: 'conclusion',
data: { summary: 'completed before restart' },
});
appendAgentEvent(restartDb, userAWindow1, completedRun.runId, 2, 'analysis_completed', {
type: 'analysis_completed',
data: { reportUrl: '/api/reports/d9-terminal-report' },
});
restartDb.prepare(`
UPDATE analysis_runs
SET completed_at = ?, updated_at = ?
WHERE id = ?
`).run(recoveryAt - 1_000, recoveryAt - 1_000, completedRun.runId);
restartDb.prepare(`
UPDATE analysis_sessions
SET status = 'completed', updated_at = ?
WHERE id = ?
`).run(recoveryAt - 1_000, completedRun.sessionId);

restartDb.close();
restartDb = new Database(restartDbPath);
applyEnterpriseMinimalSchema(restartDb);

const interrupted = restartDb.prepare<unknown[], {
id: string;
tenant_id: string;
workspace_id: string;
session_id: string;
status: string;
}>(`
SELECT id, tenant_id, workspace_id, session_id, status
FROM analysis_runs
WHERE status IN ('pending', 'running', 'awaiting_user')
ORDER BY updated_at ASC, started_at ASC, id ASC
`).all();
const recoverInterrupted = restartDb.transaction(() => {
for (const run of interrupted) {
restartDb.prepare(`
UPDATE analysis_runs
SET status = 'failed',
heartbeat_at = ?,
updated_at = ?,
completed_at = COALESCE(completed_at, ?),
error_json = COALESCE(error_json, ?)
WHERE tenant_id = ?
AND workspace_id = ?
AND id = ?
AND status = ?
`).run(
recoveryAt,
recoveryAt,
recoveryAt,
recoveryErrorJson,
run.tenant_id,
run.workspace_id,
run.id,
run.status,
);
restartDb.prepare(`
UPDATE analysis_sessions
SET status = 'failed',
updated_at = ?
WHERE tenant_id = ?
AND workspace_id = ?
AND id = ?
AND status IN ('pending', 'running', 'awaiting_user')
`).run(recoveryAt, run.tenant_id, run.workspace_id, run.session_id);
}
});
recoverInterrupted();

const lifecycleRows = restartDb.prepare<unknown[], {
id: string;
status: string;
completed_at: number | null;
error_json: string | null;
}>(`
SELECT id, status, completed_at, error_json
FROM analysis_runs
WHERE id IN (?, ?, ?)
ORDER BY id ASC
`).all(pendingRun.runId, runningRun.runId, completedRun.runId);
const lifecycleById = Object.fromEntries(lifecycleRows.map(row => [row.id, row]));
const pendingEvents = listAgentEventsAfter(restartDb, userAWindow1, pendingRun.runId, 0);
const runningEvents = listAgentEventsAfter(restartDb, userAWindow1, runningRun.runId, 0);
const completedEvents = listAgentEventsAfter(restartDb, userAWindow1, completedRun.runId, 0);
const traceMetadata = await readTraceMetadataForContext(upload.traceId, userAWindow1.context);
const traceAssetRow = restartDb.prepare<unknown[], { id: string; local_path: string; status: string }>(`
SELECT id, local_path, status
FROM trace_assets
WHERE tenant_id = ? AND workspace_id = ? AND id = ?
`).get(userAWindow1.context.tenantId, userAWindow1.context.workspaceId, upload.traceId);
const sessionStatuses = restartDb.prepare<unknown[], { id: string; status: string }>(`
SELECT id, status
FROM analysis_sessions
WHERE id IN (?, ?, ?)
ORDER BY id ASC
`).all(pendingRun.sessionId, runningRun.sessionId, completedRun.sessionId);
const sessionStatusById = Object.fromEntries(sessionStatuses.map(row => [row.id, row.status]));
const recoveredPreviousStatuses = interrupted.map(run => [run.id, run.status]);

const checks = {
fileBackedDbReopensWithTraceAsset: traceAssetRow?.id === upload.traceId
&& traceAssetRow.status === 'ready'
&& fs.existsSync(upload.localPath),
traceMetadataReadableAfterRestart: traceMetadata?.id === upload.traceId
&& traceMetadata.tenantId === userAWindow1.context.tenantId
&& traceMetadata.workspaceId === userAWindow1.context.workspaceId,
startupRecoveryFindsOnlyNonterminalRuns: recoveredPreviousStatuses.length === 2
&& recoveredPreviousStatuses.some(([id, status]) => id === pendingRun.runId && status === 'pending')
&& recoveredPreviousStatuses.some(([id, status]) => id === runningRun.runId && status === 'running')
&& !recoveredPreviousStatuses.some(([id]) => id === completedRun.runId),
pendingAndRunningRunsBecomeFailed: lifecycleById[pendingRun.runId]?.status === 'failed'
&& lifecycleById[runningRun.runId]?.status === 'failed'
&& lifecycleById[pendingRun.runId]?.completed_at === recoveryAt
&& lifecycleById[runningRun.runId]?.completed_at === recoveryAt,
terminalRunRemainsCompleted: lifecycleById[completedRun.runId]?.status === 'completed'
&& lifecycleById[completedRun.runId]?.completed_at === recoveryAt - 1_000,
persistedEventsReplayAfterRestart: pendingEvents.map(event => event.cursor).join(',') === '1'
&& runningEvents.map(event => event.cursor).join(',') === '1,2'
&& completedEvents.map(event => `${event.cursor}:${event.eventType}`).join(',') === '1:conclusion,2:analysis_completed',
failedSessionsMarkedWithoutTouchingTerminalSession: sessionStatusById[pendingRun.sessionId] === 'failed'
&& sessionStatusById[runningRun.sessionId] === 'failed'
&& sessionStatusById[completedRun.sessionId] === 'completed',
recoveryErrorIsAuditable: lifecycleById[pendingRun.runId]?.error_json === recoveryErrorJson
&& lifecycleById[runningRun.runId]?.error_json === recoveryErrorJson
&& lifecycleById[completedRun.runId]?.error_json === null,
};

return {
checks,
details: {
traceId: upload.traceId,
restartDbPath,
pendingRun,
runningRun,
completedRun,
recoveredPreviousStatuses,
pendingEvents,
runningEvents,
completedEvents,
sessionStatusById,
},
};
} finally {
restartDb.close();
}
}

function allChecksPassed(report: EnterpriseWindowRegressionReport): boolean {
return Object.values(report.scenarios).every(scenario =>
Object.values(scenario.checks).every(Boolean),
Expand Down Expand Up @@ -1248,6 +1427,7 @@ export async function runEnterpriseWindowRegression(
D6: await scenarioD6(db, tracePath, windows.userAWindow1, windows.userCWindow1),
D7: await scenarioD7(db, tracePath, windows.userAWindow1),
D8: await scenarioD8(db, tracePath, windows.userAWindow1),
D9: await scenarioD9(tracePath, windows.userAWindow1),
};
const report: EnterpriseWindowRegressionReport = {
timestamp: new Date().toISOString(),
Expand All @@ -1260,6 +1440,7 @@ export async function runEnterpriseWindowRegression(
D6: scenarioPassed(scenarios.D6),
D7: scenarioPassed(scenarios.D7),
D8: scenarioPassed(scenarios.D8),
D9: scenarioPassed(scenarios.D9),
},
uploadRoot,
tracePath,
Expand All @@ -1272,7 +1453,8 @@ export async function runEnterpriseWindowRegression(
'D6 covers the persisted AgentEvent replay contract after a conclusion cursor; the live stream route path is covered by agentRoutesRbac tests.',
'D7 covers running run, active lease, report_generation holder, and draining rejection invariants; actual route blocking is covered by enterpriseTraceMetadataRoutes tests.',
'D8 covers the DB ProviderSnapshot pin/hash-mismatch invariant in the enterprise window regression; AgentAnalyzeSessionService tests cover actual in-memory and persisted SDK session non-reuse.',
'Production backend proxy and queue behavior remain future §0.7 D1/D2/D4/D5/D6/D7/D8 final-acceptance work against a live browser and trace_processor_shell.',
'D9 covers file-backed DB close/reopen recovery for trace metadata, run states, and AgentEvent replay; enterpriseRestartPersistence tests cover the route-level restart recovery path.',
'Production backend proxy and queue behavior remain future §0.7 D1/D2/D4/D5/D6/D7/D8/D9 final-acceptance work against a live browser and trace_processor_shell.',
],
};
report.passed = allChecksPassed(report);
Expand Down
2 changes: 1 addition & 1 deletion docs/features/enterprise-multi-tenant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
- [x] D6 SSE 在 conclusion 后、analysis_completed 前断开 → AgentEvent replay 能补回 reportUrl
- [x] D7 手动 cleanup / delete → running run / active lease / 正在生成的 report 被 draining 保护
- [x] D8 Provider 配置在 session 中途变更 → resume 校验 ProviderSnapshot hash,不复用错误 SDK session
- [ ] D9 后端进程重启 → pending/running/terminal run 状态、events、trace metadata 可恢复或转 failed
- [x] D9 后端进程重启 → pending/running/terminal run 状态、events、trace metadata 可恢复或转 failed
- [ ] D10 机器内存接近上限 → admission 拒绝新 lease,不通过 OOM 杀已有窗口

### 0.8 §19 总验收
Expand Down