Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .changeset/fix-silent-tracks-compliance-status.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
---

fix(server): treat all-silent comply tracks as passing, not degraded (closes #4065)

The `@adcp/sdk/testing` `comply()` function returns `overall_status: 'partial'` when every
track is `'silent'` (all scenarios passed with no advisory observations — the best possible
outcome). The server mapped `'partial'` → `ComplianceStatus: 'degraded'`, causing the
compliance dashboard to show "Degraded" for fully-clean agents.

**Root cause:** `complianceResultToDbInput()` called `mapOverallStatus(result.overall_status)`
which blindly forwarded the SDK's `'partial'` to the DB. `computeStatus('partial')` then
returned `'degraded'`.

**Fix:** Added `effectiveRunStatus()` which checks whether all active (non-skip) tracks are
`'pass'` or `'silent'` before falling through to `mapOverallStatus`. When all tracks are
passing/silent, it overrides to `'passing'` and zeroes out `tracks_partial` so the stored
run record stays consistent.

**Frontend:** Track pills in `agents.html` and `dashboard-agents.html` (card view and history
panel) mapped `'silent'` to the skip CSS class (gray). Fixed to use the pass class (green) in
all four locations.

**Secondary gap (not fixed here):** `member-tools.ts:3533` records
`overall_passed: result.overall_status === 'passing'` using the raw SDK string, which stays
`'partial'` for all-silent runs. The `quality_evaluation` test-run record will incorrectly
store `overall_passed: false` until a follow-up PR fixes `evaluate_agent_quality`'s
`recordTest` call. Tracked as a known gap.

**Downstream effects of the fix (correct behavior):**
- Streak accumulation now works for all-silent agents (`streak_days` advances correctly)
- `last_passed_at` is now set on all-silent heartbeats (was NULL, blocking badge eligibility)
- Compliance notifications no longer false-fire for agents with clean-but-silent runs
4 changes: 2 additions & 2 deletions server/public/agents.html
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ <h2 id="detail-name"></h2>
if (trackEntries.length > 0) {
complianceHtml += '<div class="track-pills">';
for (const [track, status] of trackEntries) {
const pillClass = status === 'pass' ? 'track-pill--pass'
const pillClass = status === 'pass' || status === 'silent' ? 'track-pill--pass'
: status === 'fail' ? 'track-pill--fail'
: status === 'partial' ? 'track-pill--partial'
: 'track-pill--skip';
Expand Down Expand Up @@ -1008,7 +1008,7 @@ <h2 id="detail-name"></h2>
let trackPillsHtml = '';
const tracks = cs.tracks || {};
for (const [track, status] of Object.entries(tracks).filter(([, s]) => s !== 'skip' && s !== 'expected')) {
const pillClass = status === 'pass' ? 'track-pill--pass'
const pillClass = status === 'pass' || status === 'silent' ? 'track-pill--pass'
: status === 'fail' ? 'track-pill--fail'
: status === 'partial' ? 'track-pill--partial'
: 'track-pill--skip';
Expand Down
6 changes: 3 additions & 3 deletions server/public/dashboard-agents.html
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ <h1>Agents</h1>
const clickableTrackPills = Object.entries(tracks)
.filter(([, status]) => status !== 'skip' && status !== 'expected')
.map(([track, status]) => {
const cls = status === 'pass' ? 'agent-track--pass'
const cls = status === 'pass' || status === 'silent' ? 'agent-track--pass'
: status === 'fail' ? 'agent-track--fail'
: status === 'partial' ? 'agent-track--partial'
: 'agent-track--skip';
Expand Down Expand Up @@ -2599,7 +2599,7 @@ <h1>Agents</h1>
return;
}

const statusLabel = trackData.status === 'pass' ? 'Passing'
const statusLabel = trackData.status === 'pass' || trackData.status === 'silent' ? 'Passing'
: trackData.status === 'fail' ? 'Failing'
: trackData.status === 'partial' ? 'Partial' : trackData.status;

Expand Down Expand Up @@ -2670,7 +2670,7 @@ <h1>Agents</h1>
let runTracks = '';
if (run.tracks_json) {
for (const t of run.tracks_json) {
const cls = t.status === 'pass' ? 'agent-track--pass'
const cls = t.status === 'pass' || t.status === 'silent' ? 'agent-track--pass'
: t.status === 'fail' ? 'agent-track--fail'
: t.status === 'partial' ? 'agent-track--partial'
: 'agent-track--skip';
Expand Down
45 changes: 41 additions & 4 deletions server/src/addie/services/compliance-testing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,41 @@ function mapOverallStatus(status: string): OverallRunStatus {
}
}

/**
* Derive the effective overall status and track counters from a ComplianceResult.
*
* The SDK reports overall_status='partial' when every track returns 'silent' (all
* scenarios passed with no advisory observations — the best possible outcome).
* 'partial' maps to ComplianceStatus='degraded', which is wrong for a fully-clean
* run. When all active (non-skip) tracks are 'pass' or 'silent', override to
* 'passing' and recompute track counters so DB records stay consistent.
*/
function effectiveRunStatus(result: ComplianceResult): {
overall_status: OverallRunStatus;
tracks_passed: number;
tracks_failed: number;
tracks_partial: number;
} {
const activeTracks = result.tracks.filter((t: TrackResult) => t.status !== 'skip');
if (
activeTracks.length > 0 &&
activeTracks.every((t: TrackResult) => t.status === 'pass' || t.status === 'silent')
) {
return {
overall_status: 'passing',
tracks_passed: activeTracks.length,
tracks_failed: 0,
tracks_partial: 0,
};
}
return {
overall_status: mapOverallStatus(result.overall_status),
tracks_passed: result.summary.tracks_passed,
tracks_failed: result.summary.tracks_failed,
tracks_partial: result.summary.tracks_partial,
};
}

// ── Storyboard Status Derivation ─────────────────────────────────

/**
Expand Down Expand Up @@ -488,17 +523,19 @@ export function complianceResultToDbInput(
duration_ms: t.duration_ms,
}));

const { overall_status, tracks_passed, tracks_failed, tracks_partial } = effectiveRunStatus(result);

return {
agent_url: agentUrl,
lifecycle_stage: lifecycleStage,
overall_status: mapOverallStatus(result.overall_status),
overall_status,
headline: result.summary.headline,
total_duration_ms: result.total_duration_ms,
tracks_json: tracksJson,
tracks_passed: result.summary.tracks_passed,
tracks_failed: result.summary.tracks_failed,
tracks_passed,
tracks_failed,
tracks_skipped: result.summary.tracks_skipped,
tracks_partial: result.summary.tracks_partial,
tracks_partial,
agent_profile_json: result.agent_profile,
observations_json: result.observations,
triggered_by: triggeredBy,
Expand Down
103 changes: 103 additions & 0 deletions server/tests/unit/compliance-testing-effective-run-status.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import { describe, it, expect, vi } from 'vitest';

vi.mock('@adcp/sdk/testing', () => ({
setAgentTesterLogger: vi.fn(),
comply: vi.fn(),
loadComplianceIndex: vi.fn(() => ({ specialisms: [] })),
SAMPLE_BRIEFS: [],
getBriefsByVertical: vi.fn(() => []),
}));

vi.mock('../../src/services/storyboards.js', () => ({
getStoryboard: vi.fn(() => null),
getAllStoryboards: vi.fn(() => []),
}));

vi.mock('../../src/services/adcp-taxonomy.js', () => ({
isStableSpecialism: vi.fn(() => true),
}));

import { complianceResultToDbInput } from '../../src/addie/services/compliance-testing.js';

function makeTrack(status: string, scenarioCount = 3) {
return {
track: status === 'skip' ? 'governance' : 'core',
label: status === 'skip' ? 'Governance' : 'Core',
status,
duration_ms: 1000,
scenarios: Array.from({ length: scenarioCount }, (_, i) => ({
scenario: `scenario_${i}`,
overall_passed: status !== 'fail',
steps: [],
})),
};
}

function makeResult(tracks: ReturnType<typeof makeTrack>[], overallStatus = 'partial') {
const nonSkip = tracks.filter(t => t.status !== 'skip');
const passed = nonSkip.filter(t => t.status === 'pass' || t.status === 'silent').length;
const failed = nonSkip.filter(t => t.status === 'fail').length;
const partial = nonSkip.filter(t => t.status === 'partial').length;
return {
overall_status: overallStatus,
tracks,
summary: {
headline: 'Test headline',
tracks_passed: passed,
tracks_failed: failed,
tracks_partial: partial,
tracks_skipped: tracks.filter(t => t.status === 'skip').length,
},
total_duration_ms: 2000,
agent_profile: { name: 'test-agent', tools: [] },
observations: [],
};
}

describe('complianceResultToDbInput — effectiveRunStatus', () => {
it('promotes all-silent to passing with zero partial/failed counters', () => {
const result = makeResult([makeTrack('silent'), makeTrack('silent')], 'partial');
const out = complianceResultToDbInput(result as any, 'https://agent.example.com/mcp', 'production');

expect(out.overall_status).toBe('passing');
expect(out.tracks_passed).toBe(2);
expect(out.tracks_failed).toBe(0);
expect(out.tracks_partial).toBe(0);
});

it('promotes mixed pass+silent to passing', () => {
const result = makeResult([makeTrack('pass'), makeTrack('silent')], 'partial');
const out = complianceResultToDbInput(result as any, 'https://agent.example.com/mcp', 'production');

expect(out.overall_status).toBe('passing');
expect(out.tracks_passed).toBe(2);
expect(out.tracks_failed).toBe(0);
expect(out.tracks_partial).toBe(0);
});

it('does not promote when at least one track fails', () => {
const result = makeResult([makeTrack('silent'), makeTrack('fail')], 'partial');
const out = complianceResultToDbInput(result as any, 'https://agent.example.com/mcp', 'production');

expect(out.overall_status).toBe('partial');
expect(out.tracks_passed).toBe(0);

Check failure on line 83 in server/tests/unit/compliance-testing-effective-run-status.test.ts

View workflow job for this annotation

GitHub Actions / TypeScript Build

server/tests/unit/compliance-testing-effective-run-status.test.ts > complianceResultToDbInput — effectiveRunStatus > does not promote when at least one track fails

AssertionError: expected 1 to be +0 // Object.is equality - Expected + Received - 0 + 1 ❯ server/tests/unit/compliance-testing-effective-run-status.test.ts:83:31
expect(out.tracks_failed).toBe(1);
expect(out.tracks_partial).toBe(0);
});

it('ignores skip tracks when deciding promotion', () => {
const result = makeResult([makeTrack('silent'), makeTrack('skip')], 'partial');
const out = complianceResultToDbInput(result as any, 'https://agent.example.com/mcp', 'production');

expect(out.overall_status).toBe('passing');
expect(out.tracks_passed).toBe(1);
});

it('does not promote when all tracks are skipped (no active tracks)', () => {
const result = makeResult([makeTrack('skip'), makeTrack('skip')], 'partial');
const out = complianceResultToDbInput(result as any, 'https://agent.example.com/mcp', 'production');

// No active tracks — falls through to mapOverallStatus('partial') → 'partial'
expect(out.overall_status).toBe('partial');
});
});
Loading