separate repl evaluator-error handling

grypez · grypez · commit d0d4eb4d65f1 · 2025-11-10T18:45:54.000-05:00
diff --git a/packages/kernel-agents/src/strategies/repl/evaluator-error.test.ts b/packages/kernel-agents/src/strategies/repl/evaluator-error.test.ts
@@ -0,0 +1,103 @@
+import 'ses';
+import '@ocap/repo-tools/test-utils/mock-endoify';
+import { EvaluatorError, SampleGenerationError } from '@metamask/kernel-errors';
+import { describe, it, expect } from 'vitest';
+
+import { processEvaluationError, stripStackTrace } from './evaluator-error.ts';
+import { ERROR } from './symbols.ts';
+
+describe('stripStackTrace', () => {
+  it('strips stack trace from Error', () => {
+    const original = new Error('test error');
+    // Plant a recognizable frame so its absence can be asserted below.
+    original.stack = 'Error: test error\n    at test.js:1:1';
+    const result = stripStackTrace(original) as Error;
+    expect(result).toBeInstanceOf(Error);
+    expect(result.message).toBe('test error');
+    expect(result.stack).not.toContain('at test.js');
+  });
+
+  it('preserves error cause chain', () => {
+    const chained = new Error('outer', { cause: new Error('inner') });
+    const result = stripStackTrace(chained) as Error;
+    const nestedCause = result.cause as Error;
+    expect(result.message).toBe('outer');
+    expect(nestedCause).toBeInstanceOf(Error);
+    expect(nestedCause.message).toBe('inner');
+  });
+
+  it('returns non-Error values unchanged', () => {
+    for (const value of ['string', 42, null]) {
+      expect(stripStackTrace(value)).toBe(value);
+    }
+  });
+});
+
+describe('processEvaluationError', () => {
+  type Outcome = { [ERROR]?: unknown };
+
+  // Stores `error` under ERROR on a fresh result, runs the processor on it
+  // with `code`, and returns that same (possibly mutated) result.
+  const processWith = (error: unknown, code: string): Outcome => {
+    const outcome: Outcome = { [ERROR]: error };
+    processEvaluationError(outcome, code);
+    return outcome;
+  };
+
+  it('does nothing when result has no error', () => {
+    // An empty object has no own ERROR entry, so nothing should be thrown.
+    const empty: Outcome = {};
+    expect(() => processEvaluationError(empty, 'code')).not.toThrow();
+  });
+
+  it('throws EvaluatorError for internal errors', () => {
+    // Built outside the callback so only the processor's throw is observed.
+    const internal = new EvaluatorError('test', 'code', new Error('cause'));
+    expect(() => processWith(internal, 'code')).toThrow(EvaluatorError);
+  });
+
+  it('throws SampleGenerationError for SyntaxError', () => {
+    const thrown = new SyntaxError('syntax error');
+    expect(() => processWith(thrown, 'bad code')).toThrow(
+      SampleGenerationError,
+    );
+  });
+
+  it('throws SampleGenerationError for ReferenceError', () => {
+    const thrown = new ReferenceError('reference error');
+    expect(() => processWith(thrown, 'bad code')).toThrow(
+      SampleGenerationError,
+    );
+  });
+
+  it('throws SampleGenerationError for Error objects with SyntaxError name', () => {
+    // Only `name` is overridden; the object is otherwise a plain Error.
+    const renamed = Object.assign(new Error('error'), { name: 'SyntaxError' });
+    expect(() => processWith(renamed, 'bad code')).toThrow(
+      SampleGenerationError,
+    );
+  });
+
+  it('processes and assigns valid-feedback errors', () => {
+    // A plain Error should remain on the result instead of being thrown.
+    const feedback = processWith(new Error('user error'), 'code')[ERROR];
+    expect(feedback).toBeInstanceOf(Error);
+    expect((feedback as Error).message).toBe('user error');
+  });
+
+  it('wraps non-Error values as Error for valid-feedback', () => {
+    // Non-Error payloads should come back wrapped, stringified as message.
+    const feedback = processWith('string error', 'code')[ERROR];
+    expect(feedback).toBeInstanceOf(Error);
+    expect((feedback as Error).message).toBe('string error');
+  });
+
+  it('strips stack traces from valid-feedback errors', () => {
+    const thrown = new Error('user error');
+    // Plant a recognizable frame so its absence can be asserted below.
+    thrown.stack = 'Error: user error\n    at test.js:1:1';
+    const feedback = processWith(thrown, 'code')[ERROR] as Error;
+    expect(feedback.message).toBe('user error');
+    expect(feedback.stack).not.toContain('at test.js');
+  });
+});
diff --git a/packages/kernel-agents/src/strategies/repl/evaluator-error.ts b/packages/kernel-agents/src/strategies/repl/evaluator-error.ts
@@ -0,0 +1,67 @@
+import { SampleGenerationError, EvaluatorError } from '@metamask/kernel-errors';
+
+import type { EvaluationResult } from './prepare-evaluation.ts';
+import { ERROR } from './symbols.ts';
+
+/**
+ * Rebuilds an error without its original stack, keeping message and causes.
+ * Any cause other than `undefined` is kept (falsy ones too) and itself stripped.
+ *
+ * @param error - The value to sanitize.
+ * @returns A fresh `Error`, or the input unchanged when it is not an `Error`.
+ */
+export const stripStackTrace = (error: unknown): unknown => {
+  if (!(error instanceof Error)) {
+    return error;
+  }
+  return error.cause === undefined
+    ? new Error(error.message)
+    : new Error(error.message, { cause: stripStackTrace(error.cause) });
+};
+
+// Coerces any thrown value into an `Error`; non-errors are stringified.
+const asError = (thrown: unknown): Error =>
+  thrown instanceof Error ? thrown : new Error(String(thrown));
+
+// Builds a predicate matching instances of `kind` or any `Error` named `label`.
+const matchesKind = (kind: ErrorConstructor, label: string) =>
+  (thrown: unknown): boolean =>
+    thrown instanceof kind || (thrown instanceof Error && thrown.name === label);
+const isSyntaxError = matchesKind(SyntaxError, 'SyntaxError');
+const isReferenceError = matchesKind(ReferenceError, 'ReferenceError');
+
+/**
+ * Routes the error, if any, recorded under `ERROR` on an evaluation result.
+ * Results without an own `ERROR` entry are left alone; otherwise the error
+ * is thrown (retry/exit cases) or replaced in place by a stack-free copy.
+ *
+ * @param result - The evaluation result; mutated for valid-feedback errors.
+ * @param code - The evaluated code, passed on to any SampleGenerationError.
+ * @throws {EvaluatorError} Rethrown as-is when the recorded error is one.
+ * @throws {SampleGenerationError} For syntax/reference errors (retry).
+ */
+export const processEvaluationError = (
+  result: EvaluationResult,
+  code: string,
+): void => {
+  if (!Object.hasOwn(result, ERROR)) {
+    return;
+  }
+  const recorded = result[ERROR];
+
+  // An EvaluatorError is an internal failure: rethrow it untouched.
+  if (recorded instanceof EvaluatorError) {
+    throw recorded;
+  }
+
+  // Every remaining path reports the error without its stack traces.
+  const sanitized = stripStackTrace(asError(recorded)) as Error;
+
+  // Syntax/reference errors mark a bad sample and should trigger a retry.
+  if (isSyntaxError(recorded) || isReferenceError(recorded)) {
+    throw new SampleGenerationError(code, sanitized);
+  }
+
+  // Anything else is valid feedback and stays on the result.
+  result[ERROR] = sanitized;
+};
diff --git a/packages/kernel-agents/src/strategies/repl/evaluator.test.ts b/packages/kernel-agents/src/strategies/repl/evaluator.test.ts
@@ -1,13 +1,11 @@
 import 'ses'; // We need the real Compartment, not the mock.
 import '@ocap/repo-tools/test-utils/mock-endoify';
-import { EvaluatorError } from '@metamask/kernel-errors';
 import { Logger } from '@metamask/logger';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 
 import { makeEvaluator } from './evaluator.ts';
 import {
   CommentMessage,
-  EvaluationMessage,
   ImportMessage,
   ResultMessage,
   StatementMessage,
@@ -94,37 +92,6 @@ describe('evaluator', () => {
     });
   });
 
-  describe('classifies errors', () => {
-    it('rejects EvaluatorError as internal error', async () => {
-      const evaluatorWithError = makeEvaluator({
-        initState: () => ({ consts: {}, lets: {} }),
-        capabilities: {
-          badCap: {
-            func: () => {
-              throw new EvaluatorError('test', 'code', new Error('cause'));
-            },
-            schema: { description: 'Bad capability', args: {} },
-          },
-        },
-      });
-      const statement = new EvaluationMessage('badCap();');
-      await expect(evaluatorWithError([], statement)).rejects.toThrow(
-        EvaluatorError,
-      );
-    });
-
-    it('returns user errors as valid feedback without stack traces', async () => {
-      const history: ReplTranscript = [];
-      const result = await evaluator(
-        history,
-        StatementMessage.fromCode(
-          '(function() { throw new Error("user error"); })();',
-        ),
-      );
-      expect(result?.messageBody.error).toBe('Error: user error');
-    });
-  });
-
   describe('creates result messages', () => {
     it('creates result with return value', async () => {
       const history: ReplTranscript = [];
@@ -136,18 +103,6 @@ describe('evaluator', () => {
       expect(result?.messageBody.return).toBe('"hello"');
     });
 
-    it('creates result with error', async () => {
-      const history: ReplTranscript = [];
-      const result = await evaluator(
-        history,
-        StatementMessage.fromCode(
-          '(function() { throw new Error("test"); })();',
-        ),
-      );
-      expect(result).toBeInstanceOf(ResultMessage);
-      expect(result?.messageBody.error).toContain('Error: test');
-    });
-
     it('creates result with declaration value', async () => {
       const history: ReplTranscript = [];
       const result = await evaluator(
@@ -169,17 +124,13 @@ describe('evaluator', () => {
   });
 
   describe('manages state', () => {
-    it('does not update state on error', async () => {
+    it('does not update state when evaluation has error', async () => {
       const initialState = { consts: {}, lets: {} };
       const history: ReplTranscript = [];
-      try {
-        await evaluator(
-          history,
-          StatementMessage.fromCode('const x = undefined.y;'),
-        );
-      } catch {
-        // Expected to throw
-      }
+      await evaluator(
+        history,
+        StatementMessage.fromCode('const x = undefined.y;'),
+      );
       expect(state).toStrictEqual(initialState);
     });
   });
diff --git a/packages/kernel-agents/src/strategies/repl/evaluator.ts b/packages/kernel-agents/src/strategies/repl/evaluator.ts
@@ -1,8 +1,9 @@
-import { SampleGenerationError, EvaluatorError } from '@metamask/kernel-errors';
+import { EvaluatorError } from '@metamask/kernel-errors';
 import { mergeDisjointRecords } from '@metamask/kernel-utils';
 import type { Logger } from '@metamask/logger';
 
 import { makeCompartment } from './compartment.ts';
+import { processEvaluationError } from './evaluator-error.ts';
 import {
   CommentMessage,
   EvaluationMessage,
@@ -17,58 +18,6 @@ import { extractCapabilities } from '../../capabilities/capability.ts';
 import type { CapabilityRecord } from '../../types.ts';
 import { ifDefined } from '../../utils.ts';
 
-/**
- * Error classification result for compartment errors.
- */
-type ErrorClassification =
-  | { type: 'sample-generation'; error: SampleGenerationError }
-  | { type: 'internal'; error: EvaluatorError }
-  | { type: 'valid-feedback'; error: Error };
-
-/**
- * Classifies a compartment error into one of three categories:
- * 1. Sample generation errors (syntax/reference errors) - should trigger retry
- * 2. Internal errors (REPL infrastructure violations) - should exit attempt
- * 3. Valid feedback errors (capability errors, etc.) - should be surfaced to agent
- *
- * @param error - The error to classify.
- * @param code - The code that was being evaluated.
- * @returns The classification result.
- */
-const classifyCompartmentError = (
-  error: unknown,
-  code: string,
-): ErrorClassification => {
-  const cause = error instanceof Error ? error : new Error(String(error));
-
-  // Check if this is already an EvaluatorError (thrown by safe wrappers)
-  if (cause instanceof EvaluatorError) {
-    return {
-      type: 'internal',
-      error: cause,
-    };
-  }
-
-  // Check if this is a sample generation error (syntax/reference errors)
-  if (
-    cause instanceof SyntaxError ||
-    cause instanceof ReferenceError ||
-    cause.name === 'SyntaxError' ||
-    cause.name === 'ReferenceError'
-  ) {
-    return {
-      type: 'sample-generation',
-      error: new SampleGenerationError(code, cause),
-    };
-  }
-
-  // All other errors are valid feedback (capability errors, NotImplemented, etc.)
-  return {
-    type: 'valid-feedback',
-    error: cause,
-  };
-};
-
 const validateStatement = (
   statement: StatementMessage,
 ): { earlyResult?: ResultMessage | null } => {
@@ -158,21 +107,7 @@ export const makeEvaluator = ({
     }
 
     // Handle errors caught by $catch (user code errors)
-    if (Object.hasOwn(result, ERROR)) {
-      const classification = classifyCompartmentError(result[ERROR], code);
-      if (['sample-generation', 'internal'].includes(classification.type)) {
-        throw classification.error;
-      }
-      // Valid feedback error: treat as result, stripping out the stack trace
-      const withoutStack = (error: unknown): unknown =>
-        error instanceof Error
-          ? new Error(
-              error.message,
-              ...(error.cause ? [{ cause: withoutStack(error.cause) }] : []),
-            )
-          : error;
-      result[ERROR] = withoutStack(result[ERROR]);
-    }
+    processEvaluationError(result, code);
 
     // Update the state and return the result
     const stepResult = [ERROR, RETURN, 'value'].some((key) =>