Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion packages/kernel-agents/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@
"default": "./dist/index.cjs"
}
},
"./json": {
"import": {
"types": "./dist/strategies/json-agent.d.mts",
"default": "./dist/strategies/json-agent.mjs"
},
"require": {
"types": "./dist/strategies/json-agent.d.cts",
"default": "./dist/strategies/json-agent.cjs"
}
},
"./repl": {
"import": {
"types": "./dist/strategies/repl-agent.d.mts",
"default": "./dist/strategies/repl-agent.mjs"
},
"require": {
"types": "./dist/strategies/repl-agent.d.cts",
"default": "./dist/strategies/repl-agent.cjs"
}
},
"./package.json": "./package.json"
},
"files": [
Expand Down Expand Up @@ -82,8 +102,12 @@
"node": "^20.6 || >=22"
},
"dependencies": {
"@metamask/kernel-errors": "workspace:^",
"@metamask/kernel-utils": "workspace:^",
"@metamask/logger": "workspace:^",
"@ocap/kernel-language-model-service": "workspace:^"
"@ocap/kernel-language-model-service": "workspace:^",
"ses": "^1.14.0",
"tree-sitter": "^0.25.0",
"tree-sitter-javascript": "^0.25.0"
}
}
66 changes: 66 additions & 0 deletions packages/kernel-agents/src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Kernel Agents

A kernel agent, [tasked](./task.ts) with an objective, attempts to fulfill the objective within the context of its available capabilities and knowledge.

In doing so, the agent collects experiences. Each experience includes the objective and context of the task, the history of the agent's actions and observations throughout the attempt, and, if relevant, the error state or final result achieved by the attempt.

Learning from experience requires some value signal associated with that experience. No such signal is implemented or provided by this package.

Although every task is defined by an objective and the context within which to attempt that objective, the attempts themselves may follow various strategies. A [strategy](./strategies/README.md) presents the task specification, together with the history of an agent's observations and actions, in a textual form that elicits useful responses from a language model.

## Implementation Sketches

An abstract agent formulation looks as follows.

```js
// A highly abstract sketch of an agent
// Runs the observe -> act -> step -> render loop for at most `params.maxSteps`
// iterations. Resolves to `state.result` as soon as a step reports `done`;
// resolves to `undefined` if the step budget runs out first.
// Declared `async` because the loop body awaits `act` and `step`.
const agent = async (params) => {
  let state = initState(),
    done = false;
  const { act } = makeModel(params);
  const { observe, step, render } = makeEnvironment(params);
  for (let i = 0; i < params.maxSteps; i++) {
    // Observe: derive the model's input from the current state
    const observation = observe(state);
    // Act: let the model choose an action
    const action = await act(observation);
    // Step: apply the action, yielding the next state and a completion flag
    [state, done] = await step(state, action);
    if (done) {
      return state.result;
    }
    // Render: only reached when the task is not yet done
    render(state);
  }
};
```

In practice, an agent is built around a language model using a slightly more detailed implementation. Although it is not an exact factorization of the generic structure given above, the broad sketch of `observe->act->step->render->repeat` remains.

```js
// A more detailed abstract sketch of an agent
// Runs up to `maxSteps` rounds of prompt -> sample -> read -> evaluate.
// Resolves to `state.result` once `task.isDone` holds; resolves to `undefined`
// if the step budget runs out first.
// Declared `async` because the loop body awaits the model, reader and evaluator.
const agent = async ({ task, llm }, { maxSteps = 10 }) => {
  const state = initState();
  const prompter = makePrompter(state, task),
    reader = makeReader(),
    evaluator = makeEvaluator(state),
    printer = makePrinter(state);
  for (let i = 0; i < maxSteps; i++) {
    // Observe
    const { prompt, readerOptions } = prompter(state);
    // Act
    const { sample, abort } = await llm.sample(prompt);
    const action = await reader({ sample, abort, ...readerOptions });
    // Step
    // The evaluator's output is what the agent observes as the outcome of
    // its action; it is recorded in the state and printed below.
    const observation = await evaluator(state, action);
    state.update(action, observation);
    if (task.isDone(action, state)) {
      // Matches the generic sketch: the result is read from the state
      return state.result;
    }

    // Render
    printer(action, observation);
  }
};
```

For concrete implementations, see below.

- [json-agent](./strategies/json-agent.ts)
- [repl-agent](./strategies/repl-agent.ts)
120 changes: 45 additions & 75 deletions packages/kernel-agents/src/agent.test.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
import '@ocap/repo-tools/test-utils/mock-endoify';

import type { Logger } from '@metamask/logger';
import { Logger } from '@metamask/logger';
import { vi, describe, it, expect } from 'vitest';

import { makeAgent } from './agent.ts';
import { capability } from './capability.ts';
import { end } from './default-capabilities.ts';
import { AssistantMessage, CapabilityResultMessage } from './messages.ts';
import { makeChat } from './prompt.ts';
import { makeJsonAgent } from './strategies/json-agent.ts';
import { makeReplAgent } from './strategies/repl-agent.ts';

const prompt = 'test prompt';
const prefix = '{"messageType":"assistant","';
const stop = '</|>';

vi.mock('./prompt.ts', () => ({
makeChat: vi.fn(() => ({
getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
pushMessages: vi.fn(),
})),
vi.mock('./strategies/repl/prompter.ts', () => ({
makePrompter: vi.fn(() => () => ({ prompt, readerArgs: { stop } })),
}));

describe('makeAgent', () => {
vi.mock('./strategies/json/prompter.ts', () => ({
makePrompter: vi.fn(() => () => ({ prompt, readerArgs: { prefix } })),
}));

const logger = new Logger('test');

describe.each([
['Json', makeJsonAgent, [`invoke":[{"name":"end","args":{"final":"x"}}]}`]],
['Repl', makeReplAgent, ["await end({ final: 'x' });", stop]],
])('make%sAgent', (strategy, makeAgent, endStatement) => {
const mockLlm = (...chunks: string[]) => ({
getInfo: vi.fn(),
load: vi.fn(),
Expand All @@ -37,88 +41,54 @@ describe('makeAgent', () => {
});

it('makes an agent', () => {
const llm = mockLlm();
const agent = makeAgent({ llm, capabilities: {} });
const languageModel = mockLlm();
const agent = makeAgent({ languageModel, capabilities: {} });
expect(agent).toBeDefined();
expect(agent).toHaveProperty('task');
});

it('endows the "end" capability by default', async () => {
const llm = mockLlm();
const mockMergeDisjointRecordsSpy = vi.spyOn(
await import('@metamask/kernel-utils'),
'mergeDisjointRecords',
);
const capabilities = {};
makeAgent({ llm, capabilities });
expect(mockMergeDisjointRecordsSpy).toHaveBeenCalledWith(
{ end },
capabilities,
);
});

describe('task', () => {
it('invokes the LLM', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const agent = makeAgent({ llm, capabilities: {} });
const languageModel = mockLlm(...endStatement);
const agent = makeAgent({ languageModel, capabilities: {}, logger });
const result = await agent.task('');
expect(result).toBe('x');
// This is a massive understatement, but we don't want to test the prompt
expect(llm.sample).toHaveBeenCalledWith(prompt);
expect(languageModel.sample).toHaveBeenCalledWith(prompt);
});

it('throws if the LLM did not invoke a capability', async () => {
// LLM finishes valid JSON, but no invoke property
const llm = mockLlm(`content":""}`);
const agent = makeAgent({ llm, capabilities: {} });
const task = agent.task('');
await expect(task).rejects.toThrow('No invoke in result');
});
it.skipIf(strategy !== 'Json')(
'throws if the LLM did not invoke a capability',
async () => {
// LLM finishes valid JSON, but no invoke property
const languageModel = mockLlm(`content":""}`);
const agent = makeAgent({ languageModel, capabilities: {} });
const task = agent.task('');
await expect(task).rejects.toThrow('No invoke in message');
},
);

it('throws if invocation budget is exceeded', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const agent = makeAgent({ llm, capabilities: {} });
const task = agent.task('', { invocationBudget: 0 });
await expect(task).rejects.toThrow('Invocation budget exceeded');
});

// XXX This test reflects a poor factorization of the agent.
it('pushes messages to the transcript', async () => {
const llm = mockLlm(`invoke":[{"name":"test","args":{}}]}`);
const pushMessages = vi.fn();
vi.mocked(makeChat).mockReturnValue({
getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
pushMessages,
});
const { makeAgent: makeAgent2 } = await import('./agent.ts');
const agent = makeAgent2({
llm,
capabilities: {
test: capability(async () => 'test', {
description: 'test',
args: {},
returns: { type: 'string' },
}),
},
});
const task = agent.task('test', { invocationBudget: 1 });
const languageModel = mockLlm(...endStatement);
const agent = makeAgent({ languageModel, capabilities: {} });
const task = agent.task('', undefined, { invocationBudget: 0 });
await expect(task).rejects.toThrow('Invocation budget exceeded');
expect(pushMessages).toHaveBeenCalledWith(
expect.any(AssistantMessage),
expect.any(CapabilityResultMessage),
);
});

it('logs to the provided logger', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const logger = {
const languageModel = mockLlm(...endStatement);
const testLogger = {
info: vi.fn(),
subLogger: vi.fn(() => logger),
subLogger: vi.fn(() => testLogger),
} as unknown as Logger;
const agent = makeAgent({ llm, capabilities: {}, logger });
await agent.task('test', { invocationBudget: 1 });
expect(logger.info).toHaveBeenCalledWith('query:', 'test');
expect(logger.subLogger).toHaveBeenCalledWith({ tags: ['t001'] });
const agent = makeAgent({
languageModel,
capabilities: {},
logger: testLogger,
});
await agent.task('test', undefined, { invocationBudget: 1 });
expect(testLogger.info).toHaveBeenCalledWith('intent:', 'test');
expect(testLogger.subLogger).toHaveBeenCalledWith({ tags: ['t001'] });
});
});
});
Loading
Loading