diff --git a/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/.openspec.yaml b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/.openspec.yaml new file mode 100644 index 0000000..d8b0ed0 --- /dev/null +++ b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-03-20 diff --git a/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/design.md b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/design.md new file mode 100644 index 0000000..336a709 --- /dev/null +++ b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/design.md @@ -0,0 +1,52 @@ +## Context + +`0.1.5` introduced the `source` field on recall effectiveness events and updated new-table bootstrap rows accordingly. Existing local databases created before that release keep the older LanceDB schema because `MemoryStore.init()` opens tables in place and does not run any compatibility checks. The next recall write then fails at the LanceDB layer because the row contains `source` while the existing `effectiveness_events` table schema does not. + +## Goals / Non-Goals + +**Goals:** +- Make upgraded installs able to open pre-`0.1.5` databases and append new recall events without manual cleanup. +- Keep the fix scoped to the existing `effectiveness_events` table and the missing `source` column. +- Preserve current summary semantics where legacy rows are interpreted as `system-transform` recall events. +- Add regression tests that prove the startup upgrade path works against an old on-disk schema. + +**Non-Goals:** +- Introducing a generic migration framework for every future schema change in this patch. +- Rewriting historical events to backfill exact `source` values beyond the safe default interpretation. +- Changing memory record schema or retrieval behavior. + +## Decisions + +### Decision: Patch the table during store initialization +Rationale: The failure happens before normal event processing can recover, so startup is the only reliable place to make the table schema compatible. This also keeps the repair transparent to operators. + +Alternatives considered: +- Fail fast with a manual migration instruction: rejected because local upgrades would remain broken until operators intervene. +- Catch write errors and retry with a reduced payload: rejected because it hides the schema mismatch and loses `source` data on all new rows. + +### Decision: Use LanceDB schema evolution to add `source` as an all-null or empty-string-compatible column +Rationale: LanceDB requires explicit schema evolution for new columns. Adding the column in place preserves existing data and unblocks new writes without rebuilding the table. + +Alternatives considered: +- Drop and recreate `effectiveness_events`: rejected because it destroys historical audit data. +- Create a new events table name: rejected because it complicates reads and splits history. + +### Decision: Keep legacy row normalization defaulting missing `source` to `system-transform` +Rationale: Pre-`0.1.5` recall events only came from the system transform path, so this remains the correct backward-compatible interpretation even after the storage schema is patched. + +## Risks / Trade-offs + +- [LanceDB API shape differs across installed versions] -> Mitigation: keep the table type narrow, feature-detect schema patch support needed by this project version, and verify with regression tests. +- [Older databases may contain no event rows yet] -> Mitigation: patch logic must be idempotent and succeed whether the table is empty or populated. +- [Future columns repeat this issue] -> Mitigation: structure the init-time compatibility check so additional column patches can be appended later. + +## Migration Plan + +1. On `MemoryStore.init()`, open `effectiveness_events` and inspect/patch the schema before any writes. +2. If `source` is missing, add it with a backward-compatible default that does not rewrite business meaning for existing rows. +3. Continue normal initialization and event writes. +4. If patching fails, surface a clear initialization error rather than allowing a later opaque write failure. + +## Open Questions + +- None for this scoped fix. diff --git a/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/proposal.md b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/proposal.md new file mode 100644 index 0000000..d773734 --- /dev/null +++ b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/proposal.md @@ -0,0 +1,24 @@ +## Why + +Version `0.1.5` added the `source` field to recall effectiveness events, but existing local LanceDB tables created by older versions are opened as-is and never patched. As a result, upgraded installs can fail on the first recall event write with `Found field not in schema: source at row 0`, breaking message send flows after upgrade. + +## What Changes + +- Add startup schema compatibility checks for the `effectiveness_events` table before new evaluation events are written. +- Automatically patch older event tables that are missing the `source` column so existing databases remain usable after upgrading to `0.1.5+`. +- Preserve backward-compatible interpretation of historical rows by treating pre-upgrade events as `system-transform` recall events in summaries. +- Add regression coverage for the upgrade path from pre-`0.1.5` event schemas. + +## Capabilities + +### New Capabilities +(none) + +### Modified Capabilities +- `memory-effectiveness-evaluation`: Persisted effectiveness event storage now upgrades older LanceDB schemas before recall events with `source` are appended. + +## Impact + +- `src/store.ts`: initialization must inspect and patch the `effectiveness_events` table schema. +- `test/foundation/foundation.test.ts`: add coverage for upgrading an older events table and preserving summary behavior. +- Docker-based validation: release verification must prove upgraded databases no longer fail on new recall writes. diff --git a/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/specs/memory-effectiveness-evaluation/spec.md b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/specs/memory-effectiveness-evaluation/spec.md new file mode 100644 index 0000000..a20ec49 --- /dev/null +++ b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/specs/memory-effectiveness-evaluation/spec.md @@ -0,0 +1,43 @@ +## MODIFIED Requirements + +### Requirement: Memory effectiveness event model +The system MUST persist append-only evaluation events for memory capture, memory recall, and user feedback so operators can audit how long-memory behavior performed in real sessions. Recall events MUST include a source field that distinguishes automatic system-transform recall from manual user-initiated search. When the persisted `effectiveness_events` table comes from an older plugin version that lacks the `source` column, the system MUST patch the table schema during initialization before appending new recall events. + +#### Scenario: Capture lifecycle emits auditable events +- **WHEN** the system evaluates assistant output for auto-capture +- **THEN** it records whether capture was considered, skipped, or stored and includes structured reason labels for non-stored outcomes + +#### Scenario: Recall lifecycle emits auditable events +- **WHEN** the system executes memory recall for a new user prompt +- **THEN** it records the recall request, result count, whether context was injected, and source as system-transform + +#### Scenario: Manual search emits recall events +- **WHEN** a user invokes the memory search command +- **THEN** the system records a recall event with source as manual-search and injected as false + +#### Scenario: Upgraded install patches missing recall source column +- **WHEN** the plugin initializes against an existing `effectiveness_events` table created before the `source` column existed +- **THEN** it patches the table schema before new recall events are appended + +#### Scenario: Feedback event remains append-only +- **WHEN** a user reports that a memory was missing, incorrect, or useful +- **THEN** the system stores a new evaluation event rather than mutating prior evaluation history + +### Requirement: Quantitative effectiveness summaries +The system MUST support quantitative summaries for long-memory effectiveness that combine capture funnel, recall funnel, and feedback-confirmed quality signals, and the project MUST define how those summaries are interpreted when explicit feedback is sparse. Recall summaries MUST separate automatic and manual recall metrics. + +#### Scenario: Operator requests effectiveness summary +- **WHEN** an operator runs the documented effectiveness reporting workflow +- **THEN** the system returns machine-readable summary fields for capture success, skip reasons, recall hit rate, helpful recall rate, false-positive rate, and false-negative rate, with recall metrics split into auto and manual sub-structures + +#### Scenario: Summary distinguishes operational and product metrics +- **WHEN** effectiveness metrics are reported +- **THEN** the report separates operational indicators from product-outcome proxies such as repeated-context reduction or manual-search-after-recall rate + +#### Scenario: Zero feedback is interpreted as unknown quality +- **WHEN** explicit feedback counts are zero or too sparse to support statistical confidence +- **THEN** maintainers treat feedback-derived rates as insufficient evidence rather than as confirmation that memory quality is good + +#### Scenario: Manual rescue ratio is reported +- **WHEN** the effectiveness summary includes both auto and manual recall data +- **THEN** the system reports a manual rescue ratio representing manual search frequency relative to auto recall frequency diff --git a/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/tasks.md b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/tasks.md new file mode 100644 index 0000000..875e4ec --- /dev/null +++ b/openspec/changes/archive/2026-03-20-add-effectiveness-schema-upgrade/tasks.md @@ -0,0 +1,19 @@ +## 1. Open Existing Event Tables Safely + +- [x] 1.1 Extend the LanceDB table typing in `src/store.ts` so store initialization can inspect and patch `effectiveness_events` schema. +- [x] 1.2 Add init-time compatibility logic in `src/store.ts` that detects a missing `source` column and patches the existing `effectiveness_events` table before new recall events are written. + +## 2. Preserve Backward-Compatible Effectiveness Reads + +- [x] 2.1 Keep legacy recall rows readable after patching by preserving the `system-transform` default for rows without a populated `source` value. +- [x] 2.2 Fail initialization with a clear error if the event-table schema patch cannot be applied, instead of surfacing a later write-time schema mismatch. + +## 3. Regression Coverage + +- [x] 3.1 Add a foundation test that creates a pre-`0.1.5` `effectiveness_events` table, re-initializes the store, and verifies a recall event with `source` can be written successfully. +- [x] 3.2 Extend regression coverage to confirm upgraded event data still summarizes legacy recall rows as `system-transform` while preserving new auto/manual split metrics. + +## 4. Validation + +- [x] 4.1 Run changed-file diagnostics and the targeted automated tests covering the schema upgrade path. +- [x] 4.2 Rebuild and start the Docker test environment, then run containerized verification for the schema upgrade fix. diff --git a/openspec/specs/memory-effectiveness-evaluation/spec.md b/openspec/specs/memory-effectiveness-evaluation/spec.md index 331ca30..8f40ecc 100644 --- a/openspec/specs/memory-effectiveness-evaluation/spec.md +++ b/openspec/specs/memory-effectiveness-evaluation/spec.md @@ -4,7 +4,7 @@ TBD - created by archiving change add-memory-effectiveness-evaluation. Update Purpose after archive. ## Requirements ### Requirement: Memory effectiveness event model -The system MUST persist append-only evaluation events for memory capture, memory recall, and user feedback so operators can audit how long-memory behavior performed in real sessions. Recall events MUST include a source field that distinguishes automatic system-transform recall from manual user-initiated search. +The system MUST persist append-only evaluation events for memory capture, memory recall, and user feedback so operators can audit how long-memory behavior performed in real sessions. Recall events MUST include a source field that distinguishes automatic system-transform recall from manual user-initiated search. When the persisted `effectiveness_events` table comes from an older plugin version that lacks the `source` column, the system MUST patch the table schema during initialization before appending new recall events. #### Scenario: Capture lifecycle emits auditable events - **WHEN** the system evaluates assistant output for auto-capture @@ -18,6 +18,10 @@ The system MUST persist append-only evaluation events for memory capture, memory - **WHEN** a user invokes the memory search command - **THEN** the system records a recall event with source as manual-search and injected as false +#### Scenario: Upgraded install patches missing recall source column +- **WHEN** the plugin initializes against an existing `effectiveness_events` table created before the `source` column existed +- **THEN** it patches the table schema before new recall events are appended + #### Scenario: Feedback event remains append-only - **WHEN** a user reports that a memory was missing, incorrect, or useful - **THEN** the system stores a new evaluation event rather than mutating prior evaluation history diff --git a/src/store.ts b/src/store.ts index 3539107..3df3069 100644 --- a/src/store.ts +++ b/src/store.ts @@ -12,7 +12,9 @@ type LanceConnection = { type LanceTable = { add(rows: unknown[]): Promise; + addColumns(transforms: Array<{ name: string; valueSql: string }>): Promise; delete(filter: string): Promise; + schema(): Promise<{ fields: Array<{ name: string }> }>; query(): { where(expr: string): ReturnType; select(columns: string[]): ReturnType; @@ -24,6 +26,7 @@ type LanceTable = { const TABLE_NAME = "memories"; const EVENTS_TABLE_NAME = "effectiveness_events"; +const EVENTS_SOURCE_COLUMN = "source"; interface ScopeCache { records: MemoryRecord[]; @@ -99,6 +102,8 @@ export class MemoryStore { await this.eventTable.delete("id = '__bootstrap__'"); } + await this.ensureEventTableCompatibility(); + await this.ensureIndexes(); } @@ -505,6 +510,24 @@ export class MemoryStore { this.indexState.ftsError = error instanceof Error ? error.message : String(error); } } + + private async ensureEventTableCompatibility(): Promise { + const table = this.requireEventTable(); + const schema = await table.schema(); + const hasSourceColumn = schema.fields.some((field) => field.name === EVENTS_SOURCE_COLUMN); + if (hasSourceColumn) { + return; + } + + try { + await table.addColumns([{ name: EVENTS_SOURCE_COLUMN, valueSql: "CAST(NULL AS STRING)" }]); + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + throw new Error( + `Failed to patch ${EVENTS_TABLE_NAME} schema for ${EVENTS_SOURCE_COLUMN}: ${reason}`, + ); + } + } } function normalizeRow(row: Record): MemoryRecord | null { diff --git a/test/foundation/foundation.test.ts b/test/foundation/foundation.test.ts index 7bab942..b62c602 100644 --- a/test/foundation/foundation.test.ts +++ b/test/foundation/foundation.test.ts @@ -4,10 +4,12 @@ import { assertRecordsMatch, cleanupDbPath, createScopedRecords, + createTempDbPath, createTestEvent, createTestRecord, createTestStore, createVector, + seedLegacyEffectivenessEventsTable, } from "../setup.js"; test("write-read persistence keeps field integrity across multiple scopes", async () => { @@ -272,6 +274,34 @@ test("auto and manual recall events are stored and scoped correctly and summariz } }); +test("store init patches legacy effectiveness_events schema before writing recall source", async () => { + const dbPath = await createTempDbPath(); + + try { + await seedLegacyEffectivenessEventsTable(dbPath); + const { store } = await createTestStore(dbPath); + + await store.putEvent( + createTestEvent({ + id: "patched-recall", + type: "recall", + scope: "project:legacy", + source: "manual-search", + resultCount: 2, + injected: false, + }), + ); + + const events = await store.listEvents(["project:legacy"], 10); + const patchedEvent = events.find((event) => event.id === "patched-recall"); + assert.ok(patchedEvent); + assert.equal(patchedEvent?.type, "recall"); + assert.equal(patchedEvent?.source, "manual-search"); + } finally { + await cleanupDbPath(dbPath); + } +}); + test("search scoring uses normalized RRF fusion when recency and importance boosts are disabled", async () => { const { store, dbPath } = await createTestStore(); diff --git a/test/regression/plugin.test.ts b/test/regression/plugin.test.ts index 21b8fb3..578582e 100644 --- a/test/regression/plugin.test.ts +++ b/test/regression/plugin.test.ts @@ -2,7 +2,7 @@ import assert from "node:assert/strict"; import test from "node:test"; import { resolveMemoryConfig } from "../../src/config.js"; import plugin from "../../src/index.js"; -import { cleanupDbPath, createScopedRecords, createTempDbPath, createTestStore, createVector } from "../setup.js"; +import { cleanupDbPath, createScopedRecords, createTempDbPath, createTestStore, createVector, seedLegacyEffectivenessEventsTable } from "../setup.js"; const SESSION_ID = "sess-test-001"; const WORKTREE = "/workspace/project-under-test"; @@ -80,9 +80,10 @@ async function createPluginHarness(options?: { userMessages?: SessionMessage[]; sessionDirectory?: string; embeddingProvider?: "ollama" | "openai"; + dbPath?: string; }) { const embeddingProvider = options?.embeddingProvider ?? "ollama"; - const dbPath = await createTempDbPath("lancedb-opencode-pro-regression-"); + const dbPath = options?.dbPath ?? (await createTempDbPath("lancedb-opencode-pro-regression-")); const memoryConfig = { memory: { provider: "lancedb-opencode-pro", @@ -534,6 +535,39 @@ test("memory_search emits manual-search recall event and effectiveness summary s } }); +test("upgraded legacy event data defaults missing source to system-transform while accepting new manual-search events", async () => { + const dbPath = await createTempDbPath("lancedb-opencode-pro-legacy-regression-"); + await seedLegacyEffectivenessEventsTable(dbPath, "global"); + const harness = await createPluginHarness({ dbPath }); + + try { + await harness.capture("Nginx 502 fixed by increasing proxy_buffer_size and confirming upstream health checks. Resolved successfully."); + + await withPatchedFetch(() => + harness.toolHooks.memory_search.execute({ query: "Nginx 502 proxy_buffer_size", limit: 5 }, harness.context), + ); + + const summaryOutput = await withPatchedFetch(() => harness.toolHooks.memory_effectiveness.execute({}, harness.context)); + const summary = parseJson<{ + recall: { + requested: number; + auto: { requested: number; returnedResults: number }; + manual: { requested: number; returnedResults: number }; + manualRescueRatio: number; + }; + }>(summaryOutput); + + assert.equal(summary.recall.requested, 2); + assert.equal(summary.recall.auto.requested, 1); + assert.equal(summary.recall.auto.returnedResults, 0); + assert.equal(summary.recall.manual.requested, 1); + assert.ok(summary.recall.manual.returnedResults >= 0); + assert.ok(Math.abs(summary.recall.manualRescueRatio - 1) < 1e-9); + } finally { + await harness.cleanup(); + } +}); + test("feedback commands persist missing wrong and useful signals", async () => { const harness = await createPluginHarness(); diff --git a/test/setup.ts b/test/setup.ts index 641938a..e9687cc 100644 --- a/test/setup.ts +++ b/test/setup.ts @@ -1,8 +1,8 @@ +import assert from "node:assert/strict"; +import { randomUUID } from "node:crypto"; import { mkdtemp, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { randomUUID } from "node:crypto"; -import assert from "node:assert/strict"; import { MemoryStore } from "../src/store.js"; import type { EffectivenessSummary, MemoryCategory, MemoryEffectivenessEvent, MemoryRecord, RecallSource } from "../src/types.js"; @@ -24,6 +24,31 @@ export async function cleanupDbPath(dbPath: string): Promise { await rm(dbPath, { recursive: true, force: true }); } +export async function seedLegacyEffectivenessEventsTable(dbPath: string, scope = "project:legacy"): Promise { + const lancedb = await import("@lancedb/lancedb"); + const connection = await lancedb.connect(dbPath); + await connection.createTable("effectiveness_events", [ + { + id: "legacy-recall-1", + type: "recall", + scope, + sessionID: "sess-legacy", + timestamp: 1_000, + memoryId: "", + text: "", + outcome: "", + skipReason: "", + resultCount: 0, + injected: false, + feedbackType: "", + helpful: -1, + reason: "", + labelsJson: "[]", + metadataJson: "{}", + }, + ]); +} + export function createVector(dim = DEFAULT_VECTOR_DIM, seed = 0.1): number[] { return Array.from({ length: dim }, (_, index) => Number((seed + index * 0.0001).toFixed(6))); }