Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.8.5] - 2026-05-23

Bugfix release. Removes the last concurrent-writer path that could still reach
`SQLITE_BUSY` after 0.8.4 (RC5 in the SQLite busy incident postmortem).

### Fixed

- **Foreground `index_documents` could write concurrently with the file-watcher** — `server.callIndexDocuments` called `Engine.IndexDocuments` directly, bypassing the `indexWithLock` guard (`re.indexing`) that only the startup and file-watcher paths used. So a manual `index_documents` and a background watcher index could write to `vectors.db` at the same time. 0.8.4 made them queue politely (busy_timeout up to 5s), but a heavy index holding a write transaction longer than that could still hit `SQLITE_BUSY`. `IndexDocuments` now holds a dedicated `Engine.indexMu` for its whole duration, serialising every indexing run regardless of caller. The watcher's `indexing` flag is unchanged (it still coalesces debounced ticks); foreground callers now wait on the mutex and index for real instead of being skipped. Regression test `TestIndexDocumentsSerialisesWriters` asserts a second call blocks while the lock is held and completes once released. See `06-planning/2026-05-05-sqlite-busy-incident.md` §7 RC5.

## [0.8.4] - 2026-05-23

Bugfix release. Closes the SQLITE_BUSY recurrence on `index_documents` that the
Expand Down
13 changes: 13 additions & 0 deletions internal/rag/rag.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ type Engine struct {
vecService *vectorService
mu sync.Mutex
indexing bool
// indexMu serialises the whole IndexDocuments write path so two indexing
// runs never write to vectors.db concurrently. The file-watcher path
// (indexWithLock) coalesces debounced ticks via the `indexing` flag, but
// the foreground index_documents MCP tool calls IndexDocuments directly —
// without this mutex the two could race and hit SQLITE_BUSY (RC5 in
// 06-planning/2026-05-05-sqlite-busy-incident.md). Foreground callers wait
// here and then index for real, rather than being skipped.
indexMu sync.Mutex
lastIndexCheck time.Time
stopWatcher chan struct{}
stopOnce sync.Once
Expand Down Expand Up @@ -405,6 +413,11 @@ func (re *Engine) IndexDocuments(ctx context.Context) error {
return fmt.Errorf("RAG engine not available")
}

// Serialise all indexing runs (foreground tool + background watcher) so
// they never write to vectors.db concurrently. See Engine.indexMu.
re.indexMu.Lock()
defer re.indexMu.Unlock()

startTime := time.Now().UTC()
// chunkerVersion bumps trigger a full rebuild on next IndexDocuments.
// char-v1 — naive char-budget splitter (initial)
Expand Down
78 changes: 78 additions & 0 deletions internal/rag/rag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,84 @@ func TestIndexDocumentsRecoversAfterDirtyCommitFailure(t *testing.T) {
}
}

// TestIndexDocumentsSerialisesWriters guards RC5: IndexDocuments must hold the
// engine's write lock for its whole duration so the foreground index_documents
// tool and the background file-watcher never write to vectors.db concurrently.
// We hold indexMu (simulating an in-progress run) and assert a second call
// blocks until it is released, then completes for real.
func TestIndexDocumentsSerialisesWriters(t *testing.T) {
repoRoot := t.TempDir()
if err := os.MkdirAll(filepath.Join(repoRoot, "docs"), 0o755); err != nil {
t.Fatalf("mkdir docs: %v", err)
}
if err := os.WriteFile(filepath.Join(repoRoot, "docs", "runbook.md"), []byte("# Runbook\nrollback ingress safely"), 0o644); err != nil {
t.Fatalf("write runbook: %v", err)
}

embeddingServer := newTestEmbeddingServer(t)
defer embeddingServer.Close()

emb, err := embedder.New(embedder.Config{
OllamaBaseURL: embeddingServer.URL,
Dimension: 2,
Mode: "local-only",
MaxRetries: 0,
Timeout: time.Second,
}, zap.NewNop())
if err != nil {
t.Fatalf("embedder.New: %v", err)
}

store, err := vectorstore.NewSQLiteStore(filepath.Join(t.TempDir(), "vectors.db"), 2, zap.NewNop())
if err != nil {
t.Fatalf("NewSQLiteStore: %v", err)
}
t.Cleanup(func() { _ = store.Close() })

engine := &Engine{
config: config.Config{RootPath: repoRoot, RAGMaxResults: 10},
repoRoot: repoRoot,
logger: zap.NewNop(),
docService: newDocumentService(docServiceConfig{
RepoRoot: repoRoot,
IndexDirs: []string{"docs"},
ChunkSize: 2000,
ChunkOverlap: 200,
}, zap.NewNop()),
vecService: &vectorService{
config: vecServiceConfig{Embedder: emb, MaxResults: 10},
logger: zap.NewNop(),
store: store,
},
stopWatcher: make(chan struct{}),
}

// Simulate an indexing run already holding the writer lock.
engine.indexMu.Lock()

done := make(chan error, 1)
go func() { done <- engine.IndexDocuments(context.Background()) }()

select {
case <-done:
engine.indexMu.Unlock()
t.Fatal("IndexDocuments ran while indexMu was held — writers are not serialised")
case <-time.After(150 * time.Millisecond):
// Expected: the second run is blocked on indexMu.
}

engine.indexMu.Unlock()

select {
case err := <-done:
if err != nil {
t.Fatalf("IndexDocuments after lock release: %v", err)
}
case <-time.After(3 * time.Second):
t.Fatal("IndexDocuments did not complete after indexMu was released")
}
}

func TestCalculateIndexChanges_NewFiles(t *testing.T) {
e := &Engine{}

Expand Down
Loading