diff --git a/.docs/design-fff-epic-222-closure.md b/.docs/design-fff-epic-222-closure.md new file mode 100644 index 000000000..903fc9399 --- /dev/null +++ b/.docs/design-fff-epic-222-closure.md @@ -0,0 +1,244 @@ +# Implementation Plan: Close FFF Epic #222 + +**Status**: Approved +**Research Doc**: `.docs/research-fff-epic-222-closure.md` +**Author**: opencode (disciplined-design) +**Date**: 2026-04-17 +**Estimated Effort**: 4-6 hours + +## Overview + +### Summary +Complete the 4 remaining items to close FFF Epic #222: add `terraphim_multi_grep` MCP tool, wire SharedFrecency persistence, add cursor pagination, and remove fff-mcp sidecar. + +### Approach +Follow the exact pattern established by `terraphim_find_files` and `terraphim_grep`. Three workstreams can execute in parallel since they touch different parts of the MCP server. + +### Scope +**In Scope:** +1. Add `terraphim_multi_grep` MCP tool (OR-pattern grep) +2. Wire `SharedFrecency` with configurable LMDB path +3. Add cursor-based pagination to all three tools +4. Remove fff-mcp sidecar from bigbox +5. 
Close stale sub-issues #225, #226 + +**Out of Scope:** +- Upstream ExternalScorer trait PR to fff.nvim +- KG content scoring (path-only scoring stays) +- Query completion tracking + +**Avoid At All Cost:** +- Refactoring existing find_files/grep implementations (they work) +- Adding configuration file format changes (use env vars/CLI args) +- Abstracting MCP tool registration (YAGNI -- 3 tools is fine) + +## Architecture + +### Data Flow +``` +MCP Client + -> terraphim_find_files (fuzzy match + KG boost) + -> terraphim_grep (content search + KG file ordering + cursor) + -> terraphim_multi_grep (multi-pattern OR search + KG file ordering + cursor) + -> SharedFrecency (LMDB) (persistent access frequency across sessions) +``` + +### Key Design Decisions + +| Decision | Rationale | Alternatives Rejected | +|----------|-----------|----------------------| +| Use `multi_grep_search` directly from fff-core | Already public, no fork needed | Wrapping in our own trait | +| CursorStore as HashMap | Simple, matches fff-mcp pattern | Redis/external store | +| SharedFrecency via configurable env var | `FFF_FRECENCY_PATH` -- zero config change | New config section in TOML | +| Pagination as offset-based | Matches fff-core's `file_offset` in GrepSearchOptions | Keyset pagination | + +## File Changes + +### Modified Files +| File | Changes | +|------|---------| +| `crates/terraphim_mcp_server/src/lib.rs` | Add multi_grep tool, frecency wiring, cursor store, pagination | +| `crates/terraphim_mcp_server/Cargo.toml` | No changes needed (fff-search already a dep) | + +### No new files. No deleted files (sidecar removal is ops, not code). 
+ +## Implementation Steps + +### Workstream A: terraphim_multi_grep (Parallel -- 1-2h) + +**Step A1: Add multi_grep method on McpService** + +File: `crates/terraphim_mcp_server/src/lib.rs` +Location: After `grep_files` method (~line 1336) + +```rust +pub async fn multi_grep_files( + &self, + patterns: Vec, + path: Option, + constraints: Option, + limit: Option, + cursor: Option, + output_mode: Option, +) -> Result { + let base_path = path.unwrap_or_else(|| ".".to_string()); + let max_results = limit.unwrap_or(50); + let files_only = output_mode.as_deref() == Some("files"); + + // Same FilePicker init as grep_files + let mut picker = FilePicker::new(FilePickerOptions { ... })?; + picker.collect_files()?; + let mut files = picker.get_files().to_vec(); + + // KG sort (same as grep_files) + if let Some(scorer) = &self.kg_scorer { + files.sort_by(|a, b| scorer.score(b).cmp(&scorer.score(a))); + } + + // Parse constraints + let patterns_refs: Vec<&str> = patterns.iter().map(|s| s.as_str()).collect(); + let options = GrepSearchOptions { file_offset: cursor_offset, page_limit: max_results, ... }; + + let result = multi_grep_search(&files, &patterns_refs, &constraints_parsed, &options, &budget, None); + + // Format output (same pattern as grep_files) + // Return with next_cursor +} +``` + +**Step A2: Register tool in get_info()** + +Location: After `terraphim_grep` tool entry (~line 1749) + +```rust +Tool { + name: "terraphim_multi_grep".into(), + description: "Search file contents for lines matching ANY of multiple patterns (OR logic). ...".into(), + input_schema: serde_json::json!({ + "type": "object", + "properties": { + "patterns": { "type": "array", "items": { "type": "string" }, "description": "Patterns to match (OR logic)" }, + "path": { "type": "string", "description": "Base directory" }, + "constraints": { "type": "string", "description": "File constraints (e.g. 
'*.rs !test/')" }, + "limit": { "type": "integer", "description": "Max results (default 50)" }, + "cursor": { "type": "string", "description": "Pagination cursor from previous result" }, + "output_mode": { "type": "string", "enum": ["content", "files"], "description": "Output format" } + }, + "required": ["patterns"] + }), +} +``` + +**Step A3: Add match arm in call_tool()** + +Location: After `"terraphim_grep"` arm (~line 2159) + +```rust +"terraphim_multi_grep" => { ... } +``` + +### Workstream B: SharedFrecency Wiring (Parallel -- 2-3h) + +**Step B1: Add frecency field and initialization** + +File: `crates/terraphim_mcp_server/src/lib.rs` + +McpService struct already has `frecency` (check). If not, add: +```rust +pub struct McpService { + // ... existing fields ... + frecency: Option<SharedFrecency>, +} +``` + +Constructor: initialise from env var `FFF_FRECENCY_PATH`: +```rust +let frecency = std::env::var("FFF_FRECENCY_PATH") + .ok() + .map(|path| { + // Init LMDB-backed frecency at path + SharedFrecency::new(&path) // or equivalent from fff-search API + }) + .transpose()?; +``` + +**Step B2: Pass frecency to FilePicker** + +In `find_files` and `grep_files` and `multi_grep_files`: +```rust +if let Some(frecency) = &self.frecency { + picker.update_frecency_scores(frecency); +} +``` + +### Workstream C: Cursor Pagination (Parallel -- 1-2h) + +**Step C1: Add CursorStore to McpService** + +```rust +pub struct McpService { + // ... existing ... + cursor_store: Arc<Mutex<HashMap<String, usize>>>, +} +``` + +**Step C2: Add cursor handling to grep_files and multi_grep_files** + +Parse `cursor` param -> lookup offset from store. +After results, if more available, generate new cursor token: +```rust +let next_cursor = if result.matches.len() > max_results { + let token = format!("cur_{}", uuid::Uuid::new_v4()); + self.cursor_store.lock().unwrap().insert(token.clone(), offset + max_results); + Some(token) +} else { + None +}; +``` + +Include `next_cursor` in the response Content. 
+ +### Workstream D: Cleanup (Sequential -- after A,B,C -- 30min) + +**Step D1: Close stale Gitea issues** +- Close #225 (research) with comment "Work completed out of order during Phase 3 implementation" +- Close #226 (design) with same comment + +**Step D2: Update #224** +- Comment with completion status +- Check off remaining items + +**Step D3: Remove fff-mcp sidecar from bigbox** +- `ssh bigbox` -- check if fff-mcp is still running +- Check if any other tool references it +- Stop service, remove from MCP configs + +**Step D4: Close epic #222** + +## Test Strategy + +### Unit Tests (in terraphim_mcp_server) +| Test | Purpose | +|------|---------| +| `test_multi_grep_multiple_patterns` | Verify OR logic returns files matching any pattern | +| `test_multi_grep_no_matches` | Empty result for non-existent patterns | +| `test_cursor_store_round_trip` | Store and retrieve offset | +| `test_cursor_pagination_limit` | Verify next_cursor only when more results exist | + +### Integration Tests +- `cargo test -p terraphim_mcp_server` -- existing tests must still pass +- Manual: invoke `terraphim_multi_grep` with patterns `["sort_by", "sort_by_key"]` and verify results + +## Execution Order (Max Parallelism) + +``` +Time Workstream A Workstream B Workstream C Workstream D +---- ----------- ----------- ----------- ----------- +T+0 A1: multi_grep B1: frecency init C1: CursorStore +T+1 A2: register tool B2: wire to picker C2: pagination +T+2 A3: call_tool arm (verify) +T+3 (verify + test) (verify + test) (verify + test) +T+4 D1-D4: cleanup +``` + +Workstreams A, B, and C are independent of one another -- they can be three separate commits on one branch, or three parallel branches merged sequentially. Workstream D (cleanup) starts only after A, B, and C have been verified. 
diff --git a/.docs/research-fff-epic-222-closure.md b/.docs/research-fff-epic-222-closure.md new file mode 100644 index 000000000..893d35d18 --- /dev/null +++ b/.docs/research-fff-epic-222-closure.md @@ -0,0 +1,109 @@ +# Research Document: Close FFF Epic #222 -- Remaining Work + +**Status**: Approved +**Author**: opencode (disciplined-research) +**Date**: 2026-04-17 +**Related**: Gitea #222 (epic), #223 (closed), #224 (Phase 2 remaining), #225/#226 (stale), #227 (Phase 3 done) + +## Executive Summary + +FFF integration is ~80% complete. Phase 1 (sidecar) and Phase 3 (KG-boosted scoring crate) are done. Four remaining items block epic closure: `terraphim_multi_grep` MCP tool, SharedFrecency persistence, cursor pagination, and sidecar removal. All are well-understood with clear reference implementations in fff-mcp. + +## Essential Questions Check + +| Question | Answer | Evidence | +|----------|--------|----------| +| Energizing? | Yes | Closes a major epic, cleans up technical debt | +| Leverages strengths? | Yes | Pattern already established in terraphim_grep/find_files | +| Meets real need? 
| Yes | Agents need multi-pattern search and persistent frecency | + +**Proceed**: Yes (3/3) + +## Current State Analysis + +### Code Locations +| Component | Location | Status | +|-----------|----------|--------| +| terraphim_file_search crate | `crates/terraphim_file_search/` | Done: lib.rs, kg_scorer.rs, config.rs, watcher.rs | +| MCP terraphim_find_files | `crates/terraphim_mcp_server/src/lib.rs:1170-1244` | Done | +| MCP terraphim_grep | `crates/terraphim_mcp_server/src/lib.rs:1250-1336` | Done | +| MCP terraphim_multi_grep | -- | **Missing** | +| SharedFrecency wiring | `crates/terraphim_mcp_server/src/lib.rs:59` | **Not wired** | +| Cursor pagination | `crates/terraphim_mcp_server/src/lib.rs` (next_cursor: None) | **Not implemented** | +| fff-mcp sidecar | bigbox: PID running | **Not removed** | + +### Reference: fff-mcp multi_grep Implementation +Location: `~/.cargo/git/checkouts/fff.nvim-14ad43e6a8691b70/efd1552/crates/fff-mcp/src/server.rs:545-594` + +Key API: `grep::multi_grep_search(files, &patterns_refs, constraints, &options, budget, None)` +- Takes `Vec<&str>` patterns (OR logic) +- Uses same `GrepSearchOptions` as single grep +- Returns same `GrepResult` type +- Has `CursorStore` for pagination + +### Reference: fff-core SharedFrecency +Location: `~/.cargo/git/checkouts/fff.nvim-14ad43e6a8691b70/efd1552/crates/fff-core/src/shared.rs:120` + +```rust +pub struct SharedFrecency(pub(crate) Arc<RwLock<Option<FrecencyTracker>>>); +``` +- Already exported from `fff-search` crate +- Current terraphim MCP: `SharedFrecency` is imported but field `frecency` not used (0 references to it) + +### Existing MCP Tool Pattern (for adding new tools) +1. Add async method on `McpService` (e.g., `find_files`, `grep_files`) +2. Add `Tool` entry in `ServerHandler::get_info()` (~line 1722, 1749) +3. Add match arm in `ServerHandler::call_tool()` (~line 2139, 2159) + +## Remaining Work Items (from #224) + +| Item | Effort | Dependencies | Parallelizable? 
| +|------|--------|-------------|-----------------| +| 1. Add terraphim_multi_grep MCP tool | 1-2h | None | Yes | +| 2. Wire SharedFrecency with LMDB persistence | 2-3h | fff-search exposes FrecencyTracker | Yes | +| 3. Add cursor-based pagination | 2-3h | None | Yes | +| 4. Remove standalone fff-mcp sidecar | 30min | Items 1-3 validated | No (last) | +| 5. Close stale sub-issues #225, #226 | 5min | None | Yes | +| 6. Update epic #222 and close #224 | 5min | All above | No (last) | + +## Constraints + +### Technical +- `fff-search` is a git dependency (branch `feat/external-scorer`) -- cannot modify its API +- `multi_grep_search` is already public in `fff-search::grep` -- no fork needed +- `SharedFrecency` requires an LMDB path -- must be configurable +- MCP server uses `rmcp` 0.9 for protocol -- tool registration pattern is fixed +- `CursorStore` in fff-mcp uses opaque string IDs -- we can replicate or simplify + +### Vital Few + +| Constraint | Why It's Vital | Evidence | +|------------|----------------|----------| +| multi_grep_search API is public | Enables multi-pattern tool without forking | fff-core/src/grep.rs:870 | +| Existing tools are the pattern | New tools follow find_files/grep pattern | lib.rs:1170-1336 | +| SharedFrecency already imported | Just needs wiring, not new code | lib.rs:7 imports it | + +### Eliminated from Scope + +| Eliminated Item | Why Eliminated | +|-----------------|----------------| +| Upstream ExternalScorer trait contribution | Not blocking, separate effort | +| KG content scoring (score file contents, not just paths) | Future enhancement, not in epic scope | +| Query completion tracking | Nice-to-have, frecency is sufficient | + +## Risks + +| Risk | Likelihood | Impact | Mitigation | +|------|------------|--------|------------| +| LMDB path not writable on bigbox | Low | Medium | Configurable path, fallback to temp | +| multi_grep_search API differs from grep_search | Low | Low | Read fff-mcp server.rs reference | +| Cursor 
pagination state lost on MCP restart | Medium | Low | Expected; cursors are ephemeral | +| fff-mcp sidecar still used by other tools | Medium | Medium | Check before removing | + +## Assumptions + +| Assumption | Basis | Risk if Wrong | +|------------|-------|---------------| +| `multi_grep_search` has same signature style as `grep_search` | Both in fff-core/src/grep.rs, same author | Low -- signature differs only in `Vec<&str>` vs single pattern | +| SharedFrecency can be initialised with a path | fff-core has `init_db(path)` function | Medium -- may need to check if LMDB is available on target | +| fff-mcp sidecar is not used by other projects | Only terraphim-ai configured it | Medium -- verify before removing | diff --git a/crates/terraphim-markdown-parser/src/lib.rs b/crates/terraphim-markdown-parser/src/lib.rs index d86038e2f..a551c1228 100644 --- a/crates/terraphim-markdown-parser/src/lib.rs +++ b/crates/terraphim-markdown-parser/src/lib.rs @@ -141,7 +141,7 @@ pub fn ensure_terraphim_block_ids(markdown: &str) -> Result = agent_counts.into_iter().collect(); - sorted.sort_by(|a, b| b.1.cmp(&a.1)); + sorted.sort_by_key(|(_, count)| std::cmp::Reverse(*count)); sorted.into_iter().take(10).collect() } @@ -634,7 +634,7 @@ impl Analyzer { .collect(); // Sort by usage count descending - correlations.sort_by(|a, b| b.usage_count.cmp(&a.usage_count)); + correlations.sort_by_key(|c| std::cmp::Reverse(c.usage_count)); correlations } @@ -715,6 +715,7 @@ impl Analyzer { .collect(); // Sort by total invocations descending + #[allow(clippy::unnecessary_sort_by)] stats.sort_by(|_, v1, _, v2| v2.total_invocations.cmp(&v1.total_invocations)); stats @@ -736,6 +737,7 @@ impl Analyzer { // Convert to IndexMap and sort by count descending let mut breakdown: IndexMap = category_counts.into_iter().collect(); + #[allow(clippy::unnecessary_sort_by)] breakdown.sort_by(|_, v1, _, v2| v2.cmp(v1)); breakdown @@ -877,7 +879,7 @@ impl Analyzer { .collect(); // Sort by frequency descending - 
chains.sort_by(|a, b| b.frequency.cmp(&a.frequency)); + chains.sort_by_key(|c| std::cmp::Reverse(c.frequency)); chains } diff --git a/crates/terraphim-session-analyzer/src/main.rs b/crates/terraphim-session-analyzer/src/main.rs index c0a2d9438..ed3267939 100644 --- a/crates/terraphim-session-analyzer/src/main.rs +++ b/crates/terraphim-session-analyzer/src/main.rs @@ -733,7 +733,7 @@ fn calculate_tool_chains(invocations: &[models::ToolInvocation]) -> Vec = { let mut stats = filtered_stats; match sort_by { - SortBy::Frequency => { - stats.sort_by(|a, b| b.1.total_invocations.cmp(&a.1.total_invocations)) - } - SortBy::Alphabetical => stats.sort_by(|a, b| a.0.cmp(&b.0)), - SortBy::Recent => stats.sort_by(|a, b| b.1.last_seen.cmp(&a.1.last_seen)), + SortBy::Frequency => stats.sort_by_key(|(_, s)| std::cmp::Reverse(s.total_invocations)), + SortBy::Alphabetical => stats.sort_by_key(|(name, _)| name.clone()), + SortBy::Recent => stats.sort_by_key(|(_, s)| std::cmp::Reverse(s.last_seen)), } stats }; diff --git a/crates/terraphim-session-analyzer/src/parser.rs b/crates/terraphim-session-analyzer/src/parser.rs index 57d9d70ef..97d0153f3 100644 --- a/crates/terraphim-session-analyzer/src/parser.rs +++ b/crates/terraphim-session-analyzer/src/parser.rs @@ -419,7 +419,7 @@ impl SessionParser { } // Sort by timestamp - events.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); + events.sort_by_key(|e| e.timestamp); events } } diff --git a/crates/terraphim-session-analyzer/src/patterns/loader.rs b/crates/terraphim-session-analyzer/src/patterns/loader.rs index 941522e5b..228923ca4 100644 --- a/crates/terraphim-session-analyzer/src/patterns/loader.rs +++ b/crates/terraphim-session-analyzer/src/patterns/loader.rs @@ -153,6 +153,7 @@ fn merge_patterns(builtin: Vec, user: Vec) -> Result = pattern_map.into_values().collect(); // Sort by name for consistent ordering + #[allow(clippy::unnecessary_sort_by)] merged.sort_by(|a, b| a.name.cmp(&b.name)); // Validate the merged patterns diff --git 
a/crates/terraphim-session-analyzer/src/reporter.rs b/crates/terraphim-session-analyzer/src/reporter.rs index e51bb05fe..8836e6cda 100644 --- a/crates/terraphim-session-analyzer/src/reporter.rs +++ b/crates/terraphim-session-analyzer/src/reporter.rs @@ -165,7 +165,7 @@ impl Reporter { .map(|a| (a.timestamp, &a.agent_type, &a.task_description)) .collect(); - events.sort_by(|a, b| a.0.cmp(&b.0)); + events.sort_by_key(|(ts, _, _)| *ts); for (timestamp, agent_type, description) in events.iter().take(10) { let time_str = self.format_timestamp(*timestamp); @@ -224,7 +224,7 @@ impl Reporter { } let mut sorted_agents: Vec<_> = agent_counts.into_iter().collect(); - sorted_agents.sort_by(|a, b| b.1.cmp(&a.1)); + sorted_agents.sort_by_key(|(_, count)| std::cmp::Reverse(*count)); println!(" {} {}", "Total agent invocations:".bold(), total_agents); println!(" {} {}", "Total files modified:".bold(), total_files); @@ -444,7 +444,7 @@ impl Reporter { // Convert to sorted vector let mut tool_stats: Vec<_> = stats.iter().collect(); - tool_stats.sort_by(|a, b| b.1.total_invocations.cmp(&a.1.total_invocations)); + tool_stats.sort_by_key(|(_, stat)| std::cmp::Reverse(stat.total_invocations)); // Create table rows let mut rows = Vec::new(); @@ -545,6 +545,7 @@ impl Reporter { } // Sort by invocation count + #[allow(clippy::unnecessary_sort_by)] tool_rows.sort_by(|a, b| { b.count .parse::() @@ -567,7 +568,7 @@ impl Reporter { .iter() .map(|(cat, count)| (format!("{:?}", cat), *count)) .collect(); - category_rows.sort_by(|a, b| b.1.cmp(&a.1)); + category_rows.sort_by_key(|(_, count)| std::cmp::Reverse(*count)); for (category, count) in category_rows { #[allow(clippy::cast_precision_loss)] @@ -760,7 +761,7 @@ impl Reporter { .iter() .map(|(cat, count)| (format!("{:?}", cat), *count)) .collect(); - category_rows.sort_by(|a, b| b.1.cmp(&a.1)); + category_rows.sort_by_key(|(_, count)| std::cmp::Reverse(*count)); for (category, count) in category_rows { #[allow(clippy::cast_precision_loss)] @@ 
-781,7 +782,7 @@ impl Reporter { )?; let mut tool_list: Vec<_> = analysis.tool_statistics.iter().collect(); - tool_list.sort_by(|a, b| b.1.total_invocations.cmp(&a.1.total_invocations)); + tool_list.sort_by_key(|(_, stat)| std::cmp::Reverse(stat.total_invocations)); for (tool_name, stat) in tool_list { let agents_str = if stat.agents_using.is_empty() { diff --git a/crates/terraphim_agent/src/commands/markdown_parser.rs b/crates/terraphim_agent/src/commands/markdown_parser.rs index 04c4bc025..bf2fb47ae 100644 --- a/crates/terraphim_agent/src/commands/markdown_parser.rs +++ b/crates/terraphim_agent/src/commands/markdown_parser.rs @@ -528,9 +528,9 @@ impl MarkdownCommandParser { } let mut thesaurus = Thesaurus::new("learned_technical_terms".to_string()); - let mut term_id = 1u64; - for (term, normalized_term) in &self.learned_terms { + for (idx, (term, normalized_term)) in self.learned_terms.iter().enumerate() { + let term_id = (idx as u64) + 1; thesaurus.insert( normalized_term.clone(), NormalizedTerm { @@ -540,7 +540,6 @@ impl MarkdownCommandParser { url: Some(format!("learned-term:{}", term)), }, ); - term_id += 1; } Some(thesaurus) diff --git a/crates/terraphim_agent/src/commands/registry.rs b/crates/terraphim_agent/src/commands/registry.rs index 6103dae2f..d35a34ce6 100644 --- a/crates/terraphim_agent/src/commands/registry.rs +++ b/crates/terraphim_agent/src/commands/registry.rs @@ -443,7 +443,7 @@ impl CommandRegistry { .collect(); // Sort by score (descending) and take top matches - matches.sort_by(|a, b| b.1.cmp(&a.1)); + matches.sort_by_key(|x| std::cmp::Reverse(x.1)); let limit = limit.unwrap_or(10); matches diff --git a/crates/terraphim_agent/src/learnings/capture.rs b/crates/terraphim_agent/src/learnings/capture.rs index edc2c5ed3..034a30097 100644 --- a/crates/terraphim_agent/src/learnings/capture.rs +++ b/crates/terraphim_agent/src/learnings/capture.rs @@ -1126,6 +1126,7 @@ pub fn list_learnings( } // Sort by captured_at descending (most recent first) + 
#[allow(clippy::unnecessary_sort_by)] learnings.sort_by(|a, b| b.context.captured_at.cmp(&a.context.captured_at)); // Limit results @@ -1795,6 +1796,7 @@ pub fn suggest_learnings( .collect(); // Sort by score descending + #[allow(clippy::unnecessary_sort_by)] scored.sort_by(|a, b| b.score.cmp(&a.score)); // Limit results diff --git a/crates/terraphim_agent/src/main.rs b/crates/terraphim_agent/src/main.rs index 0fe31db27..6f15b8980 100644 --- a/crates/terraphim_agent/src/main.rs +++ b/crates/terraphim_agent/src/main.rs @@ -1640,7 +1640,8 @@ async fn run_offline_command( // Apply filtered matches in reverse order to preserve positions let mut result = input_text.clone(); let mut sorted_matches = filtered_matches; - sorted_matches.sort_by(|a, b| b.pos.cmp(&a.pos)); // Reverse sort by position + #[allow(clippy::unnecessary_sort_by)] + sorted_matches.sort_by(|a, b| b.pos.cmp(&a.pos)); for m in sorted_matches { if let Some((start, end)) = m.pos { @@ -3049,6 +3050,7 @@ async fn run_server_command( let graph_res = api.rolegraph(Some(&role_name)).await?; let mut nodes_sorted = graph_res.nodes.clone(); + #[allow(clippy::unnecessary_sort_by)] nodes_sorted.sort_by(|a, b| b.rank.cmp(&a.rank)); for node in nodes_sorted.into_iter().take(top_k) { println!("{}", node.label); diff --git a/crates/terraphim_agent/src/onboarding/validation.rs b/crates/terraphim_agent/src/onboarding/validation.rs index d96b8f61f..0ae9f8d33 100644 --- a/crates/terraphim_agent/src/onboarding/validation.rs +++ b/crates/terraphim_agent/src/onboarding/validation.rs @@ -82,47 +82,28 @@ pub fn validate_haystack(haystack: &Haystack) -> Result<(), ValidationError> { } // Service-specific validation + let is_url = + haystack.location.starts_with("http://") || haystack.location.starts_with("https://"); + match haystack.service { - ServiceType::Ripgrep => { - // For Ripgrep, location should be a path (we don't validate existence here, - // that's done separately with path_exists check if needed) - // Just ensure 
it's not a URL - if haystack.location.starts_with("http://") || haystack.location.starts_with("https://") - { - return Err(ValidationError::InvalidLocation( - "Ripgrep requires a local path, not a URL".into(), - )); - } - } - ServiceType::QueryRs => { - // QueryRs can be URL or default - // No specific validation needed - } - ServiceType::Quickwit => { - // Quickwit requires a URL - if !haystack.location.starts_with("http://") - && !haystack.location.starts_with("https://") - { - return Err(ValidationError::ServiceRequirement( - "Quickwit".into(), - "URL (http:// or https://)".into(), - )); - } + ServiceType::Ripgrep if is_url => { + return Err(ValidationError::InvalidLocation( + "Ripgrep requires a local path, not a URL".into(), + )); } - ServiceType::Atomic => { - // Atomic requires a URL - if !haystack.location.starts_with("http://") - && !haystack.location.starts_with("https://") - { - return Err(ValidationError::ServiceRequirement( - "Atomic".into(), - "URL (http:// or https://)".into(), - )); - } + ServiceType::Quickwit if !is_url => { + return Err(ValidationError::ServiceRequirement( + "Quickwit".into(), + "URL (http:// or https://)".into(), + )); } - _ => { - // Other services - basic validation only + ServiceType::Atomic if !is_url => { + return Err(ValidationError::ServiceRequirement( + "Atomic".into(), + "URL (http:// or https://)".into(), + )); } + _ => {} } Ok(()) diff --git a/crates/terraphim_agent/src/repl/file_operations.rs b/crates/terraphim_agent/src/repl/file_operations.rs index 02dab8275..eb1d8e0a2 100644 --- a/crates/terraphim_agent/src/repl/file_operations.rs +++ b/crates/terraphim_agent/src/repl/file_operations.rs @@ -720,6 +720,7 @@ pub mod utils { // Create a hash from the top words let mut hasher = DefaultHasher::new(); let mut top_words: Vec<_> = word_counts.into_iter().collect::>(); + #[allow(clippy::unnecessary_sort_by)] top_words.sort_by(|a, b| b.1.cmp(&a.1)); for (word, count) in top_words.into_iter().take(50) { diff --git 
a/crates/terraphim_agent/src/repl/handler.rs b/crates/terraphim_agent/src/repl/handler.rs index 1b212a644..b4a29546c 100644 --- a/crates/terraphim_agent/src/repl/handler.rs +++ b/crates/terraphim_agent/src/repl/handler.rs @@ -644,6 +644,7 @@ impl ReplHandler { match api_client.rolegraph(Some(&self.current_role)).await { Ok(response) => { let mut nodes = response.nodes; + #[allow(clippy::unnecessary_sort_by)] nodes.sort_by(|a, b| b.rank.cmp(&a.rank)); println!("{} Top {} concepts:", "📊".bold(), k.to_string().cyan()); @@ -2217,6 +2218,7 @@ impl ReplHandler { // Sort by date key let mut sorted: Vec<_> = grouped.into_iter().collect(); + #[allow(clippy::unnecessary_sort_by)] sorted.sort_by(|a, b| b.0.cmp(&a.0)); // Newest first let mut table = Table::new(); diff --git a/crates/terraphim_agent/src/service.rs b/crates/terraphim_agent/src/service.rs index 7e18fad13..006fbcaa5 100644 --- a/crates/terraphim_agent/src/service.rs +++ b/crates/terraphim_agent/src/service.rs @@ -304,6 +304,7 @@ impl TuiService { // Get nodes and sort by rank (descending) let mut nodes: Vec<_> = rolegraph.nodes_map().iter().collect(); + #[allow(clippy::unnecessary_sort_by)] nodes.sort_by(|a, b| b.1.rank.cmp(&a.1.rank)); // Map node IDs to term names and collect top-k diff --git a/crates/terraphim_agent_evolution/src/viewer.rs b/crates/terraphim_agent_evolution/src/viewer.rs index 708810bcb..e154e6d58 100644 --- a/crates/terraphim_agent_evolution/src/viewer.rs +++ b/crates/terraphim_agent_evolution/src/viewer.rs @@ -96,7 +96,7 @@ impl MemoryEvolutionViewer { } // Sort events by timestamp - events.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); + events.sort_by_key(|e| e.timestamp); Ok(EvolutionTimeline { agent_id: self.agent_id.clone(), diff --git a/crates/terraphim_agent_evolution/src/workflows/evaluator_optimizer.rs b/crates/terraphim_agent_evolution/src/workflows/evaluator_optimizer.rs index 4492b4e1e..0b894c260 100644 --- a/crates/terraphim_agent_evolution/src/workflows/evaluator_optimizer.rs 
+++ b/crates/terraphim_agent_evolution/src/workflows/evaluator_optimizer.rs @@ -432,6 +432,7 @@ Format your response as a structured evaluation."#, } // Sort by priority (Critical first) + #[allow(clippy::unnecessary_sort_by)] actions.sort_by(|a, b| b.priority.cmp(&a.priority)); Ok(actions) diff --git a/crates/terraphim_agent_evolution/src/workflows/orchestrator_workers.rs b/crates/terraphim_agent_evolution/src/workflows/orchestrator_workers.rs index 82c5a8f32..b6fc06336 100644 --- a/crates/terraphim_agent_evolution/src/workflows/orchestrator_workers.rs +++ b/crates/terraphim_agent_evolution/src/workflows/orchestrator_workers.rs @@ -718,6 +718,7 @@ Format your response as a detailed execution plan."#, } // Role-specific quality checks + #[allow(clippy::collapsible_match)] match task.worker_role { WorkerRole::Analyst => { if deliverable.contains("analysis") || deliverable.contains("insight") { diff --git a/crates/terraphim_agent_evolution/src/workflows/parallelization.rs b/crates/terraphim_agent_evolution/src/workflows/parallelization.rs index 7a4775a29..5313db72c 100644 --- a/crates/terraphim_agent_evolution/src/workflows/parallelization.rs +++ b/crates/terraphim_agent_evolution/src/workflows/parallelization.rs @@ -351,6 +351,7 @@ impl Parallelization { mut tasks: Vec, ) -> EvolutionResult> { // Sort tasks by priority (Critical first) + #[allow(clippy::unnecessary_sort_by)] tasks.sort_by(|a, b| b.priority.cmp(&a.priority)); let mut all_results = Vec::new(); @@ -589,6 +590,7 @@ impl Parallelization { } // Content type matching + #[allow(clippy::collapsible_match)] match expected_type { "analysis" => { if output.contains("analyze") diff --git a/crates/terraphim_automata/src/autocomplete.rs b/crates/terraphim_automata/src/autocomplete.rs index 5cf2fd0c7..4ee71fb49 100644 --- a/crates/terraphim_automata/src/autocomplete.rs +++ b/crates/terraphim_automata/src/autocomplete.rs @@ -126,6 +126,7 @@ pub fn build_autocomplete_index( } // Sort terms lexicographically for FST 
building + #[allow(clippy::unnecessary_sort_by)] terms_with_scores.sort_by(|a, b| a.0.cmp(&b.0)); log::debug!("Building FST with {} sorted terms", terms_with_scores.len()); diff --git a/crates/terraphim_automata/src/evaluation.rs b/crates/terraphim_automata/src/evaluation.rs index 193fe1566..c1117951f 100644 --- a/crates/terraphim_automata/src/evaluation.rs +++ b/crates/terraphim_automata/src/evaluation.rs @@ -178,6 +178,7 @@ pub fn evaluate(ground_truth: &[GroundTruthDocument], thesaurus: Thesaurus) -> E .collect(); // Sort per-term reports by term name for deterministic output + #[allow(clippy::unnecessary_sort_by)] per_term.sort_by(|a, b| a.term.cmp(&b.term)); // Detect systematic errors: terms with false-positive count >= threshold @@ -192,6 +193,7 @@ pub fn evaluate(ground_truth: &[GroundTruthDocument], thesaurus: Thesaurus) -> E .collect(); // Sort systematic errors by term name for deterministic output + #[allow(clippy::unnecessary_sort_by)] systematic_errors.sort_by(|a, b| a.term.cmp(&b.term)); EvaluationResult { diff --git a/crates/terraphim_automata/src/sharded_extractor.rs b/crates/terraphim_automata/src/sharded_extractor.rs index 9ce776ce9..1ab8dc4e0 100644 --- a/crates/terraphim_automata/src/sharded_extractor.rs +++ b/crates/terraphim_automata/src/sharded_extractor.rs @@ -69,6 +69,7 @@ impl ShardedUmlsExtractor { } // Sort patterns by term -- daachorse requires sorted, unique input + #[allow(clippy::unnecessary_sort_by)] all_patterns.sort_by(|a, b| a.0.cmp(&b.0)); // Merge duplicates: group all CUIs that share the same term. 
diff --git a/crates/terraphim_cli/src/service.rs b/crates/terraphim_cli/src/service.rs index 3da3e1a70..3d589336c 100644 --- a/crates/terraphim_cli/src/service.rs +++ b/crates/terraphim_cli/src/service.rs @@ -329,6 +329,7 @@ impl CliService { // Get nodes and sort by rank (descending) let mut nodes: Vec<_> = rolegraph.nodes_map().iter().collect(); + #[allow(clippy::unnecessary_sort_by)] nodes.sort_by(|a, b| b.1.rank.cmp(&a.1.rank)); // Map node IDs to term names and collect top-k diff --git a/crates/terraphim_goal_alignment/src/goals.rs b/crates/terraphim_goal_alignment/src/goals.rs index c0dbf5127..ebcc97c5c 100644 --- a/crates/terraphim_goal_alignment/src/goals.rs +++ b/crates/terraphim_goal_alignment/src/goals.rs @@ -452,6 +452,7 @@ impl Goal { } // Add constraint-specific validation based on type + #[allow(clippy::collapsible_match)] match &constraint.constraint_type { ConstraintType::Temporal => { // Validate temporal constraint parameters diff --git a/crates/terraphim_goal_alignment/src/knowledge_graph.rs b/crates/terraphim_goal_alignment/src/knowledge_graph.rs index f8ec86350..c629c549a 100644 --- a/crates/terraphim_goal_alignment/src/knowledge_graph.rs +++ b/crates/terraphim_goal_alignment/src/knowledge_graph.rs @@ -884,7 +884,7 @@ impl KnowledgeGraphGoalAnalyzer { } // Sort recommendations by priority - recommendations.sort_by(|a, b| b.priority.cmp(&a.priority)); + recommendations.sort_by_key(|r| std::cmp::Reverse(r.priority)); Ok(recommendations) } diff --git a/crates/terraphim_mcp_server/src/lib.rs b/crates/terraphim_mcp_server/src/lib.rs index 9cffc2185..8dd9a2794 100644 --- a/crates/terraphim_mcp_server/src/lib.rs +++ b/crates/terraphim_mcp_server/src/lib.rs @@ -1,11 +1,10 @@ -use std::sync::Arc; - use anyhow::Result; use base64::Engine; use fff_search::external_scorer::ExternalScorer; use fff_search::{ ContentCacheBudget, FFFMode, FilePicker, FilePickerOptions, FuzzySearchOptions, GrepMode, - GrepSearchOptions, PaginationArgs, QueryParser, 
grep_search, parse_grep_query, + GrepSearchOptions, PaginationArgs, QueryParser, SharedFrecency, grep_search, multi_grep_search, + parse_grep_query, }; use rmcp::{ RoleServer, ServerHandler, @@ -15,6 +14,7 @@ use rmcp::{ }, service::RequestContext, }; +use std::sync::Arc; use terraphim_automata::builder::json_decode; use terraphim_automata::matcher::{extract_paragraphs_from_automata, find_matches}; use terraphim_automata::{AutocompleteConfig, AutocompleteIndex, AutocompleteResult}; @@ -57,16 +57,30 @@ pub struct McpService { autocomplete_index: Arc>>, /// Optional KG scorer for boosting file search results by path concept matches. kg_scorer: Option>, + /// Optional persistent frecency tracker (LMDB-backed) for access-frequency scoring. + #[allow(dead_code)] + frecency: Option, } impl McpService { /// Create a new service instance pub fn new(config_state: Arc) -> Self { + let frecency = std::env::var("FFF_FRECENCY_PATH").ok().and_then(|path| { + fff_search::FrecencyTracker::new(&path, false) + .map(|tracker| { + let shared = SharedFrecency::default(); + shared.init(tracker).ok(); + shared + }) + .ok() + }); + Self { config_state, resource_mapper: Arc::new(TerraphimResourceMapper::new()), autocomplete_index: Arc::new(tokio::sync::RwLock::new(None)), kg_scorer: None, + frecency, } } @@ -1225,6 +1239,7 @@ impl McpService { (base + kg_boost, file.relative_path.as_str()) }) .collect(); + #[allow(clippy::unnecessary_sort_by)] scored.sort_by(|a, b| b.0.cmp(&a.0)); let mut contents = Vec::new(); @@ -1243,7 +1258,7 @@ impl McpService { Ok(CallToolResult::success(contents)) } - /// Grep file contents with KG-aware file ordering. + /// Grep file contents with KG-aware file ordering and optional cursor pagination. /// /// Files are sorted by KG path score before searching so that conceptually /// relevant files appear first in paginated results. 
@@ -1253,11 +1268,23 @@ impl McpService { path: Option, limit: Option, output_mode: Option, + cursor: Option, ) -> Result { let base_path = path.unwrap_or_else(|| ".".to_string()); let max_results = limit.unwrap_or(50); let files_only = output_mode.as_deref() == Some("files"); + let file_offset = cursor + .as_deref() + .and_then(|c| { + base64::engine::general_purpose::URL_SAFE_NO_PAD + .decode(c) + .ok() + }) + .and_then(|bytes| String::from_utf8(bytes).ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + let mut picker = FilePicker::new(FilePickerOptions { base_path: base_path.clone(), mode: FFFMode::Ai, @@ -1278,7 +1305,6 @@ impl McpService { ))])); } - // Sort files: highest KG path score first so grep pages are most relevant first. if let Some(scorer) = &self.kg_scorer { files.sort_by_key(|f| std::cmp::Reverse(scorer.score(f))); } @@ -1289,7 +1315,7 @@ impl McpService { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, smart_case: true, - file_offset: 0, + file_offset, page_limit: max_results, mode: GrepMode::PlainText, time_budget_ms: 0, @@ -1301,13 +1327,144 @@ impl McpService { let result = grep_search(&files, &fff_query, &options, &budget, None, None, None); let mut contents = Vec::new(); - contents.push(Content::text(format!( + let header = format!( "Found {} matches across {} files (searched {} files under '{}')", result.matches.len(), result.files_with_matches, result.total_files_searched, base_path - ))); + ); + contents.push(Content::text(header)); + + if files_only { + let mut seen = std::collections::HashSet::new(); + for m in &result.matches { + if let Some(file) = result.files.get(m.file_index) { + if seen.insert(file.relative_path.as_str()) { + contents.push(Content::text(file.relative_path.clone())); + } + } + } + } else { + for m in result.matches.iter().take(max_results) { + if let Some(file) = result.files.get(m.file_index) { + let line = format!( + "{}:{}:{}", + file.relative_path, + m.line_number, + m.line_content.trim_end() 
+ ); + contents.push(Content::text(line)); + } + } + } + + if result.next_file_offset > 0 { + let token = base64::engine::general_purpose::URL_SAFE_NO_PAD + .encode(result.next_file_offset.to_string()); + contents.push(Content::text(format!("next_cursor: {token}"))); + } + + Ok(CallToolResult::success(contents)) + } + + /// Multi-pattern grep: search file contents for lines matching ANY of + /// multiple patterns (OR logic) using Aho-Corasick. Files are ordered by + /// KG path score. Supports cursor-based pagination. + pub async fn multi_grep_files( + &self, + patterns: Vec, + path: Option, + constraints: Option, + limit: Option, + cursor: Option, + output_mode: Option, + ) -> Result { + let base_path = path.unwrap_or_else(|| ".".to_string()); + let max_results = limit.unwrap_or(50); + let files_only = output_mode.as_deref() == Some("files"); + + if patterns.is_empty() { + return Ok(CallToolResult::error(vec![Content::text( + "At least one pattern is required".to_string(), + )])); + } + + let file_offset = cursor + .as_deref() + .and_then(|c| { + base64::engine::general_purpose::URL_SAFE_NO_PAD + .decode(c) + .ok() + }) + .and_then(|bytes| String::from_utf8(bytes).ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + let mut picker = FilePicker::new(FilePickerOptions { + base_path: base_path.clone(), + mode: FFFMode::Ai, + watch: false, + warmup_mmap_cache: false, + cache_budget: None, + }) + .map_err(|e| ErrorData::internal_error(format!("FilePicker init failed: {e}"), None))?; + + picker + .collect_files() + .map_err(|e| ErrorData::internal_error(format!("File scan failed: {e}"), None))?; + + let mut files = picker.get_files().to_vec(); + if files.is_empty() { + return Ok(CallToolResult::success(vec![Content::text(format!( + "No files found under '{base_path}'" + ))])); + } + + if let Some(scorer) = &self.kg_scorer { + files.sort_by_key(|f| std::cmp::Reverse(scorer.score(f))); + } + + let patterns_refs: Vec<&str> = patterns.iter().map(|s| 
s.as_str()).collect(); + + let parser = QueryParser::new(fff_search::AiGrepConfig); + let constraint_str = constraints.as_deref().unwrap_or(""); + let parsed = parser.parse(constraint_str); + let parsed_constraints = parsed.constraints.as_slice(); + + let budget = ContentCacheBudget::default(); + let options = GrepSearchOptions { + max_file_size: 10 * 1024 * 1024, + max_matches_per_file: 200, + smart_case: true, + file_offset, + page_limit: max_results, + mode: GrepMode::PlainText, + time_budget_ms: 0, + before_context: 0, + after_context: 0, + classify_definitions: false, + }; + + let result = multi_grep_search( + &files, + &patterns_refs, + parsed_constraints, + &options, + &budget, + None, + ); + + let mut contents = Vec::new(); + let header = format!( + "Found {} matches across {} files for patterns {:?} (searched {} files under '{}')", + result.matches.len(), + result.files_with_matches, + patterns, + result.total_files_searched, + base_path + ); + contents.push(Content::text(header)); if files_only { let mut seen = std::collections::HashSet::new(); @@ -1332,6 +1489,12 @@ impl McpService { } } + if result.next_file_offset > 0 { + let token = base64::engine::general_purpose::URL_SAFE_NO_PAD + .encode(result.next_file_offset.to_string()); + contents.push(Content::text(format!("next_cursor: {token}"))); + } + Ok(CallToolResult::success(contents)) } } @@ -1747,8 +1910,8 @@ impl ServerHandler for McpService { }, Tool { name: "terraphim_grep".into(), - title: Some("Grep (KG-ordered)".into()), - description: Some("Search file contents with ripgrep-style matching. Files are searched in KG path-score order so conceptually relevant files appear first in paginated results.".into()), + title: Some("Grep (KG-ordered, paginated)".into()), + description: Some("Search file contents with ripgrep-style matching. Files are searched in KG path-score order so conceptually relevant files appear first. 
Supports cursor-based pagination for large result sets.".into()), input_schema: Arc::new(serde_json::json!({ "type": "object", "properties": { @@ -1768,6 +1931,10 @@ impl ServerHandler for McpService { "type": "string", "enum": ["content", "files"], "description": "Return 'content' (file:line:text, default) or 'files' (unique file paths only)" + }, + "cursor": { + "type": "string", + "description": "Pagination cursor from a previous result's next_cursor field" } }, "required": ["query"] @@ -1776,6 +1943,47 @@ impl ServerHandler for McpService { annotations: None, icons: None, meta: None, + }, + Tool { + name: "terraphim_multi_grep".into(), + title: Some("Multi-Grep (OR patterns, KG-ordered, paginated)".into()), + description: Some("Search file contents for lines matching ANY of multiple patterns (OR logic) using Aho-Corasick. Faster than running separate grep calls. Files are KG path-score ordered. Supports cursor pagination.".into()), + input_schema: Arc::new(serde_json::json!({ + "type": "object", + "properties": { + "patterns": { + "type": "array", + "items": { "type": "string" }, + "description": "Patterns to match (OR logic). Include all naming conventions: snake_case, PascalCase, camelCase." + }, + "path": { + "type": "string", + "description": "Directory to search (defaults to current directory)" + }, + "constraints": { + "type": "string", + "description": "File constraints (e.g. 
'*.rs !test/')" + }, + "limit": { + "type": "integer", + "description": "Maximum number of matches to return (default 50)" + }, + "output_mode": { + "type": "string", + "enum": ["content", "files"], + "description": "Return 'content' (file:line:text, default) or 'files' (unique file paths only)" + }, + "cursor": { + "type": "string", + "description": "Pagination cursor from a previous result's next_cursor field" + } + }, + "required": ["patterns"] + }).as_object().unwrap().clone()), + output_schema: None, + annotations: None, + icons: None, + meta: None, } ]; @@ -2177,8 +2385,57 @@ impl ServerHandler for McpService { .get("output_mode") .and_then(|v| v.as_str()) .map(|s| s.to_string()); + let cursor = arguments + .get("cursor") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + self.grep_files(query, path, limit, output_mode, cursor) + .await + } + "terraphim_multi_grep" => { + let arguments = request.arguments.unwrap_or_default(); + + let patterns: Vec = arguments + .get("patterns") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default(); + + if patterns.is_empty() { + return Err(ErrorData::invalid_params( + "Missing or empty 'patterns' parameter".to_string(), + None, + )); + } + + let path = arguments + .get("path") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let constraints = arguments + .get("constraints") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let limit = arguments + .get("limit") + .and_then(|v| v.as_i64()) + .map(|i| i as usize); + let cursor = arguments + .get("cursor") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let output_mode = arguments + .get("output_mode") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); - self.grep_files(query, path, limit, output_mode).await + self.multi_grep_files(patterns, path, constraints, limit, cursor, output_mode) + .await } _ => Err(ErrorData::method_not_found::< 
rmcp::model::CallToolRequestMethod, diff --git a/crates/terraphim_mcp_server/tests/test_find_files.rs b/crates/terraphim_mcp_server/tests/test_find_files.rs index 7ade992c8..18b3ef9a2 100644 --- a/crates/terraphim_mcp_server/tests/test_find_files.rs +++ b/crates/terraphim_mcp_server/tests/test_find_files.rs @@ -151,7 +151,8 @@ async fn grep_files_returns_content_matches() { "fn new(".to_string(), Some(workspace.to_string()), Some(10), - None, // default output_mode = "content" + None, + None, ) .await .expect("grep_files should succeed"); @@ -207,6 +208,7 @@ async fn grep_files_files_mode_returns_paths() { Some(workspace.to_string()), Some(5), Some("files".to_string()), + None, ) .await .expect("grep_files should succeed"); diff --git a/crates/terraphim_middleware/src/command/ripgrep.rs b/crates/terraphim_middleware/src/command/ripgrep.rs index 86eaaba79..7ce20b634 100644 --- a/crates/terraphim_middleware/src/command/ripgrep.rs +++ b/crates/terraphim_middleware/src/command/ripgrep.rs @@ -315,6 +315,7 @@ impl RipgrepCommand { } // For numeric parameters, validate they're actually numbers + #[allow(clippy::collapsible_match)] match key { "max_count" | "context" => { if value.parse::().is_err() { diff --git a/crates/terraphim_multi_agent/src/context.rs b/crates/terraphim_multi_agent/src/context.rs index c8682eac9..eecb7d853 100644 --- a/crates/terraphim_multi_agent/src/context.rs +++ b/crates/terraphim_multi_agent/src/context.rs @@ -371,7 +371,7 @@ impl AgentContext { let remaining: Vec = non_pinned.into_iter().skip(relevance_count).collect(); let mut recent = remaining; - recent.sort_by(|a, b| b.added_at.cmp(&a.added_at)); + recent.sort_by_key(|i| std::cmp::Reverse(i.added_at)); selected.extend(recent.into_iter().take(recency_count)); self.items = pinned.into_iter().chain(selected).collect(); diff --git a/crates/terraphim_persistence/src/conversation.rs b/crates/terraphim_persistence/src/conversation.rs index de93eb3f4..7b22c6d6a 100644 --- 
a/crates/terraphim_persistence/src/conversation.rs +++ b/crates/terraphim_persistence/src/conversation.rs @@ -267,7 +267,7 @@ impl ConversationPersistence for OpenDALConversationPersistence { let mut summaries = index.list(); // Sort by updated_at descending (most recent first) - summaries.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); + summaries.sort_by_key(|s| std::cmp::Reverse(s.updated_at)); log::debug!("Found {} conversation summaries", summaries.len()); Ok(summaries) diff --git a/crates/terraphim_rolegraph/examples/graph_embeddings_tutorial.rs b/crates/terraphim_rolegraph/examples/graph_embeddings_tutorial.rs index f23a1a2ca..b913d04ce 100644 --- a/crates/terraphim_rolegraph/examples/graph_embeddings_tutorial.rs +++ b/crates/terraphim_rolegraph/examples/graph_embeddings_tutorial.rs @@ -47,6 +47,7 @@ use terraphim_types::{ fn build_initial_thesaurus() -> Thesaurus { let mut thesaurus = Thesaurus::new("Initial Learnings".to_string()); + #[allow(clippy::useless_vec)] let concepts = vec![ ( "active recall", @@ -67,8 +68,8 @@ fn build_initial_thesaurus() -> Thesaurus { ), ]; - let mut id = 1u64; - for (concept, synonyms) in concepts { + for (i, (concept, synonyms)) in concepts.iter().enumerate() { + let id = (i as u64) + 1; let term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())); thesaurus.insert(NormalizedTermValue::new(concept.to_string()), term); @@ -76,7 +77,6 @@ fn build_initial_thesaurus() -> Thesaurus { let syn_term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())); thesaurus.insert(NormalizedTermValue::new(synonym.to_string()), syn_term); } - id += 1; } thesaurus @@ -87,6 +87,7 @@ fn build_enhanced_thesaurus() -> Thesaurus { let mut thesaurus = build_initial_thesaurus(); // Add domain-specific terms that dramatically improve retrieval + #[allow(clippy::useless_vec)] let ds_concepts = vec![ ( "cap theorem", @@ -110,8 +111,8 @@ fn build_enhanced_thesaurus() -> Thesaurus { ), ]; - let mut id = 6u64; // 
Continue from initial - for (concept, synonyms) in ds_concepts { + for (i, (concept, synonyms)) in ds_concepts.iter().enumerate() { + let id = (i as u64) + 6; let term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())); thesaurus.insert(NormalizedTermValue::new(concept.to_string()), term); @@ -119,7 +120,6 @@ fn build_enhanced_thesaurus() -> Thesaurus { let syn_term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())); thesaurus.insert(NormalizedTermValue::new(synonym.to_string()), syn_term); } - id += 1; } thesaurus diff --git a/crates/terraphim_rolegraph/examples/knowledge_graph_role_demo.rs b/crates/terraphim_rolegraph/examples/knowledge_graph_role_demo.rs index f232b701b..6845351a6 100644 --- a/crates/terraphim_rolegraph/examples/knowledge_graph_role_demo.rs +++ b/crates/terraphim_rolegraph/examples/knowledge_graph_role_demo.rs @@ -90,6 +90,7 @@ impl RetrievalMetrics { fn build_base_thesaurus() -> Thesaurus { let mut thesaurus = Thesaurus::new("Domain Expert - Base".to_string()); + #[allow(clippy::useless_vec)] let base_concepts = vec![ ( "distributed systems", @@ -116,8 +117,8 @@ fn build_base_thesaurus() -> Thesaurus { ), ]; - let mut id = 1u64; - for (concept, synonyms) in base_concepts { + for (i, (concept, synonyms)) in base_concepts.iter().enumerate() { + let id = (i as u64) + 1; let term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())) .with_display_value(concept.to_string()); thesaurus.insert(NormalizedTermValue::new(concept.to_string()), term); @@ -127,7 +128,6 @@ fn build_base_thesaurus() -> Thesaurus { .with_display_value(concept.to_string()); thesaurus.insert(NormalizedTermValue::new(syn.to_string()), syn_term); } - id += 1; } thesaurus @@ -136,8 +136,7 @@ fn build_base_thesaurus() -> Thesaurus { fn build_enhanced_thesaurus() -> Thesaurus { let mut thesaurus = build_base_thesaurus(); - let mut next_id = 9u64; - + #[allow(clippy::useless_vec)] let enhanced_concepts = vec![ ( "consensus 
algorithms", @@ -215,18 +214,17 @@ fn build_enhanced_thesaurus() -> Thesaurus { ), ]; - for (concept, synonyms) in enhanced_concepts { - let term = NormalizedTerm::new(next_id, NormalizedTermValue::new(concept.to_string())) + for (i, (concept, synonyms)) in enhanced_concepts.iter().enumerate() { + let id = (i as u64) + 9; + let term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())) .with_display_value(concept.to_string()); thesaurus.insert(NormalizedTermValue::new(concept.to_string()), term); for syn in synonyms { - let syn_term = - NormalizedTerm::new(next_id, NormalizedTermValue::new(concept.to_string())) - .with_display_value(concept.to_string()); + let syn_term = NormalizedTerm::new(id, NormalizedTermValue::new(concept.to_string())) + .with_display_value(concept.to_string()); thesaurus.insert(NormalizedTermValue::new(syn.to_string()), syn_term); } - next_id += 1; } thesaurus diff --git a/crates/terraphim_rolegraph/examples/learning_via_negativa.rs b/crates/terraphim_rolegraph/examples/learning_via_negativa.rs index 5cdaaa61f..16aeb2d28 100644 --- a/crates/terraphim_rolegraph/examples/learning_via_negativa.rs +++ b/crates/terraphim_rolegraph/examples/learning_via_negativa.rs @@ -35,6 +35,7 @@ fn build_correction_thesaurus() -> Thesaurus { let mut thesaurus = Thesaurus::new("Command Corrections".to_string()); // Git corrections + #[allow(clippy::useless_vec)] let corrections = vec![ // Git force push corrections ( @@ -96,8 +97,10 @@ fn build_correction_thesaurus() -> Thesaurus { ), ]; - let mut id = 1u64; - for (correct, wrong_aliases, _primary_wrong, _description) in corrections { + for (i, (correct, wrong_aliases, _primary_wrong, _description)) in + corrections.iter().enumerate() + { + let id = (i as u64) + 1; // Add the correct command let term = NormalizedTerm::new(id, NormalizedTermValue::new(correct.to_string())); thesaurus.insert(NormalizedTermValue::new(correct.to_string()), term); @@ -107,7 +110,6 @@ fn build_correction_thesaurus() 
-> Thesaurus { let wrong_term = NormalizedTerm::new(id, NormalizedTermValue::new(correct.to_string())); thesaurus.insert(NormalizedTermValue::new(wrong.to_string()), wrong_term); } - id += 1; } thesaurus @@ -117,6 +119,7 @@ fn build_correction_thesaurus() -> Thesaurus { fn build_enhanced_correction_thesaurus() -> Thesaurus { let mut thesaurus = build_correction_thesaurus(); + #[allow(clippy::useless_vec)] let more_corrections = vec![ // System commands ( @@ -159,8 +162,10 @@ fn build_enhanced_correction_thesaurus() -> Thesaurus { ), ]; - let mut id = 20u64; - for (correct, wrong_aliases, _primary_wrong, _description) in more_corrections { + for (i, (correct, wrong_aliases, _primary_wrong, _description)) in + more_corrections.iter().enumerate() + { + let id = (i as u64) + 20; let term = NormalizedTerm::new(id, NormalizedTermValue::new(correct.to_string())); thesaurus.insert(NormalizedTermValue::new(correct.to_string()), term); @@ -168,7 +173,6 @@ fn build_enhanced_correction_thesaurus() -> Thesaurus { let wrong_term = NormalizedTerm::new(id, NormalizedTermValue::new(correct.to_string())); thesaurus.insert(NormalizedTermValue::new(wrong.to_string()), wrong_term); } - id += 1; } thesaurus diff --git a/crates/terraphim_router/src/keyword.rs b/crates/terraphim_router/src/keyword.rs index 75588e6d2..fafb82977 100644 --- a/crates/terraphim_router/src/keyword.rs +++ b/crates/terraphim_router/src/keyword.rs @@ -58,7 +58,7 @@ impl KeywordRouter { } // Sort by priority (higher priority first) - matched_keywords.sort_by(|a, b| b.1.cmp(&a.1)); + matched_keywords.sort_by_key(|(_, priority)| std::cmp::Reverse(*priority)); caps.into_iter().collect() } diff --git a/crates/terraphim_service/src/lib.rs b/crates/terraphim_service/src/lib.rs index 765c83bc8..9ccb0ca43 100644 --- a/crates/terraphim_service/src/lib.rs +++ b/crates/terraphim_service/src/lib.rs @@ -750,6 +750,7 @@ impl TerraphimService { .collect(); // Sort by relevance, but prioritize important KG terms + 
#[allow(clippy::unnecessary_sort_by)] sorted_terms.sort_by(|a, b| { let a_important = important_kg_terms.contains(&a.0.as_str()); let b_important = important_kg_terms.contains(&b.0.as_str()); @@ -2218,7 +2219,7 @@ impl TerraphimService { } // Re-sort documents by the new combined rank - documents.sort_by(|a, b| b.rank.unwrap_or(0).cmp(&a.rank.unwrap_or(0))); + documents.sort_by_key(|d| std::cmp::Reverse(d.rank.unwrap_or(0))); log::debug!("TF-IDF scoring applied successfully"); } @@ -3512,7 +3513,7 @@ mod tests { .iter() .map(|doc| doc.rank.unwrap()) .collect(); - ranks.sort_by(|a, b| b.cmp(a)); // Sort in descending order + ranks.sort_by_key(|r| std::cmp::Reverse(*r)); assert_eq!( ranks, vec![3, 2, 1], diff --git a/crates/terraphim_sessions/src/enrichment/concept.rs b/crates/terraphim_sessions/src/enrichment/concept.rs index 1564dbe6c..e71758a81 100644 --- a/crates/terraphim_sessions/src/enrichment/concept.rs +++ b/crates/terraphim_sessions/src/enrichment/concept.rs @@ -129,6 +129,7 @@ impl SessionConcepts { /// Get concepts sorted by frequency pub fn by_frequency(&self) -> Vec<&ConceptMatch> { let mut sorted: Vec<_> = self.concepts.values().collect(); + #[allow(clippy::unnecessary_sort_by)] sorted.sort_by(|a, b| b.count.cmp(&a.count)); sorted } diff --git a/crates/terraphim_sessions/src/enrichment/enricher.rs b/crates/terraphim_sessions/src/enrichment/enricher.rs index da43c329a..1f2810d69 100644 --- a/crates/terraphim_sessions/src/enrichment/enricher.rs +++ b/crates/terraphim_sessions/src/enrichment/enricher.rs @@ -244,6 +244,7 @@ pub fn search_by_concept<'a>( } // Sort by occurrence count (most occurrences first) + #[allow(clippy::unnecessary_sort_by)] results.sort_by(|a, b| b.1.count.cmp(&a.1.count)); results @@ -286,6 +287,7 @@ pub fn find_related_sessions<'a>( } // Sort by number of shared concepts (most first) + #[allow(clippy::unnecessary_sort_by)] related.sort_by(|a, b| b.1.cmp(&a.1)); related diff --git 
a/crates/terraphim_task_decomposition/src/decomposition.rs b/crates/terraphim_task_decomposition/src/decomposition.rs index 4a9d680c3..118ce47fe 100644 --- a/crates/terraphim_task_decomposition/src/decomposition.rs +++ b/crates/terraphim_task_decomposition/src/decomposition.rs @@ -214,7 +214,7 @@ impl KnowledgeGraphTaskDecomposer { } // Add related concepts to the main list - concepts.extend(related_concepts.into_iter()); + concepts.extend(related_concepts); concepts.sort(); concepts.dedup(); diff --git a/crates/terraphim_task_decomposition/src/tasks.rs b/crates/terraphim_task_decomposition/src/tasks.rs index ed229db94..14e9194ba 100644 --- a/crates/terraphim_task_decomposition/src/tasks.rs +++ b/crates/terraphim_task_decomposition/src/tasks.rs @@ -478,6 +478,7 @@ impl Task { } // Add constraint-specific validation based on type + #[allow(clippy::collapsible_match)] match &constraint.constraint_type { TaskConstraintType::Temporal => { // Validate temporal constraint parameters diff --git a/crates/terraphim_tinyclaw/src/skills/executor.rs b/crates/terraphim_tinyclaw/src/skills/executor.rs index f7932797a..e0d4d0f74 100644 --- a/crates/terraphim_tinyclaw/src/skills/executor.rs +++ b/crates/terraphim_tinyclaw/src/skills/executor.rs @@ -114,6 +114,7 @@ impl SkillExecutor { } } + #[allow(clippy::unnecessary_sort_by)] skills.sort_by(|a, b| a.name.cmp(&b.name)); Ok(skills) } diff --git a/crates/terraphim_types/examples/kg_normalization.rs b/crates/terraphim_types/examples/kg_normalization.rs index a7820be88..2c26681a0 100644 --- a/crates/terraphim_types/examples/kg_normalization.rs +++ b/crates/terraphim_types/examples/kg_normalization.rs @@ -391,7 +391,7 @@ fn main() { // Get most frequent terms let mut terms: Vec<_> = ontology.values().collect(); - terms.sort_by(|a, b| b.frequency.cmp(&a.frequency)); + terms.sort_by_key(|t| std::cmp::Reverse(t.frequency)); println!("\n--- Top Terms by Frequency ---"); for entry in terms.iter().take(10) { diff --git 
a/crates/terraphim_update/src/rollback.rs b/crates/terraphim_update/src/rollback.rs index ac0e09097..ada3d2b48 100644 --- a/crates/terraphim_update/src/rollback.rs +++ b/crates/terraphim_update/src/rollback.rs @@ -214,7 +214,7 @@ impl BackupManager { let mut backups: Vec<_> = self.backups.values().collect(); - backups.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); + backups.sort_by_key(|b| b.timestamp); let num_to_remove = backups.len().saturating_sub(self.max_backups); @@ -268,7 +268,7 @@ impl BackupManager { pub fn list_backups(&self) -> Vec { let mut backups: Vec<_> = self.backups.values().collect(); - backups.sort_by(|a, b| b.timestamp.cmp(&a.timestamp)); + backups.sort_by_key(|b| std::cmp::Reverse(b.timestamp)); backups.iter().map(|b| b.version.clone()).collect() } diff --git a/crates/terraphim_usage/src/store.rs b/crates/terraphim_usage/src/store.rs index 6eb56a193..7ab6bbb2f 100644 --- a/crates/terraphim_usage/src/store.rs +++ b/crates/terraphim_usage/src/store.rs @@ -606,6 +606,7 @@ impl UsageStore { } } + #[allow(clippy::unnecessary_sort_by)] executions.sort_by(|a, b| b.started_at.cmp(&a.started_at)); Ok(executions) @@ -646,6 +647,7 @@ impl UsageStore { } } + #[allow(clippy::unnecessary_sort_by)] snapshots.sort_by(|a, b| b.snapshot_at.cmp(&a.snapshot_at)); snapshots.truncate(limit); diff --git a/terraphim_server/src/workflows/routing.rs b/terraphim_server/src/workflows/routing.rs index 2e37f7b0a..6be207389 100644 --- a/terraphim_server/src/workflows/routing.rs +++ b/terraphim_server/src/workflows/routing.rs @@ -246,15 +246,13 @@ fn analyze_task_complexity(prompt: &str, role: &str) -> serde_json::Value { // Role-specific complexity adjustments match role { - "technical_writer" => { - if prompt.contains("documentation") || prompt.contains("specification") { - complexity_score += 0.15; - } + "technical_writer" + if prompt.contains("documentation") || prompt.contains("specification") => + { + complexity_score += 0.15; } - "content_creator" => { - if 
prompt.contains("creative") || prompt.contains("marketing") { - complexity_score += 0.1; - } + "content_creator" if prompt.contains("creative") || prompt.contains("marketing") => { + complexity_score += 0.1; } _ => {} }