diff --git a/src/cmds/rust/runner.rs b/src/cmds/rust/runner.rs
index 476f90671..65c35b8c1 100644
--- a/src/cmds/rust/runner.rs
+++ b/src/cmds/rust/runner.rs
@@ -1,6 +1,7 @@
 //! Runs arbitrary commands and captures only stderr or test failures.
 
 use crate::core::stream::StreamFilter;
+use crate::core::verifier::Verifier;
 use anyhow::Result;
 use lazy_static::lazy_static;
 use regex::Regex;
@@ -125,6 +126,15 @@ pub fn run_err(command: &str, verbose: u8) -> Result<i32> {
     )
 }
 
+/// Gate a filtered command output behind the post-compression verifier.
+///
+/// Returns `filtered` when the verifier accepts it as a safe reduction of
+/// `original`. Otherwise prints the confidence to stderr and returns a copy of
+/// `original` unchanged.
+fn apply_with_verification(original: &str, filtered: String) -> String {
+    let verifier = Verifier::new();
+    let result = verifier.verify(original, &filtered);
+    // Compare against the verifier's own threshold rather than repeating the
+    // literal here, so the two values cannot drift apart.
+    if result.is_safe(verifier.threshold) {
+        return filtered;
+    }
+    eprintln!("rtk: verifier fallback (confidence {:.0}%)", result.confidence * 100.0);
+    original.to_string()
+}
+
 /// Run tests and show only failures
 pub fn run_test(command: &str, verbose: u8) -> Result<i32> {
     if verbose > 0 {
@@ -136,7 +146,7 @@ pub fn run_test(command: &str, verbose: u8) -> Result<i32> {
         cmd,
         "test",
         command,
-        move |raw| extract_test_summary(raw, &command_owned),
+        move |raw| apply_with_verification(raw, extract_test_summary(raw, &command_owned)),
         crate::core::runner::RunOptions::with_tee("test"),
     )
 }
diff --git a/src/cmds/system/json_cmd.rs b/src/cmds/system/json_cmd.rs
index 176b6e568..0f19eca28 100644
--- a/src/cmds/system/json_cmd.rs
+++ b/src/cmds/system/json_cmd.rs
@@ -1,5 +1,6 @@
 //! Inspects JSON structure without showing values, saving tokens on large payloads.
 
+use crate::core::toon::{strip_nulls, toon_encode};
 use crate::core::tracking;
 use anyhow::{bail, Context, Result};
 use serde_json::Value;
@@ -7,6 +8,18 @@ use std::fs;
 use std::io::{self, Read};
 use std::path::Path;
 
+/// Run the TOON pipeline on `input`: drop null fields, then try the compact key syntax.
+///
+/// Returns the TOON string only when it is strictly shorter than the
+/// null-stripped JSON. Otherwise returns the null-stripped JSON, or `input`
+/// unchanged when `strip_nulls` could not process it.
+pub fn compress_json_output(input: &str) -> String {
+    let cleaned = match strip_nulls(input) {
+        Some(json) => json,
+        None => input.to_string(),
+    };
+    match toon_encode(&cleaned) {
+        Some(toon) if toon.len() < cleaned.len() => toon,
+        _ => cleaned,
+    }
+}
+
 /// Reject non-JSON files with a clear error before doing any I/O.
 fn validate_json_extension(file: &Path) -> Result<()> {
     if let Some(ext) = file.extension().and_then(|e| e.to_str()) {
@@ -50,7 +63,12 @@ pub fn run(file: &Path, max_depth: usize, schema_only: bool, verbose: u8) -> Res
     let output = if schema_only {
         filter_json_string(&content, max_depth)?
     } else {
-        filter_json_compact(&content, max_depth)?
+        let toon = compress_json_output(&content);
+        if toon.starts_with("TOON:") {
+            toon
+        } else {
+            filter_json_compact(&content, max_depth)?
+        }
     };
     println!("{}", output);
     timer.track(
@@ -79,7 +97,12 @@ pub fn run_stdin(max_depth: usize, schema_only: bool, verbose: u8) -> Result<()>
     let output = if schema_only {
         filter_json_string(&content, max_depth)?
     } else {
-        filter_json_compact(&content, max_depth)?
+        let toon = compress_json_output(&content);
+        if toon.starts_with("TOON:") {
+            toon
+        } else {
+            filter_json_compact(&content, max_depth)?
+        }
     };
     println!("{}", output);
     timer.track("cat - (stdin)", "rtk json -", &content, &output);
diff --git a/src/core/dedup_cache.rs b/src/core/dedup_cache.rs
new file mode 100644
index 000000000..711ec8b26
--- /dev/null
+++ b/src/core/dedup_cache.rs
@@ -0,0 +1,217 @@
+//! Session-level dedup cache: SHA-256 persistent cross-command deduplication.
+//! Repeat reads of the same content return a compact `§ref:HASH§` token (~13 tokens)
+//! instead of re-emitting the full compressed output.
+
+use anyhow::{Context, Result};
+use rusqlite::{self, OptionalExtension};
+use sha2::{Digest, Sha256};
+use std::path::PathBuf;
+
+/// Outcome of `DedupCache::get_or_insert`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum CacheResult {
+    /// The content was already cached; `inline_ref` is the `§ref:HASH§` token.
+    Ref { inline_ref: String },
+    /// The content was not cached before this call; `compressed` is the stored output.
+    Fresh { compressed: String },
+}
+
+/// Handle to the on-disk dedup cache. It stores only the database path; each
+/// method opens its own SQLite connection.
+pub struct DedupCache {
+    /// Location of the SQLite database file.
+    db_path: PathBuf,
+    /// Entries not accessed for this many days are removed by `evict_stale`.
+    ttl_days: u64,
+}
+
+impl DedupCache {
+    /// Open (or create) the SQLite database at `db_path` and make sure the
+    /// `dedup_cache` table and its `accessed_at` index exist.
+    ///
+    /// The connection used for initialisation is dropped before returning.
+    /// The TTL is fixed at 7 days.
+    ///
+    /// # Errors
+    /// Fails if the database cannot be opened or the schema cannot be created.
+    pub fn new(db_path: PathBuf) -> Result<Self> {
+        const SCHEMA: &str = "CREATE TABLE IF NOT EXISTS dedup_cache (
+                hash         TEXT PRIMARY KEY,
+                compressed   TEXT NOT NULL,
+                cmd          TEXT,
+                created_at   INTEGER NOT NULL,
+                accessed_at  INTEGER NOT NULL,
+                access_count INTEGER DEFAULT 1
+            );
+            CREATE INDEX IF NOT EXISTS idx_dedup_accessed ON dedup_cache(accessed_at);";
+
+        let connection = rusqlite::Connection::open(&db_path)
+            .with_context(|| format!("Failed to open dedup cache: {}", db_path.display()))?;
+        connection
+            .execute_batch(SCHEMA)
+            .context("Failed to initialize dedup_cache table")?;
+
+        let cache = Self {
+            db_path,
+            ttl_days: 7,
+        };
+        Ok(cache)
+    }
+
+    /// Look up `raw` by its SHA-256 digest and record it if it is new.
+    ///
+    /// - Hit: bumps `accessed_at` / `access_count` (best effort) and returns
+    ///   [`CacheResult::Ref`] with a `§ref:XXXXXXXX§` token built from the first
+    ///   8 hex characters of the digest.
+    /// - Miss: stores `compressed` together with `cmd` and returns `compressed`
+    ///   as [`CacheResult::Fresh`].
+    ///
+    /// # Errors
+    /// Fails if the database cannot be opened, the lookup query fails, or the
+    /// insert fails.
+    pub fn get_or_insert(&self, raw: &str, cmd: &str, compressed: &str) -> Result<CacheResult> {
+        let hash = sha256_hex(raw);
+        let conn =
+            rusqlite::Connection::open(&self.db_path).context("Failed to open dedup cache")?;
+        let now = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs() as i64;
+
+        // `.optional()` maps only "no rows" to `None`; genuine database errors are
+        // propagated instead of being mistaken for a cache miss.
+        let existing: Option<String> = conn
+            .query_row(
+                "SELECT hash FROM dedup_cache WHERE hash = ?1",
+                rusqlite::params![hash],
+                |row| row.get(0),
+            )
+            .optional()
+            .context("Failed to query dedup cache")?;
+
+        if existing.is_some() {
+            // Access bookkeeping is best effort: a failed UPDATE must not hide the hit.
+            conn.execute(
+                "UPDATE dedup_cache SET accessed_at=?1, access_count=access_count+1 WHERE hash=?2",
+                rusqlite::params![now, hash],
+            )
+            .ok();
+            // The digest is 64 ASCII hex characters, so slicing 8 bytes is safe.
+            return Ok(CacheResult::Ref {
+                inline_ref: format!("§ref:{}§", &hash[..8]),
+            });
+        }
+
+        // OR IGNORE keeps a concurrent insert of the same hash from failing this call.
+        conn.execute(
+            "INSERT OR IGNORE INTO dedup_cache (hash, compressed, cmd, created_at, accessed_at)
+             VALUES (?1, ?2, ?3, ?4, ?4)",
+            rusqlite::params![hash, compressed, cmd, now],
+        )
+        .context("Failed to insert into dedup cache")?;
+
+        Ok(CacheResult::Fresh {
+            compressed: compressed.to_string(),
+        })
+    }
+
+    /// Delete every entry whose `accessed_at` is older than `ttl_days` days.
+    ///
+    /// Returns the number of rows removed.
+    ///
+    /// # Errors
+    /// Fails if the database cannot be opened or the `DELETE` statement fails.
+    pub fn evict_stale(&self) -> Result<usize> {
+        const SECS_PER_DAY: i64 = 86400;
+
+        let connection = rusqlite::Connection::open(&self.db_path)
+            .context("Failed to open dedup cache for eviction")?;
+
+        let now_secs = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs() as i64;
+        let cutoff = now_secs - self.ttl_days as i64 * SECS_PER_DAY;
+
+        connection
+            .execute(
+                "DELETE FROM dedup_cache WHERE accessed_at < ?1",
+                rusqlite::params![cutoff],
+            )
+            .context("Failed to evict stale cache entries")
+    }
+
+    /// Look up stored content by hash prefix (for example the 8 hex characters
+    /// carried by a `§ref:…§` token).
+    ///
+    /// Returns `Ok(None)` when no entry matches, and also when `prefix` is empty
+    /// or contains anything other than hex digits. Without that check `%` and `_`
+    /// would act as `LIKE` wildcards and an empty prefix would match any row.
+    /// When several hashes share the prefix, one of them is returned.
+    ///
+    /// # Errors
+    /// Fails if the database cannot be opened or the query fails.
+    pub fn expand_prefix(&self, prefix: &str) -> Result<Option<String>> {
+        if prefix.is_empty() || !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
+            return Ok(None);
+        }
+        let conn =
+            rusqlite::Connection::open(&self.db_path).context("Failed to open dedup cache")?;
+        let pattern = format!("{}%", prefix);
+        conn.query_row(
+            "SELECT compressed FROM dedup_cache WHERE hash LIKE ?1 LIMIT 1",
+            rusqlite::params![pattern],
+            |row| row.get(0),
+        )
+        .optional()
+        .context("Failed to query dedup cache")
+    }
+
+    /// Return the number of cached entries and their total stored size.
+    ///
+    /// `size_kb` is the summed byte length of all `compressed` payloads divided
+    /// by 1024.
+    ///
+    /// # Errors
+    /// Fails if the database cannot be opened or the statistics query fails.
+    /// Query failures are reported instead of being shown as an empty cache.
+    pub fn stats(&self) -> Result<CacheStats> {
+        let conn =
+            rusqlite::Connection::open(&self.db_path).context("Failed to open dedup cache")?;
+        // One aggregate query reads both figures from the same snapshot.
+        // `LENGTH` on TEXT counts characters, so cast to BLOB to count bytes.
+        let (count, size_bytes): (usize, i64) = conn
+            .query_row(
+                "SELECT COUNT(*), COALESCE(SUM(LENGTH(CAST(compressed AS BLOB))), 0)
+                 FROM dedup_cache",
+                [],
+                |row| Ok((row.get(0)?, row.get(1)?)),
+            )
+            .context("Failed to read dedup cache statistics")?;
+        Ok(CacheStats {
+            count,
+            size_kb: size_bytes as f64 / 1024.0,
+        })
+    }
+}
+
+/// Summary figures returned by `DedupCache::stats`.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CacheStats {
+    /// Number of rows in the `dedup_cache` table.
+    pub count: usize,
+    /// Total size of the stored `compressed` payloads, divided by 1024.
+    pub size_kb: f64,
+}
+
+/// Hash `content` (as UTF-8 bytes) with SHA-256 and return the digest as
+/// lowercase hexadecimal.
+pub fn sha256_hex(content: &str) -> String {
+    let digest = Sha256::digest(content.as_bytes());
+    format!("{:x}", digest)
+}
+
+// Unit tests for the dedup cache. Each test creates its own database file inside
+// a fresh temporary directory.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    // First sighting of a piece of content must be reported as Fresh.
+    #[test]
+    fn test_first_call_returns_fresh() {
+        let dir = tempdir().unwrap();
+        let cache = DedupCache::new(dir.path().join("test.db")).unwrap();
+        let result = cache
+            .get_or_insert("hello world content", "cat f.rs", "compressed")
+            .unwrap();
+        assert!(matches!(result, CacheResult::Fresh { .. }));
+    }
+
+    // Inserting the same raw content twice must yield a Ref on the second call.
+    #[test]
+    fn test_second_identical_call_returns_ref() {
+        let dir = tempdir().unwrap();
+        let cache = DedupCache::new(dir.path().join("test.db")).unwrap();
+        cache
+            .get_or_insert("same content here", "cat f.rs", "compressed")
+            .unwrap();
+        let result = cache
+            .get_or_insert("same content here", "cat f.rs", "compressed")
+            .unwrap();
+        assert!(matches!(result, CacheResult::Ref { .. }));
+    }
+
+    // The ref token is delimited by `§ref:` and a closing `§`.
+    #[test]
+    fn test_ref_token_format() {
+        let dir = tempdir().unwrap();
+        let cache = DedupCache::new(dir.path().join("test.db")).unwrap();
+        cache.get_or_insert("content abc", "cat", "comp").unwrap();
+        if let CacheResult::Ref { inline_ref } =
+            cache.get_or_insert("content abc", "cat", "comp").unwrap()
+        {
+            assert!(inline_ref.starts_with("§ref:"), "got: {inline_ref}");
+            assert!(inline_ref.ends_with("§"), "got: {inline_ref}");
+        } else {
+            panic!("expected Ref, got Fresh");
+        }
+    }
+
+    // Different raw content hashes differently, so the second insert is Fresh too.
+    #[test]
+    fn test_different_content_no_dedup() {
+        let dir = tempdir().unwrap();
+        let cache = DedupCache::new(dir.path().join("test.db")).unwrap();
+        cache.get_or_insert("content A", "cat", "comp A").unwrap();
+        let result = cache.get_or_insert("content B", "cat", "comp B").unwrap();
+        assert!(matches!(result, CacheResult::Fresh { .. }));
+    }
+
+    // Same input gives the same digest; a SHA-256 hex digest is 64 characters.
+    #[test]
+    fn test_sha256_hex_deterministic() {
+        let h1 = sha256_hex("hello world");
+        let h2 = sha256_hex("hello world");
+        assert_eq!(h1, h2);
+        assert_eq!(h1.len(), 64);
+    }
+
+    // Eviction on a freshly created, empty database removes nothing and succeeds.
+    #[test]
+    fn test_evict_stale_no_panic_on_empty_db() {
+        let dir = tempdir().unwrap();
+        let cache = DedupCache::new(dir.path().join("empty.db")).unwrap();
+        let evicted = cache.evict_stale().unwrap();
+        assert_eq!(evicted, 0);
+    }
+}
diff --git a/src/core/delta_encoder.rs b/src/core/delta_encoder.rs
new file mode 100644
index 000000000..82a559675
--- /dev/null
+++ b/src/core/delta_encoder.rs
@@ -0,0 +1,140 @@
+//! Delta encoder — SimHash-gated LCS diff for near-duplicate file reads.
+//! When a file is re-read with minor edits, sends only changed lines
+//! instead of recompressing the full content.
+
+use crate::core::simhash::simhash;
+
+/// Largest Hamming distance (in differing fingerprint bits) at which two texts
+/// still count as near-duplicates in `is_near_duplicate`.
+const NEAR_DUPLICATE_MAX_DISTANCE: u32 = 20;
+
+/// Decide whether `a` and `b` are similar enough to attempt delta encoding.
+///
+/// Both texts are fingerprinted with `simhash`; the result is true when the
+/// fingerprints differ in at most `NEAR_DUPLICATE_MAX_DISTANCE` bits.
+pub fn is_near_duplicate(a: &str, b: &str) -> bool {
+    let fingerprint_a = simhash(a);
+    let fingerprint_b = simhash(b);
+    fingerprint_a.is_near_duplicate(&fingerprint_b, NEAR_DUPLICATE_MAX_DISTANCE)
+}
+
+/// Compute a compact line-level delta between `old` and `new`.
+///
+/// The result always starts with `§delta:HASH§`, where `HASH` is the first 8
+/// characters of `old_hash` (or all of it when shorter). What follows depends
+/// on the input:
+/// - ` [too large for delta, N lines]` when either text has more than 5000
+///   lines (`N` is the line count of `new`);
+/// - ` (unchanged)` when no line was added or removed;
+/// - otherwise a newline, then every removed line as `-line`, then every added
+///   line as `+line`. Removed lines are all listed before added lines, so the
+///   delta does not record where in the file a change happened.
+pub fn compute_delta(old_hash: &str, old: &str, new: &str) -> String {
+    const MAX_DELTA_LINES: usize = 5000;
+
+    // Cut on a character boundary: byte-slicing `old_hash[..8]` panics when a
+    // multi-byte character straddles byte 8.
+    let short_hash = old_hash
+        .char_indices()
+        .nth(8)
+        .map_or(old_hash, |(end, _)| &old_hash[..end]);
+
+    let old_lines: Vec<&str> = old.lines().collect();
+    let new_lines: Vec<&str> = new.lines().collect();
+
+    if old_lines.len() > MAX_DELTA_LINES || new_lines.len() > MAX_DELTA_LINES {
+        return format!(
+            "§delta:{}§ [too large for delta, {} lines]",
+            short_hash,
+            new_lines.len()
+        );
+    }
+
+    // Mark the lines on each side that belong to the longest common subsequence.
+    let mut old_kept = vec![false; old_lines.len()];
+    let mut new_kept = vec![false; new_lines.len()];
+    for (i, j) in lcs_indices(&old_lines, &new_lines) {
+        old_kept[i] = true;
+        new_kept[j] = true;
+    }
+
+    let removed = old_lines
+        .iter()
+        .zip(&old_kept)
+        .filter(|(_, kept)| !**kept)
+        .map(|(line, _)| format!("-{}", line));
+    let added = new_lines
+        .iter()
+        .zip(&new_kept)
+        .filter(|(_, kept)| !**kept)
+        .map(|(line, _)| format!("+{}", line));
+    let parts: Vec<String> = removed.chain(added).collect();
+
+    if parts.is_empty() {
+        return format!("§delta:{}§ (unchanged)", short_hash);
+    }
+
+    format!("§delta:{}§\n{}", short_hash, parts.join("\n"))
+}
+
+/// Return the index pairs `(i, j)` of one longest common subsequence of `a`
+/// and `b`, in increasing order, with `a[i] == b[j]` for every pair.
+///
+/// Lines shared at the start and at the end of both inputs are paired directly;
+/// only the differing middle goes through the quadratic dynamic-programming
+/// table. For near-identical inputs this keeps the table small instead of
+/// allocating `(a.len() + 1) * (b.len() + 1)` cells.
+fn lcs_indices(a: &[&str], b: &[&str]) -> Vec<(usize, usize)> {
+    let prefix = a.iter().zip(b).take_while(|(x, y)| x == y).count();
+    // Measure the suffix on what is left after the prefix so the two never overlap.
+    let suffix = a[prefix..]
+        .iter()
+        .rev()
+        .zip(b[prefix..].iter().rev())
+        .take_while(|(x, y)| x == y)
+        .count();
+    let mid_a = &a[prefix..a.len() - suffix];
+    let mid_b = &b[prefix..b.len() - suffix];
+    let (m, n) = (mid_a.len(), mid_b.len());
+
+    // dp[i][j] = LCS length of mid_a[..i] and mid_b[..j].
+    let mut dp = vec![vec![0usize; n + 1]; m + 1];
+    for i in 1..=m {
+        for j in 1..=n {
+            dp[i][j] = if mid_a[i - 1] == mid_b[j - 1] {
+                dp[i - 1][j - 1] + 1
+            } else {
+                dp[i - 1][j].max(dp[i][j - 1])
+            };
+        }
+    }
+
+    // Walk back from the bottom-right corner; pairs come out in reverse order.
+    let mut middle = Vec::new();
+    let (mut i, mut j) = (m, n);
+    while i > 0 && j > 0 {
+        if mid_a[i - 1] == mid_b[j - 1] {
+            middle.push((prefix + i - 1, prefix + j - 1));
+            i -= 1;
+            j -= 1;
+        } else if dp[i - 1][j] > dp[i][j - 1] {
+            i -= 1;
+        } else {
+            j -= 1;
+        }
+    }
+
+    let (tail_a, tail_b) = (a.len() - suffix, b.len() - suffix);
+    let mut result = Vec::with_capacity(prefix + middle.len() + suffix);
+    result.extend((0..prefix).map(|k| (k, k)));
+    result.extend(middle.into_iter().rev());
+    result.extend((0..suffix).map(|k| (tail_a + k, tail_b + k)));
+    result
+}
+
+// Unit tests for the delta encoder.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Identical input produces the exact "(unchanged)" marker and nothing else.
+    #[test]
+    fn test_identical_texts_zero_delta() {
+        let text = "line1\nline2\nline3\nline4\nline5";
+        let delta = compute_delta("abc123", text, text);
+        assert_eq!(delta, "§delta:abc123§ (unchanged)");
+    }
+
+    // A single inserted line shows up as a `+` entry.
+    #[test]
+    fn test_detects_added_line() {
+        let old =
+            "use anyhow::Result;\nuse std::path::Path;\n\npub fn foo() -> Result<()> { Ok(()) }";
+        let new = "use anyhow::Result;\nuse std::path::Path;\nuse std::fs;\n\npub fn foo() -> Result<()> { Ok(()) }";
+        assert!(is_near_duplicate(old, new), "should be near-duplicate");
+        let delta = compute_delta("hash12", old, new);
+        assert!(delta.contains("+use std::fs;"), "delta: {delta}");
+    }
+
+    // Fixture pair that differs by a renamed function.
+    #[test]
+    fn test_detects_modified_line() {
+        let old = include_str!("../../tests/fixtures/near_duplicate_a.txt");
+        let new = include_str!("../../tests/fixtures/near_duplicate_b.txt");
+        assert!(is_near_duplicate(old, new));
+        let delta = compute_delta("hashXY", old, new);
+        assert!(delta.starts_with("§delta:hashXY§"), "delta: {delta}");
+        assert!(
+            delta.contains("+pub fn load_config") || delta.contains("-pub fn read_config"),
+            "delta should mention the renamed function: {delta}"
+        );
+    }
+
+    // The delta for a near-duplicate must be smaller than resending the file.
+    #[test]
+    fn test_delta_shorter_than_full_content() {
+        let old = include_str!("../../tests/fixtures/near_duplicate_a.txt");
+        let new = include_str!("../../tests/fixtures/near_duplicate_b.txt");
+        let delta = compute_delta("hashZ", old, new);
+        assert!(
+            delta.len() < new.len(),
+            "delta ({}) should be shorter than full content ({})",
+            delta.len(),
+            new.len()
+        );
+    }
+
+    // Inputs above the 5000-line limit get the "too large" notice, not a diff.
+    #[test]
+    fn test_guard_on_large_files() {
+        let big = "line\n".repeat(6000);
+        let result = compute_delta("bigfile", &big, &big);
+        assert_eq!(result, "§delta:bigfile§ [too large for delta, 6000 lines]");
+    }
+}
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 01317e942..858e53c27 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -2,13 +2,18 @@
 
 pub mod config;
 pub mod constants;
+pub mod dedup_cache;
+pub mod delta_encoder;
 pub mod display_helpers;
 pub mod filter;
 pub mod runner;
+pub mod simhash;
 pub mod stream;
 pub mod tee;
 pub mod telemetry;
 pub mod telemetry_cmd;
 pub mod toml_filter;
+pub mod toon;
 pub mod tracking;
 pub mod utils;
+pub mod verifier;
diff --git a/src/core/simhash.rs b/src/core/simhash.rs
new file mode 100644
index 000000000..50c99a2e2
--- /dev/null
+++ b/src/core/simhash.rs
@@ -0,0 +1,108 @@
+//! SimHash — Locality-Sensitive Hashing: 64-bit fingerprints compared in O(1) for near-duplicate detection.
+//! Port from sqz_engine/src/simhash.rs (MIT-compatible subset).
+
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+
+/// A SimHash fingerprint stored as a single 64-bit word.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SimHash(pub u64);
+
+impl SimHash {
+    /// Count the bit positions in which the two fingerprints differ.
+    pub fn hamming_distance(&self, other: &SimHash) -> u32 {
+        let differing_bits = self.0 ^ other.0;
+        differing_bits.count_ones()
+    }
+
+    /// Return true when `other` differs from `self` in at most `max_distance`
+    /// bits (the bound is inclusive).
+    pub fn is_near_duplicate(&self, other: &SimHash, max_distance: u32) -> bool {
+        let distance = self.hamming_distance(other);
+        distance <= max_distance
+    }
+}
+
+/// Compute the 64-bit SimHash fingerprint of `text`.
+///
+/// The text is split into 3-word shingles. Each shingle's 64-bit hash casts one
+/// vote per bit position: +1 where the bit is set, -1 where it is clear. A
+/// fingerprint bit is set when its vote total is strictly positive. Text that
+/// yields no shingles maps to `SimHash(0)`.
+pub fn simhash(text: &str) -> SimHash {
+    let tokens = shingles(text, 3);
+    if tokens.is_empty() {
+        return SimHash(0);
+    }
+
+    let mut votes = [0i32; 64];
+    for token in &tokens {
+        let token_hash = hash_str(token);
+        for (bit, vote) in votes.iter_mut().enumerate() {
+            let bit_is_set = (token_hash >> bit) & 1 == 1;
+            *vote += if bit_is_set { 1 } else { -1 };
+        }
+    }
+
+    let fingerprint = votes
+        .iter()
+        .enumerate()
+        .filter(|(_, vote)| **vote > 0)
+        .fold(0u64, |acc, (bit, _)| acc | (1u64 << bit));
+    SimHash(fingerprint)
+}
+
+/// Split `text` into lowercase word shingles of `n` consecutive words.
+///
+/// Words are maximal runs of alphanumeric characters. When the text has fewer
+/// than `n` words, each word is returned on its own instead of being joined.
+fn shingles(text: &str, n: usize) -> Vec<String> {
+    let words: Vec<&str> = text
+        .split(|c: char| !c.is_alphanumeric())
+        .filter(|piece| !piece.is_empty())
+        .collect();
+
+    if words.len() >= n {
+        let mut joined = Vec::new();
+        for window in words.windows(n) {
+            joined.push(window.join(" ").to_lowercase());
+        }
+        joined
+    } else {
+        words.iter().map(|word| word.to_lowercase()).collect()
+    }
+}
+
+/// Hash `s` to 64 bits with the standard library's `DefaultHasher`.
+fn hash_str(s: &str) -> u64 {
+    let mut state = DefaultHasher::new();
+    Hash::hash(s, &mut state);
+    state.finish()
+}
+
+// Unit tests for SimHash fingerprinting.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Hashing is deterministic: the same text gives the same fingerprint.
+    #[test]
+    fn test_identical_texts_same_hash() {
+        let a = simhash("the quick brown fox jumps over the lazy dog and extra words");
+        let b = simhash("the quick brown fox jumps over the lazy dog and extra words");
+        assert_eq!(a.0, b.0);
+    }
+
+    // Texts differing in one word should differ in fewer than half of the 64 bits.
+    #[test]
+    fn test_near_duplicate_small_hamming() {
+        let a = simhash(
+            "fn process(input: &str) -> String { input.to_uppercase() and more words here }",
+        );
+        let b = simhash(
+            "fn process(input: &str) -> String { input.to_lowercase() and more words here }",
+        );
+        let dist = a.hamming_distance(&b);
+        assert!(dist < 32, "expected hamming < 32, got {dist}");
+    }
+
+    // Text with no words short-circuits to the zero fingerprint.
+    #[test]
+    fn test_empty_text_returns_zero() {
+        assert_eq!(simhash("").0, 0);
+    }
+
+    // Identical texts have distance 0, which is within any threshold.
+    #[test]
+    fn test_is_near_duplicate_threshold() {
+        let a = simhash("cargo test --all --workspace runs all the tests in the project");
+        let b = simhash("cargo test --all --workspace runs all the tests in the project");
+        assert!(a.is_near_duplicate(&b, 10));
+    }
+
+    // NOTE(review): this test makes no assertion about the distance; it only
+    // checks that hashing and comparing unrelated texts does not panic.
+    #[test]
+    fn test_very_different_texts_high_hamming() {
+        let a = simhash("the quick brown fox");
+        let b = simhash("SELECT * FROM users WHERE deleted_at IS NULL");
+        let _ = a.hamming_distance(&b);
+    }
+}
diff --git a/src/core/toon.rs b/src/core/toon.rs
new file mode 100644
index 000000000..08d1f370c
--- /dev/null
+++ b/src/core/toon.rs
@@ -0,0 +1,174 @@
+//! TOON — Token-Optimized Object Notation.
+//! Lossless compact JSON encoding: simple alphanumeric keys lose their quotes,
+//! no whitespace around separators, null fields stripped upstream.
+
+const TOON_PREFIX: &str = "TOON:";
+const MIN_INPUT_LEN: usize = 50;
+
+/// Parse `input` as JSON, remove every object field whose value is `null`
+/// (at any depth), and return the result serialized compactly.
+///
+/// Returns `None` when the trimmed input is not valid JSON or serialization fails.
+pub fn strip_nulls(input: &str) -> Option<String> {
+    match serde_json::from_str::<serde_json::Value>(input.trim()) {
+        Ok(mut value) => {
+            remove_nulls_recursive(&mut value);
+            serde_json::to_string(&value).ok()
+        }
+        Err(_) => None,
+    }
+}
+
+/// Remove null-valued fields from every object reachable from `v`, in place.
+///
+/// Only object fields are removed; `null` elements inside arrays are kept.
+fn remove_nulls_recursive(v: &mut serde_json::Value) {
+    match v {
+        serde_json::Value::Object(map) => {
+            map.retain(|_, val| !val.is_null());
+            map.values_mut().for_each(remove_nulls_recursive);
+        }
+        serde_json::Value::Array(arr) => arr.iter_mut().for_each(remove_nulls_recursive),
+        _ => {}
+    }
+}
+
+/// Encode a JSON document as TOON: the `TOON:` prefix followed by the compact
+/// notation produced by `encode_value`.
+///
+/// Returns `None` when the trimmed input is shorter than `MIN_INPUT_LEN` bytes
+/// or is not valid JSON.
+pub fn toon_encode(input: &str) -> Option<String> {
+    let json = input.trim();
+    if json.len() < MIN_INPUT_LEN {
+        return None;
+    }
+    match serde_json::from_str::<serde_json::Value>(json) {
+        Ok(value) => {
+            let mut out = String::with_capacity(json.len() / 2);
+            out.push_str(TOON_PREFIX);
+            encode_value(&value, &mut out);
+            Some(out)
+        }
+        Err(_) => None,
+    }
+}
+
+/// Append the TOON form of `v` to `buf`.
+///
+/// Scalars are written as in JSON. Arrays and objects are written without any
+/// whitespace, and object keys go through `encode_key`.
+fn encode_value(v: &serde_json::Value, buf: &mut String) {
+    use serde_json::Value;
+
+    match v {
+        Value::Null => buf.push_str("null"),
+        Value::Bool(true) => buf.push_str("true"),
+        Value::Bool(false) => buf.push_str("false"),
+        Value::Number(number) => buf.push_str(&number.to_string()),
+        Value::String(text) => encode_string(text, buf),
+        Value::Array(items) => {
+            buf.push('[');
+            let mut needs_comma = false;
+            for item in items {
+                if needs_comma {
+                    buf.push(',');
+                }
+                needs_comma = true;
+                encode_value(item, buf);
+            }
+            buf.push(']');
+        }
+        Value::Object(fields) => {
+            buf.push('{');
+            let mut needs_comma = false;
+            for (key, field) in fields {
+                if needs_comma {
+                    buf.push(',');
+                }
+                needs_comma = true;
+                encode_key(key, buf);
+                buf.push(':');
+                encode_value(field, buf);
+            }
+            buf.push('}');
+        }
+    }
+}
+
+/// Append an object key to `buf`, dropping the quotes when the key is "simple".
+///
+/// A key is simple when it is non-empty, does not start with an ASCII digit,
+/// and consists only of alphanumeric characters and `_`. Any other key is
+/// written as a quoted, escaped string.
+fn encode_key(key: &str, buf: &mut String) {
+    // `None` (empty key) fails this check, so empty keys are always quoted.
+    let starts_ok = matches!(key.chars().next(), Some(first) if !first.is_ascii_digit());
+    let body_ok = key.chars().all(|c| c == '_' || c.is_alphanumeric());
+
+    if starts_ok && body_ok {
+        buf.push_str(key);
+    } else {
+        encode_string(key, buf);
+    }
+}
+
+/// Append `s` to `buf` as a double-quoted string.
+///
+/// `"`, `\`, newline, carriage return and tab get their two-character escapes.
+/// Any other character below U+0020 is written as `\u00XX`. Everything else is
+/// copied through unchanged.
+fn encode_string(s: &str, buf: &mut String) {
+    buf.push('"');
+    for ch in s.chars() {
+        let short_escape = match ch {
+            '"' => Some("\\\""),
+            '\\' => Some("\\\\"),
+            '\n' => Some("\\n"),
+            '\r' => Some("\\r"),
+            '\t' => Some("\\t"),
+            _ => None,
+        };
+        if let Some(escape) = short_escape {
+            buf.push_str(escape);
+        } else if (ch as u32) < 0x20 {
+            buf.push_str(&format!("\\u{:04x}", ch as u32));
+        } else {
+            buf.push(ch);
+        }
+    }
+    buf.push('"');
+}
+
+// Unit tests for TOON encoding and null stripping.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Simple keys lose their quotes; string values keep theirs.
+    #[test]
+    fn test_simple_keys_no_quotes() {
+        let json =
+            r#"{"id":42,"name":"Alice","email":"alice@example.com","role":"admin","active":true}"#;
+        let toon = toon_encode(json).unwrap();
+        assert!(toon.starts_with("TOON:"), "got: {toon}");
+        assert!(
+            toon.contains("id:42"),
+            "simple key should have no quotes: {toon}"
+        );
+        assert!(
+            toon.contains(r#"name:"Alice""#),
+            "string value keeps quotes: {toon}"
+        );
+    }
+
+    // Inputs below the minimum length are not encoded at all.
+    #[test]
+    fn test_short_json_returns_none() {
+        assert!(
+            toon_encode(r#"{"id":1}"#).is_none(),
+            "< 50 chars should return None"
+        );
+    }
+
+    // Null-valued fields disappear; the remaining fields survive.
+    #[test]
+    fn test_strip_nulls_removes_null_fields() {
+        let json = r#"{"id":1,"name":"Alice","deleted_at":null,"internal_id":null,"role":"admin"}"#;
+        let stripped = strip_nulls(json).unwrap();
+        assert!(!stripped.contains("null"));
+        assert!(stripped.contains("Alice"));
+        assert!(stripped.contains("admin"));
+    }
+
+    // Both entry points reject input that is not JSON.
+    #[test]
+    fn test_non_json_returns_none() {
+        assert!(toon_encode("not json at all, just a plain text string here").is_none());
+        assert!(strip_nulls("not json").is_none());
+    }
+
+    // Values of several JSON types appear verbatim in the encoded output.
+    // There is no decoder here, so this is not a true round trip.
+    #[test]
+    fn test_toon_produces_valid_roundtrip_concept() {
+        let json = r#"{"id":42,"email":"alice@example.com","active":true,"score":98.6,"tags":["rust","cli"],"description":"token killer proxy"}"#;
+        let toon = toon_encode(json).unwrap();
+        assert!(toon.contains("42"), "id value: {toon}");
+        assert!(toon.contains("alice@example.com"), "email: {toon}");
+        assert!(toon.contains("98.6"), "score: {toon}");
+    }
+
+    // Measures character savings on a fixture. The test passes without checking
+    // anything when the fixture is shorter than 50 bytes.
+    #[test]
+    fn test_token_savings_on_api_fixture() {
+        let json = include_str!("../../tests/fixtures/api_response.json");
+        if json.len() < 50 {
+            return;
+        }
+        let cleaned = strip_nulls(json).unwrap_or_else(|| json.to_string());
+        let toon = toon_encode(&cleaned).unwrap_or(cleaned.clone());
+        // TOON removes quotes from simple alphanumeric keys → character savings even on compact JSON
+        let savings = 100.0 * (1.0 - toon.len() as f64 / json.len() as f64);
+        assert!(
+            savings >= 3.0,
+            "Expected ≥3% char savings on API fixture, got {savings:.1}%"
+        );
+    }
+}
diff --git a/src/core/tracking.rs b/src/core/tracking.rs
index 596602673..d689e6f75 100644
--- a/src/core/tracking.rs
+++ b/src/core/tracking.rs
@@ -1421,6 +1421,10 @@ pub fn args_display(args: &[OsString]) -> String {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::sync::Mutex;
+
+    // Serialize tests that mutate RTK_DB_PATH to prevent races on parallel test runs.
+    static ENV_MUTEX: Mutex<()> = Mutex::new(());
 
     // 1. estimate_tokens — verify ~4 chars/token ratio
     #[test]
@@ -1547,6 +1551,7 @@ mod tests {
     #[test]
     fn test_custom_db_path_env() {
         use std::env;
+        let _guard = ENV_MUTEX.lock().unwrap();
 
         let custom_path = env::temp_dir().join("rtk_test_custom.db");
         env::set_var("RTK_DB_PATH", &custom_path);
@@ -1561,6 +1566,7 @@ mod tests {
     #[test]
     fn test_default_db_path() {
         use std::env;
+        let _guard = ENV_MUTEX.lock().unwrap();
 
         // Ensure no env var is set
         env::remove_var("RTK_DB_PATH");
diff --git a/src/core/verifier.rs b/src/core/verifier.rs
new file mode 100644
index 000000000..2a85925f4
--- /dev/null
+++ b/src/core/verifier.rs
@@ -0,0 +1,278 @@
+//! Two-pass verifier — post-compression invariant checker with passthrough fallback.
+//! Runs 6 checks; falls back to the original when diagnostics are dropped or confidence is too low.
+
+/// Outcome of a verification pass: the confidence score and per-check results.
+pub struct VerifyResult {
+    pub confidence: f64,
+    pub passed: Vec<String>,
+    pub failed: Vec<(String, String)>,
+}
+
+impl VerifyResult {
+    /// True when no `error_lines` failure is recorded and `confidence >= threshold`.
+    pub fn is_safe(&self, threshold: f64) -> bool {
+        self.failed.iter().all(|(check, _)| check != "error_lines") && self.confidence >= threshold
+    }
+}
+
+/// Post-compression invariant checker with a passthrough fallback.
+///
+/// `threshold` is the minimum confidence that `verified_compress` requires
+/// before it returns the compressed text instead of the original input.
+pub struct Verifier {
+    pub threshold: f64,
+}
+
+impl Default for Verifier {
+    /// Delegates to [`Verifier::new`].
+    fn default() -> Self {
+        Verifier::new()
+    }
+}
+
+impl Verifier {
+    /// Create a verifier whose `threshold` is 0.6.
+    pub fn new() -> Self {
+        Verifier { threshold: 0.6 }
+    }
+
+    /// Verify that `compressed` is a safe reduction of `original`.
+    ///
+    /// Six checks run in a fixed order, and each adds exactly one entry to the
+    /// result: its name to `passed`, or its name plus a reason to `failed`.
+    ///
+    /// 1. `min_retention` — output length is at least 10% of the input length.
+    /// 2. `error_lines` — lines with a diagnostic marker are still present.
+    /// 3. `file_paths` — path-like tokens are still present.
+    /// 4. `json_keys` — at least half of the top-level JSON keys are present.
+    /// 5. `diff_hunks` — `@@` hunk header lines are still present.
+    /// 6. `numeric_values` — the first 10 numeric tokens are still present.
+    ///
+    /// `confidence` is the fraction of checks that passed. `self.threshold` is
+    /// not consulted here; callers apply one via `VerifyResult::is_safe`.
+    pub fn verify(&self, original: &str, compressed: &str) -> VerifyResult {
+        let mut passed = Vec::new();
+        let mut failed = Vec::new();
+        // Every check reports `None` on success or `Some(reason)` on failure.
+        let mut record = |name: &str, failure: Option<String>| match failure {
+            None => passed.push(name.to_string()),
+            Some(reason) => failed.push((name.to_string(), reason)),
+        };
+        record("min_retention", Self::min_retention_failure(original, compressed));
+        record("error_lines", Self::error_lines_failure(original, compressed));
+        record("file_paths", Self::file_paths_failure(original, compressed));
+        record("json_keys", Self::json_keys_failure(original, compressed));
+        record("diff_hunks", Self::diff_hunks_failure(original, compressed));
+        record("numeric_values", Self::numeric_values_failure(original, compressed));
+
+        // Fraction of recorded checks that passed; 1.0 if nothing was recorded.
+        let total = passed.len() + failed.len();
+        let confidence = if total == 0 {
+            1.0
+        } else {
+            passed.len() as f64 / total as f64
+        };
+        VerifyResult {
+            confidence,
+            passed,
+            failed,
+        }
+    }
+
+    /// Check 1 (`min_retention`): the output must be at least 10% of the input
+    /// length in bytes. An empty input always passes.
+    ///
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn min_retention_failure(original: &str, compressed: &str) -> Option<String> {
+        let retention = match original.len() {
+            0 => 1.0,
+            input_len => compressed.len() as f64 / input_len as f64,
+        };
+        (retention < 0.10)
+            .then(|| format!("output is {:.1}% of input (min 10%)", retention * 100.0))
+    }
+
+    /// Check 2 (`error_lines`): every input line containing a diagnostic marker
+    /// (matched case-insensitively) must occur, trimmed, in the output.
+    ///
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn error_lines_failure(original: &str, compressed: &str) -> Option<String> {
+        // Use "error:" with colon to avoid false positive from clippy "1 errors"
+        const MARKERS: [&str; 5] = ["error:", "warning:", "fatal:", "panic:", "exception:"];
+        let missing = original
+            .lines()
+            .filter(|line| {
+                let lowered = line.to_lowercase();
+                MARKERS.iter().any(|marker| lowered.contains(*marker))
+            })
+            .filter(|line| !compressed.contains(line.trim()))
+            .count();
+        (missing > 0).then(|| format!("{missing} critical line(s) dropped"))
+    }
+
+    /// Check 3 (`file_paths`): path-like tokens must not be truncated or lost.
+    ///
+    /// Candidate lines contain `/` or `\`, contain a `.`, and are shorter than
+    /// 200 bytes; only the first 20 are inspected. Every whitespace-separated
+    /// token of those lines that contains `/` or `\` must occur in the output.
+    ///
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn file_paths_failure(original: &str, compressed: &str) -> Option<String> {
+        let has_separator = |text: &str| text.contains('/') || text.contains('\\');
+        let missing_paths = original
+            .lines()
+            .filter(|line| has_separator(*line) && line.contains('.') && line.len() < 200)
+            .take(20)
+            .flat_map(str::split_whitespace)
+            .filter(|token| has_separator(*token) && !compressed.contains(*token))
+            .count();
+        (missing_paths > 0).then(|| format!("{missing_paths} file path(s) missing"))
+    }
+
+    /// Check 4 (`json_keys`): when the trimmed input parses as a JSON object,
+    /// at least 50% of its top-level keys must occur in the output.
+    ///
+    /// Input that is not a JSON object, or an object without keys, passes.
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn json_keys_failure(original: &str, compressed: &str) -> Option<String> {
+        let trimmed = original.trim();
+        if !trimmed.starts_with('{') {
+            return None;
+        }
+        let parsed = serde_json::from_str::<serde_json::Value>(trimmed).ok()?;
+        let object = parsed.as_object()?;
+        if object.is_empty() {
+            return None;
+        }
+        let present = object
+            .keys()
+            .filter(|key| compressed.contains(key.as_str()))
+            .count();
+        let ratio = present as f64 / object.len() as f64;
+        (ratio < 0.5).then(|| format!("{:.0}% of JSON keys retained", ratio * 100.0))
+    }
+
+    /// Check 5 (`diff_hunks`): every input line starting with `@@` must occur
+    /// verbatim in the output.
+    ///
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn diff_hunks_failure(original: &str, compressed: &str) -> Option<String> {
+        let missing = original
+            .lines()
+            .filter(|line| line.starts_with("@@") && !compressed.contains(*line))
+            .count();
+        (missing > 0).then(|| format!("{missing} @@ hunk header(s) missing"))
+    }
+
+    /// Check 6 (`numeric_values`): spot-check the first 10 numeric tokens of the
+    /// input; each must occur in the output.
+    ///
+    /// A token is a maximal run of ASCII digits, `.` and `-` that is at least
+    /// 2 bytes long and parses as `f64`.
+    ///
+    /// Returns `None` when the check passes, otherwise the failure reason.
+    fn numeric_values_failure(original: &str, compressed: &str) -> Option<String> {
+        let missing = original
+            .split(|c: char| !(c.is_ascii_digit() || c == '.' || c == '-'))
+            .filter(|token| token.len() >= 2 && token.parse::<f64>().is_ok())
+            .take(10)
+            .filter(|token| !compressed.contains(*token))
+            .count();
+        (missing > 0).then(|| format!("{missing} numeric value(s) altered"))
+    }
+
+    /// Apply `compress` to `input` and return its output only when that output
+    /// verifies as safe; otherwise return `input` unchanged.
+    ///
+    /// "Safe" means `self.verify(input, output).is_safe(self.threshold)`.
+    pub fn verified_compress<F>(&self, input: &str, compress: F) -> String
+    where
+        F: FnOnce(&str) -> String,
+    {
+        let candidate = compress(input);
+        if !self.verify(input, &candidate).is_safe(self.threshold) {
+            return input.to_string();
+        }
+        candidate
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_identical_input_full_confidence() {
+        // Comparing a text against itself must pass every check.
+        let sample = "error: type mismatch at src/main.rs:42\nCompiling rtk v0.37.2";
+        let outcome = Verifier::new().verify(sample, sample);
+        assert_eq!(outcome.confidence, 1.0);
+        assert!(outcome.failed.is_empty());
+    }
+
+    #[test]
+    fn test_fails_when_error_line_dropped() {
+        // The `error:` line is absent from the reduced text.
+        let full = "Compiling foo\nerror: type mismatch\nCompiling bar";
+        let reduced = "Compiling foo\nCompiling bar";
+        let outcome = Verifier::new().verify(full, reduced);
+        assert!(
+            outcome.failed.iter().any(|(check, _)| check == "error_lines"),
+            "should flag missing error line"
+        );
+        assert!(!outcome.is_safe(0.6));
+    }
+
+    #[test]
+    fn test_fails_on_too_short_output() {
+        // One byte out of 1000 is far below the 10% retention floor.
+        let long_input = "a".repeat(1000);
+        let tiny_output = "x";
+        let outcome = Verifier::new().verify(&long_input, tiny_output);
+        assert!(outcome.failed.iter().any(|(check, _)| check == "min_retention"));
+    }
+
+    #[test]
+    fn test_passes_on_normal_compressed_output() {
+        // Only the retention check is asserted for this summary-style output.
+        let full = "Compiling foo v0.1\nCompiling bar v0.2\nFinished dev [unoptimized] in 3.5s";
+        let summary = "Finished dev in 3.5s";
+        let outcome = Verifier::new().verify(full, summary);
+        assert!(outcome.passed.iter().any(|check| check == "min_retention"));
+    }
+
+    #[test]
+    fn test_preserves_diff_hunk_headers() {
+        // Identical text keeps its `@@` header, so the check passes.
+        let patch = "@@ -1,5 +1,6 @@\n line1\n-old\n+new\n line2";
+        let outcome = Verifier::new().verify(patch, patch);
+        assert!(outcome.passed.iter().any(|check| check == "diff_hunks"));
+    }
+
+    #[test]
+    fn test_fails_when_hunk_header_dropped() {
+        // The reduced text has no `@@` line at all.
+        let patch = "@@ -1,5 +1,6 @@\n line1\n-old\n+new\n line2";
+        let reduced = "line1\n-old\n+new\nline2";
+        let outcome = Verifier::new().verify(patch, reduced);
+        assert!(outcome.failed.iter().any(|(check, _)| check == "diff_hunks"));
+    }
+
+    #[test]
+    fn test_numeric_value_preserved() {
+        // Identical text trivially keeps every numeric token.
+        let line = "tests: 42 passed, 0 failed, 100ms elapsed";
+        let outcome = Verifier::new().verify(line, line);
+        assert!(outcome.passed.iter().any(|check| check == "numeric_values"));
+    }
+
+    #[test]
+    fn test_verified_compress_passthrough_on_bad_compress() {
+        let input = "error: something failed\nCompiling foo\nCompiling bar\nFinished in 3.5s";
+        // This compressor keeps only the last line, discarding the error line.
+        let lossy = |_: &str| "Finished in 3.5s".to_string();
+        let output = Verifier::new().verified_compress(input, lossy);
+        // The dropped error line forces a fallback to the original input.
+        assert!(output.contains("error:"));
+    }
+}
diff --git a/src/hooks/init.rs b/src/hooks/init.rs
index c83185b90..6f40ac85f 100644
--- a/src/hooks/init.rs
+++ b/src/hooks/init.rs
@@ -56,6 +56,62 @@ const AGENTS_MD: &str = "AGENTS.md";
 const RTK_MD_REF: &str = "@RTK.md";
 const GEMINI_MD: &str = "GEMINI.md";
 
+/// Canonical list of supported agent names.
+pub const KNOWN_AGENTS: &[&str] = &[
+    "claude", "cursor", "windsurf", "cline", "gemini", "opencode", "codex",
+];
+
+/// Resolve an agent name or alias, ignoring case, to its canonical agent name.
+pub fn normalize_agent(name: &str) -> Option<&'static str> {
+    Some(match name.to_lowercase().as_str() {
+        "claude-code" | "claudecode" | "claude" => "claude",
+        "roo" | "roocline" | "kilocode" | "cline" => "cline",
+        "gemini-cli" | "geminicli" | "gemini" => "gemini",
+        "cursor" => "cursor",
+        "windsurf" => "windsurf",
+        "opencode" => "opencode",
+        "codex" => "codex",
+        _ => return None,
+    })
+}
+
+/// Compute the agents to configure from the `--only` / `--skip` values.
+///
+/// Each value is a comma-separated list of names or aliases. `only` yields the
+/// listed agents (deduplicated, in input order); `skip` yields `KNOWN_AGENTS`
+/// minus the listed ones; neither yields all of `KNOWN_AGENTS`.
+///
+/// # Errors
+/// Fails on an unknown agent name, or when both `only` and `skip` are given.
+pub fn resolve_agents(only: Option<&str>, skip: Option<&str>) -> anyhow::Result<Vec<&'static str>> {
+    // Parse one comma-separated list into canonical names without duplicates.
+    fn parse(s: &str) -> anyhow::Result<Vec<&'static str>> {
+        let mut seen: Vec<&'static str> = Vec::new();
+        for raw in s.split(',').map(str::trim) {
+            let agent = normalize_agent(raw).ok_or_else(|| {
+                anyhow::anyhow!("Unknown agent '{}'. Accepted: {}", raw, KNOWN_AGENTS.join(", "))
+            })?;
+            if !seen.contains(&agent) {
+                seen.push(agent);
+            }
+        }
+        Ok(seen)
+    }
+
+    let selected = match (only, skip) {
+        (Some(_), Some(_)) => anyhow::bail!("--only and --skip cannot be combined"),
+        (Some(list), None) => parse(list)?,
+        (None, None) => KNOWN_AGENTS.to_vec(),
+        (None, Some(list)) => {
+            let excluded = parse(list)?;
+            let mut kept = KNOWN_AGENTS.to_vec();
+            kept.retain(|agent| !excluded.contains(agent));
+            kept
+        }
+    };
+    Ok(selected)
+}
+
 /// Control flow for settings.json patching
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum PatchMode {
@@ -3776,3 +3832,64 @@ More notes
         });
     }
 }
+
+#[cfg(test)]
+mod agent_selection_tests {
+    use super::{normalize_agent, resolve_agents, KNOWN_AGENTS};
+
+    #[test]
+    fn test_only_single_agent() {
+        let selected = resolve_agents(Some("claude"), None).unwrap();
+        assert_eq!(selected, ["claude"]);
+    }
+
+    #[test]
+    fn test_only_multiple_agents() {
+        let selected = resolve_agents(Some("claude,cursor"), None).unwrap();
+        assert_eq!(selected.len(), 2);
+        assert!(selected.iter().any(|&agent| agent == "claude"));
+        assert!(selected.iter().any(|&agent| agent == "cursor"));
+    }
+
+    #[test]
+    fn test_skip_removes_agents() {
+        let remaining = resolve_agents(None, Some("cursor,windsurf")).unwrap();
+        assert!(remaining.iter().all(|&agent| agent != "cursor"));
+        assert!(remaining.iter().all(|&agent| agent != "windsurf"));
+        assert!(remaining.iter().any(|&agent| agent == "claude"));
+    }
+
+    #[test]
+    fn test_no_flags_returns_all_known() {
+        let everything = resolve_agents(None, None).unwrap();
+        assert_eq!(everything.len(), KNOWN_AGENTS.len());
+    }
+
+    #[test]
+    fn test_alias_claude_code() {
+        assert_eq!(Some("claude"), normalize_agent("claude-code"));
+    }
+
+    #[test]
+    fn test_alias_roo() {
+        assert_eq!(Some("cline"), normalize_agent("roo"));
+    }
+
+    #[test]
+    fn test_alias_gemini_cli() {
+        assert_eq!(Some("gemini"), normalize_agent("gemini-cli"));
+    }
+
+    #[test]
+    fn test_unknown_agent_error() {
+        let outcome = resolve_agents(Some("vscode"), None);
+        assert!(outcome.is_err());
+        assert!(outcome.unwrap_err().to_string().contains("Unknown agent"));
+    }
+
+    #[test]
+    fn test_deduplication() {
+        let selected = resolve_agents(Some("claude,claude-code"), None).unwrap();
+        assert_eq!(selected.len(), 1);
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 82d994910..6ae22dcf9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -328,6 +328,14 @@ enum Commands {
         #[arg(short, long)]
         global: bool,
 
+        /// Only configure these agents (comma-separated: claude, cursor, windsurf, cline, gemini, opencode, codex)
+        #[arg(long, value_name = "AGENTS", conflicts_with = "skip")]
+        only: Option<String>,
+
+        /// Configure all agents except these (comma-separated)
+        #[arg(long, value_name = "AGENTS", conflicts_with = "only")]
+        skip: Option<String>,
+
         /// Install OpenCode plugin (in addition to Claude Code)
         #[arg(long)]
         opencode: bool,
@@ -651,6 +659,17 @@ enum Commands {
         require_all: bool,
     },
 
+    /// Recover original content referenced by a §ref:HASH§ dedup token
+    #[command(about = "Recover original content from a §ref:HASH§ dedup token")]
+    Expand {
+        /// The hash prefix (first 8 chars from the §ref:HASH§ token)
+        hash: String,
+    },
+
+    /// Evict stale dedup cache entries and show cache stats
+    #[command(about = "Evict stale dedup cache entries and show stats")]
+    DedupCompact,
+
     /// Ruff linter/formatter with compact output
     Ruff {
         /// Ruff arguments (e.g., check, format --check)
@@ -1745,6 +1764,8 @@ fn run_cli() -> Result<i32> {
 
         Commands::Init {
             global,
+            only,
+            skip,
             opencode,
             gemini,
             agent,
@@ -1757,6 +1778,8 @@ fn run_cli() -> Result<i32> {
             codex,
             copilot,
         } => {
+            // --only / --skip: validate agent list early
+            let _ = hooks::init::resolve_agents(only.as_deref(), skip.as_deref())?;
             if show {
                 hooks::init::show_config(codex)?;
             } else if uninstall {
@@ -2370,6 +2393,45 @@ fn run_cli() -> Result<i32> {
             }
             0
         }
+
+        Commands::Expand { hash } => {
+            use crate::core::dedup_cache::DedupCache;
+            let db_path = core::config::Config::load()
+                .ok()
+                .and_then(|c| c.tracking.database_path)
+                .unwrap_or_else(|| {
+                    dirs::data_local_dir()
+                        .unwrap_or_else(|| std::path::PathBuf::from("."))
+                        .join("rtk/history.db")
+                });
+            let cache = DedupCache::new(db_path)?;
+            match cache.expand_prefix(&hash)? {
+                Some(content) => print!("{}", content),
+                None => {
+                    eprintln!("rtk expand: no cached content for hash '{hash}'");
+                    std::process::exit(1);
+                }
+            }
+            0
+        }
+
+        Commands::DedupCompact => {
+            use crate::core::dedup_cache::DedupCache;
+            let db_path = core::config::Config::load()
+                .ok()
+                .and_then(|c| c.tracking.database_path)
+                .unwrap_or_else(|| {
+                    dirs::data_local_dir()
+                        .unwrap_or_else(|| std::path::PathBuf::from("."))
+                        .join("rtk/history.db")
+                });
+            let cache = DedupCache::new(db_path)?;
+            let n = cache.evict_stale()?;
+            println!("rtk dedup-compact: evicted {n} stale cache entries");
+            let stats = cache.stats()?;
+            println!("  Cache: {} entries, {:.1} KB", stats.count, stats.size_kb);
+            0
+        }
     };
 
     Ok(code)
diff --git a/tests/fixtures/api_response.json b/tests/fixtures/api_response.json
new file mode 100644
index 000000000..83902c5b7
--- /dev/null
+++ b/tests/fixtures/api_response.json
@@ -0,0 +1 @@
+{"id":1139971460,"node_id":"R_kgDOQ_KVhA","name":"rtk","full_name":"rtk-ai/rtk","private":false,"owner":{"login":"rtk-ai","id":258253854,"node_id":"O_kgDOD2SkHg","avatar_url":"https://avatars.githubusercontent.com/u/258253854?v=4","gravatar_id":"","url":"https://api.github.com/users/rtk-ai","html_url":"https://github.com/rtk-ai","followers_url":"https://api.github.com/users/rtk-ai/followers","following_url":"https://api.github.com/users/rtk-ai/following{/other_user}","gists_url":"https://api.github.com/users/rtk-ai/gists{/gist_id}","starred_url":"https://api.github.com/users/rtk-ai/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/rtk-ai/subscriptions","organizations_url":"https://api.github.com/users/rtk-ai/orgs","repos_url":"https://api.github.com/users/rtk-ai/repos","events_url":"https://api.github.com/users/rtk-ai/events{/privacy}","received_events_url":"https://api.github.com/users/rtk-ai/received_events","type":"Organization","user_view_type":"public","site_admin":false},"html_url":"https://github.com/rtk-ai/rtk","description":"CLI proxy that reduces LLM token consumption by 60-90% on common dev commands. 
Single Rust binary, zero dependencies","fork":false,"url":"https://api.github.com/repos/rtk-ai/rtk","forks_url":"https://api.github.com/repos/rtk-ai/rtk/forks","keys_url":"https://api.github.com/repos/rtk-ai/rtk/keys{/key_id}","collaborators_url":"https://api.github.com/repos/rtk-ai/rtk/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/rtk-ai/rtk/teams","hooks_url":"https://api.github.com/repos/rtk-ai/rtk/hooks","issue_events_url":"https://api.github.com/repos/rtk-ai/rtk/issues/events{/number}","events_url":"https://api.github.com/repos/rtk-ai/rtk/events","assignees_url":"https://api.github.com/repos/rtk-ai/rtk/assignees{/user}","branches_url":"https://api.github.com/repos/rtk-ai/rtk/branches{/branch}","tags_url":"https://api.github.com/repos/rtk-ai/rtk/tags","blobs_url":"https://api.github.com/repos/rtk-ai/rtk/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/rtk-ai/rtk/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/rtk-ai/rtk/git/refs{/sha}","trees_url":"https://api.github.com/repos/rtk-ai/rtk/git/trees{/sha}","statuses_url":"https://api.github.com/repos/rtk-ai/rtk/statuses/{sha}","languages_url":"https://api.github.com/repos/rtk-ai/rtk/languages","stargazers_url":"https://api.github.com/repos/rtk-ai/rtk/stargazers","contributors_url":"https://api.github.com/repos/rtk-ai/rtk/contributors","subscribers_url":"https://api.github.com/repos/rtk-ai/rtk/subscribers","subscription_url":"https://api.github.com/repos/rtk-ai/rtk/subscription","commits_url":"https://api.github.com/repos/rtk-ai/rtk/commits{/sha}","git_commits_url":"https://api.github.com/repos/rtk-ai/rtk/git/commits{/sha}","comments_url":"https://api.github.com/repos/rtk-ai/rtk/comments{/number}","issue_comment_url":"https://api.github.com/repos/rtk-ai/rtk/issues/comments{/number}","contents_url":"https://api.github.com/repos/rtk-ai/rtk/contents/{+path}","compare_url":"https://api.github.com/repos/rtk-ai/rtk/compare/{base}...{head}","merges_url":"https://api.gith
ub.com/repos/rtk-ai/rtk/merges","archive_url":"https://api.github.com/repos/rtk-ai/rtk/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/rtk-ai/rtk/downloads","issues_url":"https://api.github.com/repos/rtk-ai/rtk/issues{/number}","pulls_url":"https://api.github.com/repos/rtk-ai/rtk/pulls{/number}","milestones_url":"https://api.github.com/repos/rtk-ai/rtk/milestones{/number}","notifications_url":"https://api.github.com/repos/rtk-ai/rtk/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/rtk-ai/rtk/labels{/name}","releases_url":"https://api.github.com/repos/rtk-ai/rtk/releases{/id}","deployments_url":"https://api.github.com/repos/rtk-ai/rtk/deployments","created_at":"2026-01-22T16:54:16Z","updated_at":"2026-04-24T09:38:30Z","pushed_at":"2026-04-23T14:54:19Z","git_url":"git://github.com/rtk-ai/rtk.git","ssh_url":"git@github.com:rtk-ai/rtk.git","clone_url":"https://github.com/rtk-ai/rtk.git","svn_url":"https://github.com/rtk-ai/rtk","homepage":"https://www.rtk-ai.app","size":3703,"stargazers_count":33862,"watchers_count":33862,"language":"Rust","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"has_discussions":true,"forks_count":1998,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":676,"license":{"key":"apache-2.0","name":"Apache License 
2.0","spdx_id":"Apache-2.0","url":"https://api.github.com/licenses/apache-2.0","node_id":"MDc6TGljZW5zZTI="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"has_pull_requests":true,"pull_request_creation_policy":"all","topics":["agentic-coding","ai-coding","anthropic","claude-code","cli","command-line-tool","cost-reduction","developer-tools","llm","open-source","productivity","rust","token-optimization"],"visibility":"public","forks":1998,"open_issues":676,"watchers":33862,"default_branch":"master","permissions":{"admin":false,"maintain":true,"push":true,"triage":true,"pull":true},"temp_clone_token":"","allow_squash_merge":false,"allow_merge_commit":true,"allow_rebase_merge":true,"allow_auto_merge":false,"delete_branch_on_merge":false,"allow_update_branch":false,"use_squash_pr_title_as_default":false,"squash_merge_commit_message":"COMMIT_MESSAGES","squash_merge_commit_title":"COMMIT_OR_PR_TITLE","merge_commit_message":"PR_TITLE","merge_commit_title":"MERGE_MESSAGE","custom_properties":{},"organization":{"login":"rtk-ai","id":258253854,"node_id":"O_kgDOD2SkHg","avatar_url":"https://avatars.githubusercontent.com/u/258253854?v=4","gravatar_id":"","url":"https://api.github.com/users/rtk-ai","html_url":"https://github.com/rtk-ai","followers_url":"https://api.github.com/users/rtk-ai/followers","following_url":"https://api.github.com/users/rtk-ai/following{/other_user}","gists_url":"https://api.github.com/users/rtk-ai/gists{/gist_id}","starred_url":"https://api.github.com/users/rtk-ai/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/rtk-ai/subscriptions","organizations_url":"https://api.github.com/users/rtk-ai/orgs","repos_url":"https://api.github.com/users/rtk-ai/repos","events_url":"https://api.github.com/users/rtk-ai/events{/privacy}","received_events_url":"https://api.github.com/users/rtk-ai/received_events","type":"Organization","user_view_type":"public","site_admin":false},"network_count":1998,"subscribers_count"
:86}
\ No newline at end of file
diff --git a/tests/fixtures/near_duplicate_a.txt b/tests/fixtures/near_duplicate_a.txt
new file mode 100644
index 000000000..8c31b3482
--- /dev/null
+++ b/tests/fixtures/near_duplicate_a.txt
@@ -0,0 +1,12 @@
+use anyhow::{Context, Result};
+use std::path::Path;
+
+pub fn read_config(path: &Path) -> Result<String> {
+    std::fs::read_to_string(path)
+        .with_context(|| format!("Failed to read {}", path.display()))
+}
+
+pub fn write_config(path: &Path, content: &str) -> Result<()> {
+    std::fs::write(path, content)
+        .with_context(|| format!("Failed to write {}", path.display()))
+}
diff --git a/tests/fixtures/near_duplicate_b.txt b/tests/fixtures/near_duplicate_b.txt
new file mode 100644
index 000000000..532b35a54
--- /dev/null
+++ b/tests/fixtures/near_duplicate_b.txt
@@ -0,0 +1,12 @@
+use anyhow::{Context, Result};
+use std::path::Path;
+
+pub fn load_config(path: &Path) -> Result<String> {
+    std::fs::read_to_string(path)
+        .with_context(|| format!("Cannot read {}", path.display()))
+}
+
+pub fn write_config(path: &Path, content: &str) -> Result<()> {
+    std::fs::write(path, content)
+        .with_context(|| format!("Failed to write {}", path.display()))
+}