From 8b557efbdd2c93a0c65d6bdbcabf498804066e37 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 21:16:49 +0900
Subject: [PATCH 1/5] feat: improve KanjiVariantRewriter with subsplit and
 reading-scan phases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KanjiVariantRewriter now handles three cases:
- Phase 1a: exact 2-char hiragana segment → kanji replacement
- Phase 1b: 3+ char hiragana segment → subsplit into 2-char kanji prefix + remainder
- Phase 2: single-segment hiragana path → reading-scan for kanji sub-spans

This fixes "しておいた方が" not appearing in candidates when the only
hiragana path was single-segment (from HiraganaVariantRewriter) and the
multi-segment paths had compound segments that didn't expose ほう.

Also reduces non_independent_kanji_penalty from 3000 to 1500, extracts
rewriter tests to converter/tests/rewriter.rs (430 → 430+969 lines),
and adds engine settings TOML to mise.toml sources.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/rewriter.rs | 1057 ++++-------------
 .../lex-core/src/converter/tests/mod.rs       |    1 +
 .../lex-core/src/converter/tests/rewriter.rs  |  969 +++++++++++++++
 .../crates/lex-core/src/default_settings.toml |    2 +-
 engine/crates/lex-core/src/settings.rs        |   14 +-
 mise.toml                                     |    2 +-
 6 files changed, 1194 insertions(+), 851 deletions(-)
 create mode 100644 engine/crates/lex-core/src/converter/tests/rewriter.rs
diff --git a/engine/crates/lex-core/src/converter/rewriter.rs b/engine/crates/lex-core/src/converter/rewriter.rs
index a212b41..2f50295 100644
--- a/engine/crates/lex-core/src/converter/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/rewriter.rs
@@ -148,9 +148,10 @@ pub(crate) struct KanjiVariantRewriter<'a> {
 const MAX_KANJI_PER_SEGMENT: usize = 3;
 
 impl Rewriter for KanjiVariantRewriter<'_> {
-    fn generate(&self, paths: &[ScoredPath], _reading: &str) -> Vec<ScoredPath> {
+    fn generate(&self, paths: &[ScoredPath], reading: &str) -> Vec<ScoredPath> {
         let mut new_paths = Vec::new();
 
+        // Phase 1: Segment-based replacement on multi-segment paths.
         // Consider up to 5 eligible source paths (with more than one segment),
         // so that single-segment candidates added by earlier rewriters do not
         // consume this rewriter's processing budget.
@@ -163,48 +164,231 @@ impl Rewriter for KanjiVariantRewriter<'_> {
                 let seg_end = char_pos + seg_char_len;
                 char_pos = seg_end;
 
-                // Only process 2-char hiragana segments. Single-char segments
-                // are skipped because they are almost always function morphemes
-                // (し, た, な, が) where kanji replacements would be incorrect.
-                // Segments of 3+ chars are skipped because they often come from
-                // resegmentation with incorrect morpheme boundaries
-                // (e.g. たほう → 他方).
-                if seg_char_len != 2
-                    || seg.surface != seg.reading
-                    || !seg.surface.chars().all(is_hiragana)
-                {
+                // Skip non-hiragana or already-kanji segments
+                if seg.surface != seg.reading || !seg.surface.chars().all(is_hiragana) {
                     continue;
                 }
 
-                // Find kanji nodes at the same [start, end) span in the lattice
-                let node_indices = match self.lattice.nodes_by_start.get(seg_start) {
-                    Some(indices) => indices,
-                    None => continue,
-                };
-
-                let mut kanji_nodes: Vec<_> = node_indices
-                    .iter()
-                    .map(|&idx| &self.lattice.nodes[idx])
-                    .filter(|node| node.end == seg_end && node.surface.chars().any(is_kanji))
-                    .collect();
-                kanji_nodes.sort_by_key(|n| n.cost);
-                kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
+                if seg_char_len == 1 {
+                    // Single-char segments are almost always function morphemes
+                    // (し, た, な, が) where kanji replacements would be incorrect.
+                    continue;
+                }
 
-                for node in kanji_nodes {
-                    let mut new_segments = path.segments.clone();
-                    new_segments[seg_idx] = super::viterbi::RichSegment::from(node);
-                    new_paths.push(ScoredPath {
-                        segments: new_segments,
-                        viterbi_cost: path.viterbi_cost.saturating_add(2000),
-                    });
+                if seg_char_len == 2 {
+                    // Exact match: find kanji nodes at the same [start, end) span
+                    self.kanji_variants_exact(path, seg_idx, seg_start, seg_end, &mut new_paths);
+                } else {
+                    // 3+ char hiragana segment (e.g. ほうが): try splitting into
+                    // a 2-char kanji prefix + hiragana remainder. This handles
+                    // cases where the Viterbi chose a compound segment (cheaper
+                    // due to no connection cost) but we want the kanji sub-form
+                    // (e.g. ほうが → 方+が).
+                    self.kanji_variants_subsplit(path, seg_idx, seg_start, seg_end, &mut new_paths);
                 }
             }
         }
 
+        // Phase 2: Reading-scan for single-segment hiragana paths.
+        // When HiraganaVariantRewriter produces a single-segment all-hiragana
+        // path, the segment-based approach above can't find kanji sub-spans.
+        // Scan the reading directly to find 2-char kanji alternatives in the
+        // lattice and build 3-segment variants (prefix + kanji + suffix).
+        if let Some(base) = paths.iter().find(|p| {
+            p.segments.len() == 1
+                && p.segments[0].surface == p.segments[0].reading
+                && p.segments[0].surface.chars().all(is_hiragana)
+        }) {
+            self.kanji_variants_from_reading(reading, base.viterbi_cost, &mut new_paths);
+        }
+
         new_paths
     }
 }
 
+impl KanjiVariantRewriter<'_> {
+    /// Replace a 2-char hiragana segment with kanji alternatives from the lattice.
+    fn kanji_variants_exact(
+        &self,
+        path: &ScoredPath,
+        seg_idx: usize,
+        seg_start: usize,
+        seg_end: usize,
+        new_paths: &mut Vec<ScoredPath>,
+    ) {
+        let node_indices = match self.lattice.nodes_by_start.get(seg_start) {
+            Some(indices) => indices,
+            None => return,
+        };
+
+        let mut kanji_nodes: Vec<_> = node_indices
+            .iter()
+            .map(|&idx| &self.lattice.nodes[idx])
+            .filter(|node| node.end == seg_end && node.surface.chars().any(is_kanji))
+            .collect();
+        kanji_nodes.sort_by_key(|n| n.cost);
+        kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
+
+        for node in kanji_nodes {
+            let mut new_segments = path.segments.clone();
+            new_segments[seg_idx] = super::viterbi::RichSegment::from(node);
+            new_paths.push(ScoredPath {
+                segments: new_segments,
+                viterbi_cost: path.viterbi_cost.saturating_add(2000),
+            });
+        }
+    }
+
+    /// For a 3+ char hiragana segment, try splitting at each internal boundary
+    /// to find a 2-char kanji prefix with a hiragana remainder.
+    ///
+    /// Example: segment "ほうが" [5,8) → split at 7 → kanji "方" [5,7) + "が" [7,8)
+    fn kanji_variants_subsplit(
+        &self,
+        path: &ScoredPath,
+        seg_idx: usize,
+        seg_start: usize,
+        seg_end: usize,
+        new_paths: &mut Vec<ScoredPath>,
+    ) {
+        // Try each internal split point
+        for mid in (seg_start + 2)..seg_end {
+            let left_len = mid - seg_start;
+            // Only consider 2-char kanji prefixes to avoid incorrect boundaries
+            // (e.g. たほう → 他方)
+            if left_len != 2 {
+                continue;
+            }
+
+            // Find kanji nodes for the left part [seg_start, mid)
+            let left_indices = match self.lattice.nodes_by_start.get(seg_start) {
+                Some(indices) => indices,
+                None => continue,
+            };
+            let mut kanji_nodes: Vec<_> = left_indices
+                .iter()
+                .map(|&idx| &self.lattice.nodes[idx])
+                .filter(|node| node.end == mid && node.surface.chars().any(is_kanji))
+                .collect();
+            kanji_nodes.sort_by_key(|n| n.cost);
+            kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
+
+            if kanji_nodes.is_empty() {
+                continue;
+            }
+
+            // Find a hiragana node for the right part [mid, seg_end)
+            let right_indices = match self.lattice.nodes_by_start.get(mid) {
+                Some(indices) => indices,
+                None => continue,
+            };
+            // Pick the lowest-cost hiragana node for the remainder
+            let right_node = right_indices
+                .iter()
+                .map(|&idx| &self.lattice.nodes[idx])
+                .filter(|node| {
+                    node.end == seg_end
+                        && node.surface == node.reading
+                        && node.surface.chars().all(is_hiragana)
+                })
+                .min_by_key(|n| n.cost);
+            let Some(right_node) = right_node else {
+                continue;
+            };
+
+            for kanji_node in kanji_nodes {
+                let mut new_segments = path.segments.clone();
+                let right_seg = super::viterbi::RichSegment::from(right_node);
+                new_segments[seg_idx] = super::viterbi::RichSegment::from(kanji_node);
+                new_segments.insert(seg_idx + 1, right_seg);
+                new_paths.push(ScoredPath {
+                    segments: new_segments,
+                    viterbi_cost: path.viterbi_cost.saturating_add(2000),
+                });
+            }
+        }
+    }
+
+    /// Scan the full reading for 2-char positions that have kanji alternatives
+    /// in the lattice, and build 3-segment variants (hiragana prefix + kanji + hiragana suffix).
+    ///
+    /// This handles cases where the only hiragana path is single-segment
+    /// (from HiraganaVariantRewriter) and the multi-segment paths all have
+    /// kanji/compound segments that don't expose the 2-char hiragana sub-span.
+    ///
+    /// Example: reading "しておいたほうが" → finds 方 at [5,7) →
+    /// builds "しておいた" + "方" + "が"
+    fn kanji_variants_from_reading(
+        &self,
+        reading: &str,
+        base_cost: i64,
+        new_paths: &mut Vec<ScoredPath>,
+    ) {
+        let char_count = reading.chars().count();
+        if char_count < 3 {
+            return;
+        }
+
+        for pos in 0..char_count.saturating_sub(1) {
+            let end = pos + 2;
+            if end > char_count {
+                break;
+            }
+
+            // Skip positions at the start or end (single-char prefix/suffix
+            // would be a function morpheme — let segment-based rewriter handle those)
+            if pos == 0 || end == char_count {
+                continue;
+            }
+
+            let node_indices = match self.lattice.nodes_by_start.get(pos) {
+                Some(indices) => indices,
+                None => continue,
+            };
+
+            let mut kanji_nodes: Vec<_> = node_indices
+                .iter()
+                .map(|&idx| &self.lattice.nodes[idx])
+                .filter(|node| node.end == end && node.surface.chars().any(is_kanji))
+                .collect();
+            kanji_nodes.sort_by_key(|n| n.cost);
+            kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
+
+            if kanji_nodes.is_empty() {
+                continue;
+            }
+
+            // Build prefix reading [0, pos) and suffix reading [end, char_count)
+            let prefix_reading: String = reading.chars().take(pos).collect();
+            let suffix_reading: String = reading.chars().skip(end).collect();
+
+            for node in kanji_nodes {
+                let segments = vec![
+                    super::viterbi::RichSegment {
+                        reading: prefix_reading.clone(),
+                        surface: prefix_reading.clone(),
+                        left_id: 0,
+                        right_id: 0,
+                        word_cost: 0,
+                    },
+                    super::viterbi::RichSegment::from(node),
+                    super::viterbi::RichSegment {
+                        reading: suffix_reading.clone(),
+                        surface: suffix_reading.clone(),
+                        left_id: 0,
+                        right_id: 0,
+                        word_cost: 0,
+                    },
+                ];
+                new_paths.push(ScoredPath {
+                    segments,
+                    viterbi_cost: base_cost.saturating_add(2000),
+                });
+            }
+        }
+    }
+}
+
 /// Adds numeric candidates (half-width and full-width) when the reading is a
 /// Japanese number expression.
 pub(crate) struct NumericRewriter;
@@ -244,814 +428,3 @@ impl Rewriter for NumericRewriter {
         candidates
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::super::viterbi::RichSegment;
-    use super::*;
-
-    #[test]
-    fn test_katakana_rewriter_generates_candidate() {
-        let rw = KatakanaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "きょう".into(),
-                surface: "今日".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 3000,
-        }];
-
-        let result = rw.generate(&paths, "きょう");
-
-        assert_eq!(result.len(), 1);
-        assert_eq!(result[0].surface_key(), "キョウ");
-        assert_eq!(result[0].viterbi_cost, 3000 + 10000);
-    }
-
-    #[test]
-    fn test_katakana_dedup_via_run_rewriters() {
-        let rw = KatakanaRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "きょう".into(),
-                surface: "キョウ".into(),
-                left_id: 0,
-                right_id: 0,
-                word_cost: 0,
-            }],
-            viterbi_cost: 5000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "きょう");
-
-        assert_eq!(
-            paths.len(),
-            1,
-            "should not add duplicate katakana candidate"
-        );
-    }
-
-    #[test]
-    fn test_katakana_rewriter_empty_paths() {
-        let rw = KatakanaRewriter;
-        let paths: Vec<ScoredPath> = Vec::new();
-
-        let result = rw.generate(&paths, "てすと");
-
-        assert_eq!(result.len(), 1);
-        assert_eq!(result[0].surface_key(), "テスト");
-        assert_eq!(result[0].viterbi_cost, 10000);
-    }
-
-    #[test]
-    fn test_run_rewriters_applies_all() {
-        let rw = KatakanaRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "あ".into(),
-                surface: "亜".into(),
-                left_id: 0,
-                right_id: 0,
-                word_cost: 0,
-            }],
-            viterbi_cost: 1000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "あ");
-
-        assert_eq!(paths.len(), 2);
-        // Katakana has higher cost, so inserted after 亜
-        assert_eq!(paths[0].surface_key(), "亜");
-        assert_eq!(paths[1].surface_key(), "ア");
-    }
-
-    #[test]
-    fn test_run_rewriters_dedup_across_rewriters() {
-        // HiraganaVariant and PartialHiragana could produce the same surface;
-        // run_rewriters should keep only the first one.
-        let hiragana_rw = HiraganaVariantRewriter;
-        let partial_rw = PartialHiraganaRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "され".into(),
-                    surface: "去れ".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ます".into(),
-                    surface: "ます".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 1000,
-        }];
-
-        run_rewriters(&[&hiragana_rw, &partial_rw], &mut paths, "されます");
-
-        // Both would generate "されます", but only one copy should exist
-        let count = paths
-            .iter()
-            .filter(|p| p.surface_key() == "されます")
-            .count();
-        assert_eq!(count, 1, "dedup should prevent duplicate across rewriters");
-    }
-
-    #[test]
-    fn test_run_rewriters_cost_ordered_insertion() {
-        // Compound kanji (best_cost) should be inserted at position 0
-        let rw = NumericRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "にじゅうさん".into(),
-                surface: "に十三".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 3000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "にじゅうさん");
-
-        assert_eq!(paths[0].surface_key(), "二十三");
-        assert_eq!(paths[0].viterbi_cost, 3000); // best_cost = 3000
-        assert_eq!(paths[1].surface_key(), "に十三");
-    }
-
-    #[test]
-    fn test_numeric_rewriter_generates_candidates() {
-        let rw = NumericRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "にじゅうさん".into(),
-                surface: "に十三".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 3000,
-        }];
-
-        let result = rw.generate(&paths, "にじゅうさん");
-
-        assert_eq!(result.len(), 3);
-        assert_eq!(result[0].surface_key(), "二十三");
-        assert_eq!(result[0].viterbi_cost, 3000); // compound → best_cost
-        assert_eq!(result[1].surface_key(), "23");
-        assert_eq!(result[1].viterbi_cost, 3000 + 5000);
-        assert_eq!(result[2].surface_key(), "２３");
-        assert_eq!(result[2].viterbi_cost, 3000 + 5001);
-    }
-
-    #[test]
-    fn test_numeric_rewriter_kanji_duplicate_skip() {
-        let rw = NumericRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "にじゅうさん".into(),
-                surface: "二十三".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 3000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "にじゅうさん");
-
-        // Kanji already exists, only halfwidth + fullwidth added
-        assert_eq!(paths.len(), 3);
-        assert_eq!(paths[0].surface_key(), "二十三");
-        assert_eq!(paths[1].surface_key(), "23");
-        assert_eq!(paths[2].surface_key(), "２３");
-    }
-
-    #[test]
-    fn test_numeric_rewriter_single_char_kanji_low_priority() {
-        let rw = NumericRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "じゅう".into(),
-                surface: "中".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 3000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "じゅう");
-
-        // 十 is single-char → base_cost (not best_cost), all after 中
-        assert_eq!(paths[0].surface_key(), "中");
-        let kanji = paths.iter().find(|p| p.surface_key() == "十").unwrap();
-        assert_eq!(kanji.viterbi_cost, 3000 + 5000);
-    }
-
-    #[test]
-    fn test_numeric_rewriter_skips_non_numeric() {
-        let rw = NumericRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "きょう".into(),
-                surface: "今日".into(),
-                left_id: 0,
-                right_id: 0,
-                word_cost: 0,
-            }],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "きょう");
-
-        assert!(
-            result.is_empty(),
-            "should not generate numeric candidates for non-numeric input"
-        );
-    }
-
-    #[test]
-    fn test_numeric_rewriter_skips_duplicate() {
-        let rw = NumericRewriter;
-        let mut paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "いち".into(),
-                surface: "1".into(),
-                left_id: 0,
-                right_id: 0,
-                word_cost: 0,
-            }],
-            viterbi_cost: 1000,
-        }];
-
-        run_rewriters(&[&rw], &mut paths, "いち");
-
-        // Half-width "1" already exists; kanji "一" (single-char) + full-width "１" added
-        assert_eq!(paths.len(), 3);
-        // All have high cost, so they come after "1"
-        assert_eq!(paths[0].surface_key(), "1");
-        assert!(paths.iter().any(|p| p.surface_key() == "一"));
-        assert!(paths.iter().any(|p| p.surface_key() == "１"));
-    }
-
-    #[test]
-    fn test_hiragana_variant_replaces_kanji() {
-        let rw = HiraganaVariantRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "りだいれくと".into(),
-                    surface: "リダイレクト".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "され".into(),
-                    surface: "去れ".into(),
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ます".into(),
-                    surface: "ます".into(),
-                    left_id: 30,
-                    right_id: 30,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "か".into(),
-                    surface: "化".into(),
-                    left_id: 40,
-                    right_id: 40,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 3000,
-        }];
-
-        let result = rw.generate(&paths, "りだいれくとされますか");
-
-        assert_eq!(result.len(), 1);
-        assert_eq!(result[0].surface_key(), "リダイレクトされますか");
-        assert_eq!(result[0].viterbi_cost, 3000 + 5000);
-    }
-
-    #[test]
-    fn test_hiragana_variant_skips_all_hiragana() {
-        let rw = HiraganaVariantRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "され".into(),
-                    surface: "され".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ます".into(),
-                    surface: "ます".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "されます");
-
-        assert!(
-            result.is_empty(),
-            "should not add variant when all segments are already hiragana"
-        );
-    }
-
-    #[test]
-    fn test_hiragana_variant_dedup_via_run_rewriters() {
-        let rw = HiraganaVariantRewriter;
-        let mut paths = vec![
-            ScoredPath {
-                segments: vec![RichSegment {
-                    reading: "され".into(),
-                    surface: "去れ".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                }],
-                viterbi_cost: 3000,
-            },
-            ScoredPath {
-                segments: vec![RichSegment {
-                    reading: "され".into(),
-                    surface: "され".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                }],
-                viterbi_cost: 4000,
-            },
-        ];
-
-        run_rewriters(&[&rw], &mut paths, "され");
-
-        assert_eq!(paths.len(), 2, "should not add duplicate hiragana variant");
-    }
-
-    #[test]
-    fn test_hiragana_variant_keeps_katakana() {
-        let rw = HiraganaVariantRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "てすと".into(),
-                    surface: "テスト".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ちゅう".into(),
-                    surface: "中".into(),
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 2000,
-        }];
-
-        let result = rw.generate(&paths, "てすとちゅう");
-
-        assert_eq!(result.len(), 1);
-        assert_eq!(result[0].surface_key(), "テストちゅう");
-    }
-
-    // ── PartialHiraganaRewriter tests ──────────────────────────────
-
-    #[test]
-    fn test_partial_hiragana_basic() {
-        let rw = PartialHiraganaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "した".into(),
-                    surface: "下".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ほう".into(),
-                    surface: "方".into(),
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 3000,
-        }];
-
-        let result = rw.generate(&paths, "したほう");
-
-        // Should produce 2 variants: した|方 and 下|ほう
-        assert_eq!(result.len(), 2);
-        assert!(result.iter().any(|p| p.surface_key() == "した方"));
-        assert!(result.iter().any(|p| p.surface_key() == "下ほう"));
-        assert!(result.iter().all(|p| p.viterbi_cost == 5000));
-    }
-
-    #[test]
-    fn test_partial_hiragana_multiple_kanji() {
-        let rw = PartialHiraganaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "した".into(),
-                    surface: "舌".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ほう".into(),
-                    surface: "法".into(),
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "が".into(),
-                    surface: "が".into(),
-                    left_id: 30,
-                    right_id: 30,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "したほうが");
-
-        // Two kanji segments → 2 variants: した|法|が and 舌|ほう|が
-        assert_eq!(result.len(), 2);
-        assert!(result.iter().any(|p| p.surface_key() == "した法が"));
-        assert!(result.iter().any(|p| p.surface_key() == "舌ほうが"));
-    }
-
-    #[test]
-    fn test_partial_hiragana_dedup_via_run_rewriters() {
-        let rw = PartialHiraganaRewriter;
-        let mut paths = vec![
-            ScoredPath {
-                segments: vec![
-                    RichSegment {
-                        reading: "した".into(),
-                        surface: "下".into(),
-                        left_id: 10,
-                        right_id: 10,
-                        word_cost: 0,
-                    },
-                    RichSegment {
-                        reading: "ほう".into(),
-                        surface: "方".into(),
-                        left_id: 20,
-                        right_id: 20,
-                        word_cost: 0,
-                    },
-                ],
-                viterbi_cost: 3000,
-            },
-            // This path already has the surface "した方"
-            ScoredPath {
-                segments: vec![
-                    RichSegment {
-                        reading: "した".into(),
-                        surface: "した".into(),
-                        left_id: 0,
-                        right_id: 0,
-                        word_cost: 0,
-                    },
-                    RichSegment {
-                        reading: "ほう".into(),
-                        surface: "方".into(),
-                        left_id: 20,
-                        right_id: 20,
-                        word_cost: 0,
-                    },
-                ],
-                viterbi_cost: 5000,
-            },
-        ];
-
-        run_rewriters(&[&rw], &mut paths, "したほう");
-
-        // "した方" already exists in paths, should not be duplicated
-        let count = paths.iter().filter(|p| p.surface_key() == "した方").count();
-        assert_eq!(count, 1, "should not add duplicate した方");
-    }
-
-    #[test]
-    fn test_partial_hiragana_all_hiragana_no_variants() {
-        let rw = PartialHiraganaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "した".into(),
-                    surface: "した".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ほう".into(),
-                    surface: "ほう".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "したほう");
-
-        assert!(
-            result.is_empty(),
-            "all-hiragana path should produce no variants"
-        );
-    }
-
-    #[test]
-    fn test_partial_hiragana_keeps_katakana() {
-        let rw = PartialHiraganaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "てすと".into(),
-                    surface: "テスト".into(),
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ちゅう".into(),
-                    surface: "中".into(),
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 2000,
-        }];
-
-        let result = rw.generate(&paths, "てすとちゅう");
-
-        // Only 中→ちゅう variant, katakana テスト should NOT be replaced
-        assert_eq!(result.len(), 1);
-        assert_eq!(result[0].surface_key(), "テストちゅう");
-    }
-
-    #[test]
-    fn test_partial_hiragana_single_segment_skip() {
-        let rw = PartialHiraganaRewriter;
-        let paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "した".into(),
-                surface: "下".into(),
-                left_id: 10,
-                right_id: 10,
-                word_cost: 0,
-            }],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "した");
-
-        assert!(result.is_empty(), "single-segment path should be skipped");
-    }
-
-    // ── KanjiVariantRewriter tests ────────────────────────────────────
-
-    fn make_lattice(input: &str, nodes: Vec<super::super::lattice::LatticeNode>) -> Lattice {
-        let char_count = input.chars().count();
-        let mut nodes_by_start: Vec<Vec<usize>> = vec![Vec::new(); char_count];
-        let mut nodes_by_end: Vec<Vec<usize>> = vec![Vec::new(); char_count + 1];
-        for (i, node) in nodes.iter().enumerate() {
-            nodes_by_start[node.start].push(i);
-            nodes_by_end[node.end].push(i);
-        }
-        Lattice {
-            input: input.to_string(),
-            nodes,
-            nodes_by_start,
-            nodes_by_end,
-            char_count,
-        }
-    }
-
-    fn lattice_node(
-        start: usize,
-        end: usize,
-        reading: &str,
-        surface: &str,
-        cost: i16,
-    ) -> super::super::lattice::LatticeNode {
-        super::super::lattice::LatticeNode {
-            start,
-            end,
-            reading: reading.into(),
-            surface: surface.into(),
-            cost,
-            left_id: 0,
-            right_id: 0,
-        }
-    }
-
-    #[test]
-    fn test_kanji_variant_replaces_2char_hiragana() {
-        // Lattice has ほう → 方 (cost=733) at position [3,5)
-        let lattice = make_lattice(
-            "あったほうが",
-            vec![
-                lattice_node(3, 5, "ほう", "ほう", 0),
-                lattice_node(3, 5, "ほう", "方", 733),
-                lattice_node(3, 5, "ほう", "法", 2181),
-            ],
-        );
-        let rw = KanjiVariantRewriter { lattice: &lattice };
-
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "あっ".into(),
-                    surface: "あっ".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "た".into(),
-                    surface: "た".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ほう".into(),
-                    surface: "ほう".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "が".into(),
-                    surface: "が".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 20000,
-        }];
-
-        let result = rw.generate(&paths, "あったほうが");
-
-        // Should produce variants for 方 and 法 (top 3, but only 2 kanji available)
-        assert_eq!(result.len(), 2);
-        assert!(result.iter().any(|p| p.surface_key() == "あった方が"));
-        assert!(result.iter().any(|p| p.surface_key() == "あった法が"));
-        // All variants should have +2000 penalty
-        assert!(result.iter().all(|p| p.viterbi_cost == 22000));
-    }
-
-    #[test]
-    fn test_kanji_variant_skips_single_char() {
-        // Single-char hiragana (し) should NOT be replaced
-        let lattice = make_lattice("した", vec![lattice_node(0, 1, "し", "死", 500)]);
-        let rw = KanjiVariantRewriter { lattice: &lattice };
-
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "し".into(),
-                    surface: "し".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "た".into(),
-                    surface: "た".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "した");
-
-        assert!(result.is_empty(), "single-char hiragana should be skipped");
-    }
-
-    #[test]
-    fn test_kanji_variant_skips_single_segment() {
-        let lattice = make_lattice("ほう", vec![lattice_node(0, 2, "ほう", "方", 733)]);
-        let rw = KanjiVariantRewriter { lattice: &lattice };
-
-        let paths = vec![ScoredPath {
-            segments: vec![RichSegment {
-                reading: "ほう".into(),
-                surface: "ほう".into(),
-                left_id: 0,
-                right_id: 0,
-                word_cost: 0,
-            }],
-            viterbi_cost: 1000,
-        }];
-
-        let result = rw.generate(&paths, "ほう");
-
-        assert!(result.is_empty(), "single-segment path should be skipped");
-    }
-
-    #[test]
-    fn test_kanji_variant_skips_kanji_segments() {
-        // Segments already containing kanji should not be processed
-        let lattice = make_lattice("したほう", vec![lattice_node(2, 4, "ほう", "方", 733)]);
-        let rw = KanjiVariantRewriter { lattice: &lattice };
-
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "した".into(),
-                    surface: "下".into(), // kanji — should skip
-                    left_id: 10,
-                    right_id: 10,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "ほう".into(),
-                    surface: "方".into(), // kanji — should skip
-                    left_id: 20,
-                    right_id: 20,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 3000,
-        }];
-
-        let result = rw.generate(&paths, "したほう");
-
-        assert!(
-            result.is_empty(),
-            "kanji segments should not produce variants"
-        );
-    }
-
-    #[test]
-    fn test_kanji_variant_skips_3char_segments() {
-        // 3-char hiragana segments should not be replaced
-        let lattice = make_lattice("たほうが", vec![lattice_node(0, 3, "たほう", "他方", 5290)]);
-        let rw = KanjiVariantRewriter { lattice: &lattice };
-
-        let paths = vec![ScoredPath {
-            segments: vec![
-                RichSegment {
-                    reading: "たほう".into(),
-                    surface: "たほう".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-                RichSegment {
-                    reading: "が".into(),
-                    surface: "が".into(),
-                    left_id: 0,
-                    right_id: 0,
-                    word_cost: 0,
-                },
-            ],
-            viterbi_cost: 5000,
-        }];
-
-        let result = rw.generate(&paths, "たほうが");
-
-        assert!(
-            result.is_empty(),
-            "3-char hiragana segment should be skipped"
-        );
-    }
-}
diff --git a/engine/crates/lex-core/src/converter/tests/mod.rs b/engine/crates/lex-core/src/converter/tests/mod.rs
index 55df4d6..77d5abf 100644
--- a/engine/crates/lex-core/src/converter/tests/mod.rs
+++ b/engine/crates/lex-core/src/converter/tests/mod.rs
@@ -6,3 +6,4 @@ mod grouping;
 mod history;
 mod nbest;
 mod reranker;
+mod rewriter;
diff --git a/engine/crates/lex-core/src/converter/tests/rewriter.rs b/engine/crates/lex-core/src/converter/tests/rewriter.rs
new file mode 100644
index 0000000..9718b0d
--- /dev/null
+++ b/engine/crates/lex-core/src/converter/tests/rewriter.rs
@@ -0,0 +1,969 @@
+use crate::converter::lattice::{Lattice, LatticeNode};
+use crate::converter::rewriter::{
+    run_rewriters, HiraganaVariantRewriter, KanjiVariantRewriter, KatakanaRewriter,
+    NumericRewriter, PartialHiraganaRewriter, Rewriter,
+};
+use crate::converter::viterbi::{RichSegment, ScoredPath};
+
+#[test]
+fn test_katakana_rewriter_generates_candidate() {
+    let rw = KatakanaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "きょう".into(),
+            surface: "今日".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 3000,
+    }];
+
+    let result = rw.generate(&paths, "きょう");
+
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "キョウ");
+    assert_eq!(result[0].viterbi_cost, 3000 + 10000);
+}
+
+#[test]
+fn test_katakana_dedup_via_run_rewriters() {
+    let rw = KatakanaRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "きょう".into(),
+            surface: "キョウ".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 5000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "きょう");
+
+    assert_eq!(
+        paths.len(),
+        1,
+        "should not add duplicate katakana candidate"
+    );
+}
+
+#[test]
+fn test_katakana_rewriter_empty_paths() {
+    let rw = KatakanaRewriter;
+    let paths: Vec<ScoredPath> = Vec::new();
+
+    let result = rw.generate(&paths, "てすと");
+
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "テスト");
+    assert_eq!(result[0].viterbi_cost, 10000);
+}
+
+#[test]
+fn test_run_rewriters_applies_all() {
+    let rw = KatakanaRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "あ".into(),
+            surface: "亜".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 1000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "あ");
+
+    assert_eq!(paths.len(), 2);
+    // Katakana has higher cost, so inserted after 亜
+    assert_eq!(paths[0].surface_key(), "亜");
+    assert_eq!(paths[1].surface_key(), "ア");
+}
+
+#[test]
+fn test_run_rewriters_dedup_across_rewriters() {
+    // HiraganaVariant and PartialHiragana could produce the same surface;
+    // run_rewriters should keep only the first one.
+    let hiragana_rw = HiraganaVariantRewriter;
+    let partial_rw = PartialHiraganaRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "され".into(),
+                surface: "去れ".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ます".into(),
+                surface: "ます".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 1000,
+    }];
+
+    run_rewriters(&[&hiragana_rw, &partial_rw], &mut paths, "されます");
+
+    // Both would generate "されます", but only one copy should exist
+    let count = paths
+        .iter()
+        .filter(|p| p.surface_key() == "されます")
+        .count();
+    assert_eq!(count, 1, "dedup should prevent duplicate across rewriters");
+}
+
+#[test]
+fn test_run_rewriters_cost_ordered_insertion() {
+    // Compound kanji (best_cost) should be inserted at position 0
+    let rw = NumericRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "にじゅうさん".into(),
+            surface: "に十三".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 3000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "にじゅうさん");
+
+    assert_eq!(paths[0].surface_key(), "二十三");
+    assert_eq!(paths[0].viterbi_cost, 3000); // best_cost = 3000
+    assert_eq!(paths[1].surface_key(), "に十三");
+}
+
+#[test]
+fn test_numeric_rewriter_generates_candidates() {
+    let rw = NumericRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "にじゅうさん".into(),
+            surface: "に十三".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 3000,
+    }];
+
+    let result = rw.generate(&paths, "にじゅうさん");
+
+    assert_eq!(result.len(), 3);
+    assert_eq!(result[0].surface_key(), "二十三");
+    assert_eq!(result[0].viterbi_cost, 3000); // compound → best_cost
+    assert_eq!(result[1].surface_key(), "23");
+    assert_eq!(result[1].viterbi_cost, 3000 + 5000);
+    assert_eq!(result[2].surface_key(), "２３");
+    assert_eq!(result[2].viterbi_cost, 3000 + 5001);
+}
+
+#[test]
+fn test_numeric_rewriter_kanji_duplicate_skip() {
+    let rw = NumericRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "にじゅうさん".into(),
+            surface: "二十三".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 3000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "にじゅうさん");
+
+    // Kanji already exists, only halfwidth + fullwidth added
+    assert_eq!(paths.len(), 3);
+    assert_eq!(paths[0].surface_key(), "二十三");
+    assert_eq!(paths[1].surface_key(), "23");
+    assert_eq!(paths[2].surface_key(), "２３");
+}
+
+#[test]
+fn test_numeric_rewriter_single_char_kanji_low_priority() {
+    let rw = NumericRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "じゅう".into(),
+            surface: "中".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 3000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "じゅう");
+
+    // 十 is single-char → base_cost (not best_cost), all after 中
+    assert_eq!(paths[0].surface_key(), "中");
+    let kanji = paths.iter().find(|p| p.surface_key() == "十").unwrap();
+    assert_eq!(kanji.viterbi_cost, 3000 + 5000);
+}
+
+#[test]
+fn test_numeric_rewriter_skips_non_numeric() {
+    let rw = NumericRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "きょう".into(),
+            surface: "今日".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "きょう");
+
+    assert!(
+        result.is_empty(),
+        "should not generate numeric candidates for non-numeric input"
+    );
+}
+
+#[test]
+fn test_numeric_rewriter_skips_duplicate() {
+    let rw = NumericRewriter;
+    let mut paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "いち".into(),
+            surface: "1".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 1000,
+    }];
+
+    run_rewriters(&[&rw], &mut paths, "いち");
+
+    // Half-width "1" already exists; kanji "一" (single-char) + full-width "１" added
+    assert_eq!(paths.len(), 3);
+    // All have high cost, so they come after "1"
+    assert_eq!(paths[0].surface_key(), "1");
+    assert!(paths.iter().any(|p| p.surface_key() == "一"));
+    assert!(paths.iter().any(|p| p.surface_key() == "１"));
+}
+
+#[test]
+fn test_hiragana_variant_replaces_kanji() {
+    let rw = HiraganaVariantRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "りだいれくと".into(),
+                surface: "リダイレクト".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "され".into(),
+                surface: "去れ".into(),
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ます".into(),
+                surface: "ます".into(),
+                left_id: 30,
+                right_id: 30,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "か".into(),
+                surface: "化".into(),
+                left_id: 40,
+                right_id: 40,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 3000,
+    }];
+
+    let result = rw.generate(&paths, "りだいれくとされますか");
+
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "リダイレクトされますか");
+    assert_eq!(result[0].viterbi_cost, 3000 + 5000);
+}
+
+#[test]
+fn test_hiragana_variant_skips_all_hiragana() {
+    let rw = HiraganaVariantRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "され".into(),
+                surface: "され".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ます".into(),
+                surface: "ます".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "されます");
+
+    assert!(
+        result.is_empty(),
+        "should not add variant when all segments are already hiragana"
+    );
+}
+
+#[test]
+fn test_hiragana_variant_dedup_via_run_rewriters() {
+    let rw = HiraganaVariantRewriter;
+    let mut paths = vec![
+        ScoredPath {
+            segments: vec![RichSegment {
+                reading: "され".into(),
+                surface: "去れ".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            }],
+            viterbi_cost: 3000,
+        },
+        ScoredPath {
+            segments: vec![RichSegment {
+                reading: "され".into(),
+                surface: "され".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            }],
+            viterbi_cost: 4000,
+        },
+    ];
+
+    run_rewriters(&[&rw], &mut paths, "され");
+
+    assert_eq!(paths.len(), 2, "should not add duplicate hiragana variant");
+}
+
+#[test]
+fn test_hiragana_variant_keeps_katakana() {
+    let rw = HiraganaVariantRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "てすと".into(),
+                surface: "テスト".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ちゅう".into(),
+                surface: "中".into(),
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 2000,
+    }];
+
+    let result = rw.generate(&paths, "てすとちゅう");
+
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "テストちゅう");
+}
+
+// ── PartialHiraganaRewriter tests ──────────────────────────────
+
+#[test]
+fn test_partial_hiragana_basic() {
+    let rw = PartialHiraganaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "した".into(),
+                surface: "下".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほう".into(),
+                surface: "方".into(),
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 3000,
+    }];
+
+    let result = rw.generate(&paths, "したほう");
+
+    // Should produce 2 variants: した|方 and 下|ほう
+    assert_eq!(result.len(), 2);
+    assert!(result.iter().any(|p| p.surface_key() == "した方"));
+    assert!(result.iter().any(|p| p.surface_key() == "下ほう"));
+    assert!(result.iter().all(|p| p.viterbi_cost == 5000));
+}
+
+#[test]
+fn test_partial_hiragana_multiple_kanji() {
+    let rw = PartialHiraganaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "した".into(),
+                surface: "舌".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほう".into(),
+                surface: "法".into(),
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "が".into(),
+                surface: "が".into(),
+                left_id: 30,
+                right_id: 30,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "したほうが");
+
+    // Two kanji segments → 2 variants: した|法|が and 舌|ほう|が
+    assert_eq!(result.len(), 2);
+    assert!(result.iter().any(|p| p.surface_key() == "した法が"));
+    assert!(result.iter().any(|p| p.surface_key() == "舌ほうが"));
+}
+
+#[test]
+fn test_partial_hiragana_dedup_via_run_rewriters() {
+    let rw = PartialHiraganaRewriter;
+    let mut paths = vec![
+        ScoredPath {
+            segments: vec![
+                RichSegment {
+                    reading: "した".into(),
+                    surface: "下".into(),
+                    left_id: 10,
+                    right_id: 10,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "ほう".into(),
+                    surface: "方".into(),
+                    left_id: 20,
+                    right_id: 20,
+                    word_cost: 0,
+                },
+            ],
+            viterbi_cost: 3000,
+        },
+        // This path already has the surface "した方"
+        ScoredPath {
+            segments: vec![
+                RichSegment {
+                    reading: "した".into(),
+                    surface: "した".into(),
+                    left_id: 0,
+                    right_id: 0,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "ほう".into(),
+                    surface: "方".into(),
+                    left_id: 20,
+                    right_id: 20,
+                    word_cost: 0,
+                },
+            ],
+            viterbi_cost: 5000,
+        },
+    ];
+
+    run_rewriters(&[&rw], &mut paths, "したほう");
+
+    // "した方" already exists in paths, should not be duplicated
+    let count = paths.iter().filter(|p| p.surface_key() == "した方").count();
+    assert_eq!(count, 1, "should not add duplicate した方");
+}
+
+#[test]
+fn test_partial_hiragana_all_hiragana_no_variants() {
+    let rw = PartialHiraganaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "した".into(),
+                surface: "した".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほう".into(),
+                surface: "ほう".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "したほう");
+
+    assert!(
+        result.is_empty(),
+        "all-hiragana path should produce no variants"
+    );
+}
+
+#[test]
+fn test_partial_hiragana_keeps_katakana() {
+    let rw = PartialHiraganaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "てすと".into(),
+                surface: "テスト".into(),
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ちゅう".into(),
+                surface: "中".into(),
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 2000,
+    }];
+
+    let result = rw.generate(&paths, "てすとちゅう");
+
+    // Only 中→ちゅう variant, katakana テスト should NOT be replaced
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "テストちゅう");
+}
+
+#[test]
+fn test_partial_hiragana_single_segment_skip() {
+    let rw = PartialHiraganaRewriter;
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "した".into(),
+            surface: "下".into(),
+            left_id: 10,
+            right_id: 10,
+            word_cost: 0,
+        }],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "した");
+
+    assert!(result.is_empty(), "single-segment path should be skipped");
+}
+
+// ── KanjiVariantRewriter tests ────────────────────────────────────
+
+fn make_lattice(input: &str, nodes: Vec<LatticeNode>) -> Lattice {
+    let char_count = input.chars().count();
+    let mut nodes_by_start: Vec<Vec<usize>> = vec![Vec::new(); char_count];
+    let mut nodes_by_end: Vec<Vec<usize>> = vec![Vec::new(); char_count + 1];
+    for (i, node) in nodes.iter().enumerate() {
+        nodes_by_start[node.start].push(i);
+        nodes_by_end[node.end].push(i);
+    }
+    Lattice {
+        input: input.to_string(),
+        nodes,
+        nodes_by_start,
+        nodes_by_end,
+        char_count,
+    }
+}
+
+fn lattice_node(start: usize, end: usize, reading: &str, surface: &str, cost: i16) -> LatticeNode {
+    LatticeNode {
+        start,
+        end,
+        reading: reading.into(),
+        surface: surface.into(),
+        cost,
+        left_id: 0,
+        right_id: 0,
+    }
+}
+
+#[test]
+fn test_kanji_variant_replaces_2char_hiragana() {
+    // Lattice has ほう → 方 (cost=733) at position [3,5)
+    let lattice = make_lattice(
+        "あったほうが",
+        vec![
+            lattice_node(3, 5, "ほう", "ほう", 0),
+            lattice_node(3, 5, "ほう", "方", 733),
+            lattice_node(3, 5, "ほう", "法", 2181),
+        ],
+    );
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "あっ".into(),
+                surface: "あっ".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "た".into(),
+                surface: "た".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほう".into(),
+                surface: "ほう".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "が".into(),
+                surface: "が".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 20000,
+    }];
+
+    let result = rw.generate(&paths, "あったほうが");
+
+    // Should produce variants for 方 and 法 (top 3, but only 2 kanji available)
+    assert_eq!(result.len(), 2);
+    assert!(result.iter().any(|p| p.surface_key() == "あった方が"));
+    assert!(result.iter().any(|p| p.surface_key() == "あった法が"));
+    // All variants should have +2000 penalty
+    assert!(result.iter().all(|p| p.viterbi_cost == 22000));
+}
+
+#[test]
+fn test_kanji_variant_skips_single_char() {
+    // Single-char hiragana (し) should NOT be replaced
+    let lattice = make_lattice("した", vec![lattice_node(0, 1, "し", "死", 500)]);
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "し".into(),
+                surface: "し".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "た".into(),
+                surface: "た".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "した");
+
+    assert!(result.is_empty(), "single-char hiragana should be skipped");
+}
+
+#[test]
+fn test_kanji_variant_skips_single_segment() {
+    let lattice = make_lattice("ほう", vec![lattice_node(0, 2, "ほう", "方", 733)]);
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "ほう".into(),
+            surface: "ほう".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 1000,
+    }];
+
+    let result = rw.generate(&paths, "ほう");
+
+    assert!(result.is_empty(), "single-segment path should be skipped");
+}
+
+#[test]
+fn test_kanji_variant_skips_kanji_segments() {
+    // Segments already containing kanji should not be processed
+    let lattice = make_lattice("したほう", vec![lattice_node(2, 4, "ほう", "方", 733)]);
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "した".into(),
+                surface: "下".into(), // kanji — should skip
+                left_id: 10,
+                right_id: 10,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほう".into(),
+                surface: "方".into(), // kanji — should skip
+                left_id: 20,
+                right_id: 20,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 3000,
+    }];
+
+    let result = rw.generate(&paths, "したほう");
+
+    assert!(
+        result.is_empty(),
+        "kanji segments should not produce variants"
+    );
+}
+
+#[test]
+fn test_kanji_variant_skips_3char_segments_no_2char_kanji() {
+    // 3-char hiragana segment "たほう" — lattice has 他方 at [0,3) but
+    // no 2-char kanji split, so subsplit produces nothing.
+    let lattice = make_lattice(
+        "たほうが",
+        vec![
+            lattice_node(0, 3, "たほう", "他方", 5290),
+            // No 2-char kanji at [0,2) ("たほ" has no kanji)
+        ],
+    );
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "たほう".into(),
+                surface: "たほう".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "が".into(),
+                surface: "が".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 5000,
+    }];
+
+    let result = rw.generate(&paths, "たほうが");
+
+    assert!(
+        result.is_empty(),
+        "3-char segment without 2-char kanji split should produce nothing"
+    );
+}
+
+#[test]
+fn test_kanji_variant_subsplit_3char_segment() {
+    // 3-char hiragana segment "ほうが" [3,6) — lattice has 方 at [3,5)
+    // and が at [5,6). Subsplit should produce "方が".
+    let lattice = make_lattice(
+        "あったほうが",
+        vec![
+            lattice_node(3, 5, "ほう", "方", 733),
+            lattice_node(3, 5, "ほう", "法", 2181),
+            lattice_node(5, 6, "が", "が", 0),
+        ],
+    );
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "あっ".into(),
+                surface: "あっ".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "た".into(),
+                surface: "た".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "ほうが".into(),
+                surface: "ほうが".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 20000,
+    }];
+
+    let result = rw.generate(&paths, "あったほうが");
+
+    // Should produce variants for 方+が and 法+が
+    assert_eq!(result.len(), 2, "should produce 2 subsplit variants");
+    assert!(result.iter().any(|p| p.surface_key() == "あった方が"));
+    assert!(result.iter().any(|p| p.surface_key() == "あった法が"));
+    // Check segment count increased by 1 (split added a segment)
+    assert!(result.iter().all(|p| p.segments.len() == 4));
+    assert!(result.iter().all(|p| p.viterbi_cost == 22000));
+}
+
+#[test]
+fn test_kanji_variant_subsplit_only_2char_prefix() {
+    // 4-char hiragana segment "ほうがく" — should only try 2-char prefix split.
+    // Lattice has 方 at [0,2) and がく at [2,4) (hiragana).
+    let lattice = make_lattice(
+        "ほうがくが",
+        vec![
+            lattice_node(0, 2, "ほう", "方", 733),
+            // がく has kanji 学 but that's for the right side — we need hiragana
+            lattice_node(2, 4, "がく", "がく", 0),
+        ],
+    );
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![
+            RichSegment {
+                reading: "ほうがく".into(),
+                surface: "ほうがく".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+            RichSegment {
+                reading: "が".into(),
+                surface: "が".into(),
+                left_id: 0,
+                right_id: 0,
+                word_cost: 0,
+            },
+        ],
+        viterbi_cost: 10000,
+    }];
+
+    let result = rw.generate(&paths, "ほうがくが");
+
+    // Should produce 方+がく variant
+    assert_eq!(result.len(), 1);
+    assert_eq!(result[0].surface_key(), "方がくが");
+}
+
+#[test]
+fn test_kanji_variant_reading_scan_single_segment() {
+    // Single-segment hiragana path "しておいたほうが" — reading scan should
+    // find 方 at [5,7) and produce a 3-segment variant.
+    let lattice = make_lattice(
+        "しておいたほうが",
+        vec![
+            lattice_node(5, 7, "ほう", "方", 733),
+            lattice_node(5, 7, "ほう", "法", 2181),
+        ],
+    );
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    // Single-segment hiragana path (as produced by HiraganaVariantRewriter)
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "しておいたほうが".into(),
+            surface: "しておいたほうが".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 30000,
+    }];
+
+    let result = rw.generate(&paths, "しておいたほうが");
+
+    // Should produce 方 and 法 variants
+    assert_eq!(result.len(), 2, "should produce 2 reading-scan variants");
+    assert!(result.iter().any(|p| p.surface_key() == "しておいた方が"));
+    assert!(result.iter().any(|p| p.surface_key() == "しておいた法が"));
+    // Each variant should have 3 segments: prefix + kanji + suffix
+    assert!(result.iter().all(|p| p.segments.len() == 3));
+    assert!(result.iter().all(|p| p.viterbi_cost == 32000));
+}
+
+#[test]
+fn test_kanji_variant_reading_scan_skips_edges() {
+    // Reading scan should skip positions at start (pos=0) and end
+    // where the remaining prefix/suffix would be empty.
+    let lattice = make_lattice("ほうが", vec![lattice_node(0, 2, "ほう", "方", 733)]);
+    let rw = KanjiVariantRewriter { lattice: &lattice };
+
+    let paths = vec![ScoredPath {
+        segments: vec![RichSegment {
+            reading: "ほうが".into(),
+            surface: "ほうが".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 10000,
+    }];
+
+    let result = rw.generate(&paths, "ほうが");
+
+    // pos=0 is skipped (no prefix), end=3 doesn't happen (2-char only goes to pos=1)
+    // pos=1 → [1,3) "うが" — no kanji
+    assert!(
+        result.is_empty(),
+        "should not produce variants at reading edges"
+    );
+}
diff --git a/engine/crates/lex-core/src/default_settings.toml b/engine/crates/lex-core/src/default_settings.toml
index 05c94ef..f6c0d2e 100644
--- a/engine/crates/lex-core/src/default_settings.toml
+++ b/engine/crates/lex-core/src/default_settings.toml
@@ -9,7 +9,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 te_form_kanji_penalty = 3500
 pronoun_cost_bonus = 3500
 single_char_kanji_penalty = 4000
diff --git a/engine/crates/lex-core/src/settings.rs b/engine/crates/lex-core/src/settings.rs
index 73ecdc6..2a21d93 100644
--- a/engine/crates/lex-core/src/settings.rs
+++ b/engine/crates/lex-core/src/settings.rs
@@ -332,7 +332,7 @@ mod tests {
         assert_eq!(s.cost.unknown_word_cost, 10000);
         assert_eq!(s.reranker.length_variance_weight, 2000);
         assert_eq!(s.reranker.structure_cost_filter, 6000);
-        assert_eq!(s.reranker.non_independent_kanji_penalty, 3000);
+        assert_eq!(s.reranker.non_independent_kanji_penalty, 1500);
         assert_eq!(s.reranker.te_form_kanji_penalty, 3500);
         assert_eq!(s.reranker.pronoun_cost_bonus, 3500);
         assert_eq!(s.reranker.single_char_kanji_penalty, 4000);
@@ -375,7 +375,7 @@ unknown_word_cost = 5000
 [reranker]
 length_variance_weight = 1000
 structure_cost_filter = 2000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 1500
@@ -407,7 +407,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 3000
@@ -439,7 +439,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 3000
@@ -470,7 +470,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 3000
@@ -501,7 +501,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 3000
@@ -533,7 +533,7 @@ unknown_word_cost = 10000
 [reranker]
 length_variance_weight = 2000
 structure_cost_filter = 6000
-non_independent_kanji_penalty = 3000
+non_independent_kanji_penalty = 1500
 
 [history]
 boost_per_use = 3000
diff --git a/mise.toml b/mise.toml
index 83ff9a6..74c8ed4 100644
--- a/mise.toml
+++ b/mise.toml
@@ -1,6 +1,6 @@
 [tasks.engine-lib]
 description = "Build universal static library (x86_64 + aarch64)"
-sources = ["engine/src/**/*.rs", "engine/crates/**/*.rs", "engine/Cargo.toml", "engine/Cargo.lock"]
+sources = ["engine/src/**/*.rs", "engine/crates/**/*.rs", "engine/crates/**/*.toml", "engine/Cargo.toml", "engine/Cargo.lock"]
 outputs = ["build/liblex_engine.a"]
 run = """
 #!/usr/bin/env bash

From 76d526d3e199e36c17424aaabcacf202d16ed5fd Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 21:22:03 +0900
Subject: [PATCH 2/5] refactor: simplify subsplit loop and reading-scan range
 in KanjiVariantRewriter

- subsplit: replace loop with single `let mid = seg_start + 2` since
  only left_len==2 was accepted
- reading-scan: tighten loop range to `1..char_count-2` to eliminate
  redundant break/continue guards

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/rewriter.rs | 113 ++++++++----------
 1 file changed, 52 insertions(+), 61 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/rewriter.rs b/engine/crates/lex-core/src/converter/rewriter.rs
index 2f50295..4c58d7c 100644
--- a/engine/crates/lex-core/src/converter/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/rewriter.rs
@@ -251,61 +251,58 @@ impl KanjiVariantRewriter<'_> {
         seg_end: usize,
         new_paths: &mut Vec<ScoredPath>,
     ) {
-        // Try each internal split point
-        for mid in (seg_start + 2)..seg_end {
-            let left_len = mid - seg_start;
-            // Only consider 2-char kanji prefixes to avoid incorrect boundaries
-            // (e.g. たほう → 他方)
-            if left_len != 2 {
-                continue;
-            }
+        // Split at the 2-char boundary only (to avoid incorrect boundaries
+        // like たほう → 他方).
+        let mid = seg_start + 2;
+        if mid >= seg_end {
+            return;
+        }
 
-            // Find kanji nodes for the left part [seg_start, mid)
-            let left_indices = match self.lattice.nodes_by_start.get(seg_start) {
-                Some(indices) => indices,
-                None => continue,
-            };
-            let mut kanji_nodes: Vec<_> = left_indices
-                .iter()
-                .map(|&idx| &self.lattice.nodes[idx])
-                .filter(|node| node.end == mid && node.surface.chars().any(is_kanji))
-                .collect();
-            kanji_nodes.sort_by_key(|n| n.cost);
-            kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
+        // Find kanji nodes for the left part [seg_start, mid)
+        let left_indices = match self.lattice.nodes_by_start.get(seg_start) {
+            Some(indices) => indices,
+            None => return,
+        };
+        let mut kanji_nodes: Vec<_> = left_indices
+            .iter()
+            .map(|&idx| &self.lattice.nodes[idx])
+            .filter(|node| node.end == mid && node.surface.chars().any(is_kanji))
+            .collect();
+        kanji_nodes.sort_by_key(|n| n.cost);
+        kanji_nodes.truncate(MAX_KANJI_PER_SEGMENT);
 
-            if kanji_nodes.is_empty() {
-                continue;
-            }
+        if kanji_nodes.is_empty() {
+            return;
+        }
 
-            // Find a hiragana node for the right part [mid, seg_end)
-            let right_indices = match self.lattice.nodes_by_start.get(mid) {
-                Some(indices) => indices,
-                None => continue,
-            };
-            // Pick the lowest-cost hiragana node for the remainder
-            let right_node = right_indices
-                .iter()
-                .map(|&idx| &self.lattice.nodes[idx])
-                .filter(|node| {
-                    node.end == seg_end
-                        && node.surface == node.reading
-                        && node.surface.chars().all(is_hiragana)
-                })
-                .min_by_key(|n| n.cost);
-            let Some(right_node) = right_node else {
-                continue;
-            };
+        // Find a hiragana node for the right part [mid, seg_end)
+        let right_indices = match self.lattice.nodes_by_start.get(mid) {
+            Some(indices) => indices,
+            None => return,
+        };
+        // Pick the lowest-cost hiragana node for the remainder
+        let right_node = right_indices
+            .iter()
+            .map(|&idx| &self.lattice.nodes[idx])
+            .filter(|node| {
+                node.end == seg_end
+                    && node.surface == node.reading
+                    && node.surface.chars().all(is_hiragana)
+            })
+            .min_by_key(|n| n.cost);
+        let Some(right_node) = right_node else {
+            return;
+        };
 
-            for kanji_node in kanji_nodes {
-                let mut new_segments = path.segments.clone();
-                let right_seg = super::viterbi::RichSegment::from(right_node);
-                new_segments[seg_idx] = super::viterbi::RichSegment::from(kanji_node);
-                new_segments.insert(seg_idx + 1, right_seg);
-                new_paths.push(ScoredPath {
-                    segments: new_segments,
-                    viterbi_cost: path.viterbi_cost.saturating_add(2000),
-                });
-            }
+        for kanji_node in kanji_nodes {
+            let mut new_segments = path.segments.clone();
+            let right_seg = super::viterbi::RichSegment::from(right_node);
+            new_segments[seg_idx] = super::viterbi::RichSegment::from(kanji_node);
+            new_segments.insert(seg_idx + 1, right_seg);
+            new_paths.push(ScoredPath {
+                segments: new_segments,
+                viterbi_cost: path.viterbi_cost.saturating_add(2000),
+            });
         }
     }
 
@@ -329,17 +326,11 @@ impl KanjiVariantRewriter<'_> {
             return;
         }
 
-        for pos in 0..char_count.saturating_sub(1) {
+        // Start at pos=1 (skip pos=0 — no prefix) and stop when end would
+        // reach char_count (no suffix). This also ensures prefix/suffix are
+        // at least 1-char each.
+        for pos in 1..char_count.saturating_sub(2) {
             let end = pos + 2;
-            if end > char_count {
-                break;
-            }
-
-            // Skip positions at the start or end (single-char prefix/suffix
-            // would be a function morpheme — let segment-based rewriter handle those)
-            if pos == 0 || end == char_count {
-                continue;
-            }
 
             let node_indices = match self.lattice.nodes_by_start.get(pos) {
                 Some(indices) => indices,

From 525d28132db3674277b7a3354aa146a2e81fe56f Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 21:27:45 +0900
Subject: [PATCH 3/5] fix: emit reading-scan variants as single segment to
 avoid POS misclassification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 reading-scan variants had dummy left_id/right_id=0 on
prefix/suffix segments, causing group_segments to misclassify
particles (e.g. が) as content words. Emit as ScoredPath::single()
instead so group_segments skips them (len<=1).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/rewriter.rs | 30 ++++++-------------
 .../lex-core/src/converter/tests/rewriter.rs  |  6 ++--
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/rewriter.rs b/engine/crates/lex-core/src/converter/rewriter.rs
index 4c58d7c..879aeef 100644
--- a/engine/crates/lex-core/src/converter/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/rewriter.rs
@@ -354,27 +354,15 @@ impl KanjiVariantRewriter<'_> {
             let suffix_reading: String = reading.chars().skip(end).collect();
 
             for node in kanji_nodes {
-                let segments = vec![
-                    super::viterbi::RichSegment {
-                        reading: prefix_reading.clone(),
-                        surface: prefix_reading.clone(),
-                        left_id: 0,
-                        right_id: 0,
-                        word_cost: 0,
-                    },
-                    super::viterbi::RichSegment::from(node),
-                    super::viterbi::RichSegment {
-                        reading: suffix_reading.clone(),
-                        surface: suffix_reading.clone(),
-                        left_id: 0,
-                        right_id: 0,
-                        word_cost: 0,
-                    },
-                ];
-                new_paths.push(ScoredPath {
-                    segments,
-                    viterbi_cost: base_cost.saturating_add(2000),
-                });
+                // Emit as a single segment so that group_segments (which uses
+                // left_id to classify morpheme roles) doesn't mis-group
+                // prefix/suffix with dummy POS IDs.
+                let surface = format!("{}{}{}", prefix_reading, node.surface, suffix_reading);
+                new_paths.push(ScoredPath::single(
+                    reading.to_string(),
+                    surface,
+                    base_cost.saturating_add(2000),
+                ));
             }
         }
     }
diff --git a/engine/crates/lex-core/src/converter/tests/rewriter.rs b/engine/crates/lex-core/src/converter/tests/rewriter.rs
index 9718b0d..8147a43 100644
--- a/engine/crates/lex-core/src/converter/tests/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/tests/rewriter.rs
@@ -931,12 +931,12 @@ fn test_kanji_variant_reading_scan_single_segment() {
 
     let result = rw.generate(&paths, "しておいたほうが");
 
-    // Should produce 方 and 法 variants
+    // Should produce 方 and 法 variants as single-segment paths
     assert_eq!(result.len(), 2, "should produce 2 reading-scan variants");
     assert!(result.iter().any(|p| p.surface_key() == "しておいた方が"));
     assert!(result.iter().any(|p| p.surface_key() == "しておいた法が"));
-    // Each variant should have 3 segments: prefix + kanji + suffix
-    assert!(result.iter().all(|p| p.segments.len() == 3));
+    // Single-segment to avoid group_segments POS misclassification
+    assert!(result.iter().all(|p| p.segments.len() == 1));
     assert!(result.iter().all(|p| p.viterbi_cost == 32000));
 }
 

From 69579f695f1d4f6a6a6e61b249954ad852bcf53f Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 21:34:52 +0900
Subject: [PATCH 4/5] fix: align doc comments with single-segment output and
 optimize O(n^2) allocation

- Update doc comments for Phase 2 and kanji_variants_from_reading to
  reflect single-segment output instead of 3-segment
- Update kanji_variants_subsplit doc to say "2-char boundary" not
  "each internal boundary"
- Precompute byte offsets in kanji_variants_from_reading to avoid
  O(n^2) allocations from repeated chars().take/skip
- Fix test comment for reading-scan test

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/rewriter.rs | 24 ++++++++++++-------
 .../lex-core/src/converter/tests/rewriter.rs  |  2 +-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/rewriter.rs b/engine/crates/lex-core/src/converter/rewriter.rs
index 879aeef..5ad8a42 100644
--- a/engine/crates/lex-core/src/converter/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/rewriter.rs
@@ -193,7 +193,7 @@ impl Rewriter for KanjiVariantRewriter<'_> {
         // When HiraganaVariantRewriter produces a single-segment all-hiragana
         // path, the segment-based approach above can't find kanji sub-spans.
         // Scan the reading directly to find 2-char kanji alternatives in the
-        // lattice and build 3-segment variants (prefix + kanji + suffix).
+        // lattice and build single-segment variants with the kanji inlined.
         if let Some(base) = paths.iter().find(|p| {
             p.segments.len() == 1
                 && p.segments[0].surface == p.segments[0].reading
@@ -239,7 +239,7 @@ impl KanjiVariantRewriter<'_> {
         }
     }
 
-    /// For a 3+ char hiragana segment, try splitting at each internal boundary
+    /// For a 3+ char hiragana segment, try splitting at the 2-char boundary
     /// to find a 2-char kanji prefix with a hiragana remainder.
     ///
     /// Example: segment "ほうが" [5,8) → split at 7 → kanji "方" [5,7) + "が" [7,8)
@@ -307,21 +307,28 @@ impl KanjiVariantRewriter<'_> {
     }
 
     /// Scan the full reading for 2-char positions that have kanji alternatives
-    /// in the lattice, and build 3-segment variants (hiragana prefix + kanji + hiragana suffix).
+    /// in the lattice, and build single-segment variants with the kanji inlined.
     ///
     /// This handles cases where the only hiragana path is single-segment
     /// (from HiraganaVariantRewriter) and the multi-segment paths all have
     /// kanji/compound segments that don't expose the 2-char hiragana sub-span.
     ///
     /// Example: reading "しておいたほうが" → finds 方 at [5,7) →
-    /// builds "しておいた" + "方" + "が"
+    /// emits "しておいた方が" as a single segment
     fn kanji_variants_from_reading(
         &self,
         reading: &str,
         base_cost: i64,
         new_paths: &mut Vec<ScoredPath>,
     ) {
-        let char_count = reading.chars().count();
+        // Precompute byte offsets for each char boundary to avoid O(n^2)
+        // allocations from repeated chars().take/skip.
+        let byte_offsets: Vec<usize> = reading
+            .char_indices()
+            .map(|(i, _)| i)
+            .chain(std::iter::once(reading.len()))
+            .collect();
+        let char_count = byte_offsets.len() - 1;
         if char_count < 3 {
             return;
         }
@@ -349,15 +356,14 @@ impl KanjiVariantRewriter<'_> {
                 continue;
             }
 
-            // Build prefix reading [0, pos) and suffix reading [end, char_count)
-            let prefix_reading: String = reading.chars().take(pos).collect();
-            let suffix_reading: String = reading.chars().skip(end).collect();
+            let prefix = &reading[..byte_offsets[pos]];
+            let suffix = &reading[byte_offsets[end]..];
 
             for node in kanji_nodes {
                 // Emit as a single segment so that group_segments (which uses
                 // left_id to classify morpheme roles) doesn't mis-group
                 // prefix/suffix with dummy POS IDs.
-                let surface = format!("{}{}{}", prefix_reading, node.surface, suffix_reading);
+                let surface = format!("{}{}{}", prefix, node.surface, suffix);
                 new_paths.push(ScoredPath::single(
                     reading.to_string(),
                     surface,
diff --git a/engine/crates/lex-core/src/converter/tests/rewriter.rs b/engine/crates/lex-core/src/converter/tests/rewriter.rs
index 8147a43..04af8dd 100644
--- a/engine/crates/lex-core/src/converter/tests/rewriter.rs
+++ b/engine/crates/lex-core/src/converter/tests/rewriter.rs
@@ -907,7 +907,7 @@ fn test_kanji_variant_subsplit_only_2char_prefix() {
 #[test]
 fn test_kanji_variant_reading_scan_single_segment() {
     // Single-segment hiragana path "しておいたほうが" — reading scan should
-    // find 方 at [5,7) and produce a 3-segment variant.
+    // find 方 at [5,7) and produce a single-segment variant with kanji inlined.
     let lattice = make_lattice(
         "しておいたほうが",
         vec![

From 94778da583e42a850e6b0de228859eefd2c45c40 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 21:49:46 +0900
Subject: [PATCH 5/5] fix: update serde default for
 non_independent_kanji_penalty to 1500

The serde fallback function still returned 3000 while default_settings.toml
was updated to 1500, causing inconsistency when user TOML omits the key.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 engine/crates/lex-core/src/settings.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/crates/lex-core/src/settings.rs b/engine/crates/lex-core/src/settings.rs
index 2a21d93..4203ad4 100644
--- a/engine/crates/lex-core/src/settings.rs
+++ b/engine/crates/lex-core/src/settings.rs
@@ -118,7 +118,7 @@ pub struct RerankerSettings {
 }
 
 fn default_non_independent_kanji_penalty() -> i64 {
-    3000
+    1500
 }
 
 fn default_te_form_kanji_penalty() -> i64 {