1- import { PATCH_OP_TYPE , type Patch , diff , diffEdit , overlap , normalize , apply , src , dst , invert } from '../str' ;
1+ import { PATCH_OP_TYPE , type Patch , diff , diffEdit , overlap , normalize , apply , src , dst , invert , pfx , sfx } from '../str' ;
22import { assertPatch } from './util' ;
33
4+ describe ( 'pfx()' , ( ) => {
5+ test ( 'finds common prefixes' , ( ) => {
6+ expect ( pfx ( 'abc' , 'b' ) ) . toEqual ( 0 ) ;
7+ expect ( pfx ( 'abc' , 'a' ) ) . toEqual ( 1 ) ;
8+ expect ( pfx ( 'abc' , 'ab' ) ) . toEqual ( 2 ) ;
9+ expect ( pfx ( 'abc' , 'abc' ) ) . toEqual ( 3 ) ;
10+ expect ( pfx ( 'abc' , 'abcd' ) ) . toEqual ( 3 ) ;
11+ expect ( pfx ( 'abc' , 'abcde' ) ) . toEqual ( 3 ) ;
12+ expect ( pfx ( '👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
13+ expect ( pfx ( '👨🍳' , '👨🍳chef' ) ) . toEqual ( 5 ) ;
14+ expect ( pfx ( '👨🍳chef' , '👨🍳' ) ) . toEqual ( 5 ) ;
15+ expect ( pfx ( '👨🍳👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
16+ expect ( '👨🍳chef' . slice ( 0 , 5 ) ) . toBe ( '👨🍳' ) ;
17+ } ) ;
18+
19+ test ( 'handles grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
20+ const family = '👨👩👧👦' ;
21+ expect ( pfx ( family , family ) ) . toEqual ( 11 ) ;
22+ expect ( pfx ( family + 'abc' , family ) ) . toEqual ( 11 ) ;
23+ expect ( pfx ( family + 'abc' , family + 'xyz' ) ) . toEqual ( 11 ) ;
24+ expect ( pfx ( 'prefix' + family , 'prefix' + family ) ) . toEqual ( 6 + 11 ) ;
25+ const womanTech = '👩🏽💻' ;
26+ expect ( pfx ( womanTech , womanTech ) ) . toEqual ( 7 ) ;
27+ expect ( pfx ( womanTech + 'code' , womanTech ) ) . toEqual ( 7 ) ;
28+ expect ( pfx ( 'hello' + womanTech , 'hello' + womanTech ) ) . toEqual ( 5 + 7 ) ;
29+ } ) ;
30+
31+ test ( 'handles flag emojis (regional indicators)' , ( ) => {
32+ const usFlag = '🇺🇸' ;
33+ const ukFlag = '🇬🇧' ;
34+ expect ( pfx ( usFlag , usFlag ) ) . toEqual ( 4 ) ;
35+ expect ( pfx ( usFlag + 'USA' , usFlag ) ) . toEqual ( 4 ) ;
36+ expect ( pfx ( usFlag , ukFlag ) ) . toEqual ( 0 ) ;
37+ expect ( pfx ( 'hello' + usFlag , 'hello' + usFlag ) ) . toEqual ( 5 + 4 ) ;
38+ } ) ;
39+
40+ test ( 'handles combining diacritical marks' , ( ) => {
41+ const combining = 'e\u0301' ; // e + combining acute accent
42+ expect ( pfx ( combining , combining ) ) . toEqual ( 2 ) ;
43+ expect ( pfx ( combining + 'llo' , combining ) ) . toEqual ( 2 ) ;
44+ expect ( pfx ( 'hello' + combining , 'hello' + combining ) ) . toEqual ( 5 + 2 ) ;
45+
46+ // Multiple combining marks
47+ const multiCombining = 'a\u0301\u0302\u0303' ;
48+ expect ( pfx ( multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
49+ } ) ;
50+
51+ test ( 'handles variation selectors' , ( ) => {
52+ const heartText = '❤\uFE0E' ; // text style
53+ const heartEmoji = '❤\uFE0F' ; // emoji style
54+ expect ( pfx ( heartText , heartText ) ) . toEqual ( 2 ) ;
55+ expect ( pfx ( heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
56+ expect ( pfx ( heartText , heartEmoji ) ) . toEqual ( 1 ) ; // Only the base character matches
57+ } ) ;
58+
59+ test ( 'handles mixed grapheme clusters' , ( ) => {
60+ const chef = '👨🍳' ;
61+ const family = '👨👩👧👦' ;
62+ const combined = chef + family ;
63+ expect ( pfx ( combined , combined ) ) . toEqual ( 16 ) ;
64+ expect ( pfx ( combined + 'text' , combined ) ) . toEqual ( 16 ) ;
65+ expect ( pfx ( 'abc' + combined , 'abc' + combined ) ) . toEqual ( 3 + 16 ) ;
66+ } ) ;
67+ } ) ;
68+
69+ describe ( 'sfx()' , ( ) => {
70+ test ( 'finds common suffixes' , ( ) => {
71+ expect ( sfx ( 'abc' , 'b' ) ) . toEqual ( 0 ) ;
72+ expect ( sfx ( 'abc' , 'c' ) ) . toEqual ( 1 ) ;
73+ expect ( sfx ( 'abc' , 'bc' ) ) . toEqual ( 2 ) ;
74+ expect ( sfx ( 'abc' , 'abc' ) ) . toEqual ( 3 ) ;
75+ expect ( sfx ( 'abc' , '_abc' ) ) . toEqual ( 3 ) ;
76+ expect ( sfx ( 'abc' , 'abcd' ) ) . toEqual ( 0 ) ;
77+ expect ( sfx ( '👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
78+ expect ( sfx ( '👨🍳' , '👨🍳chef' ) ) . toEqual ( 0 ) ;
79+ expect ( sfx ( '👨🍳chef' , '👨🍳' ) ) . toEqual ( 0 ) ;
80+ expect ( sfx ( '👨🍳' , 'chef👨🍳' ) ) . toEqual ( 5 ) ;
81+ expect ( sfx ( 'chef👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
82+ expect ( sfx ( '👨🍳👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
83+ } ) ;
84+
85+ test ( 'handles grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
86+ const family = '👨👩👧👦' ;
87+ expect ( sfx ( family , family ) ) . toEqual ( 11 ) ;
88+ expect ( sfx ( 'abc' + family , family ) ) . toEqual ( 11 ) ;
89+ expect ( sfx ( 'xyz' + family , 'abc' + family ) ) . toEqual ( 11 ) ;
90+ expect ( sfx ( family + 'suffix' , family + 'suffix' ) ) . toEqual ( 6 + 11 ) ;
91+ const womanTech = '👩🏽💻' ;
92+ expect ( sfx ( womanTech , womanTech ) ) . toEqual ( 7 ) ;
93+ expect ( sfx ( 'code' + womanTech , womanTech ) ) . toEqual ( 7 ) ;
94+ expect ( sfx ( womanTech + 'hello' , womanTech + 'hello' ) ) . toEqual ( 5 + 7 ) ;
95+ } ) ;
96+
97+ test ( 'handles flag emojis (regional indicators)' , ( ) => {
98+ const usFlag = '🇺🇸' ;
99+ const ukFlag = '🇬🇧' ;
100+ expect ( sfx ( usFlag , usFlag ) ) . toEqual ( 4 ) ;
101+ expect ( sfx ( 'USA' + usFlag , usFlag ) ) . toEqual ( 4 ) ;
102+ expect ( sfx ( usFlag , ukFlag ) ) . toEqual ( 0 ) ;
103+ expect ( sfx ( usFlag + 'hello' , usFlag + 'hello' ) ) . toEqual ( 5 + 4 ) ;
104+ } ) ;
105+
106+ test ( 'handles combining diacritical marks' , ( ) => {
107+ const combining = 'e\u0301' ; // e + combining acute accent
108+ expect ( sfx ( combining , combining ) ) . toEqual ( 2 ) ;
109+ expect ( sfx ( 'ell' + combining , combining ) ) . toEqual ( 2 ) ;
110+ expect ( sfx ( combining + 'hello' , combining + 'hello' ) ) . toEqual ( 5 + 2 ) ;
111+ const multiCombining = 'a\u0301\u0302\u0303' ; // a with multiple accents
112+ expect ( sfx ( multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
113+ expect ( sfx ( 'text' + multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
114+ } ) ;
115+
116+ test ( 'handles variation selectors' , ( ) => {
117+ const heartText = '❤\uFE0E' ; // text style
118+ const heartEmoji = '❤\uFE0F' ; // emoji style
119+ expect ( sfx ( heartText , heartText ) ) . toEqual ( 2 ) ;
120+ expect ( sfx ( heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
121+ expect ( sfx ( heartText , heartEmoji ) ) . toEqual ( 0 ) ;
122+ expect ( sfx ( 'love' + heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
123+ } ) ;
124+
125+ test ( 'handles mixed grapheme clusters' , ( ) => {
126+ const chef = '👨🍳' ;
127+ const family = '👨👩👧👦' ;
128+ const combined = family + chef ;
129+ expect ( sfx ( combined , combined ) ) . toEqual ( 16 ) ;
130+ expect ( sfx ( 'text' + combined , combined ) ) . toEqual ( 16 ) ;
131+ expect ( sfx ( combined + 'abc' , combined + 'abc' ) ) . toEqual ( 3 + 16 ) ;
132+ } ) ;
133+
134+ test ( 'does not split grapheme clusters at boundaries' , ( ) => {
135+ const chef = '👨🍳' ;
136+ const family = '👨👩👧👦' ;
137+
138+ // Ensure we don't split in the middle of a grapheme cluster
139+ expect ( sfx ( 'x' + chef , chef ) ) . toEqual ( 5 ) ; // full chef emoji
140+ expect ( sfx ( 'xy' + family , family ) ) . toEqual ( 11 ) ; // full family emoji
141+
142+ // When the suffix is part of a larger grapheme, it should not match partially
143+ expect ( sfx ( '👨🍳👩' , '👩' ) ) . toEqual ( 2 ) ; // Just the woman emoji at end
144+ expect ( sfx ( 'text👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ; // Full chef emoji
145+ } ) ;
146+ } ) ;
147+
4148describe ( 'normalize()' , ( ) => {
5149 test ( 'joins consecutive same type operations' , ( ) => {
6150 expect (
@@ -240,6 +384,75 @@ describe('diff()', () => {
240384 assertPatch ( 'a🙃b' , 'a👋b' ) ;
241385 } ) ;
242386
387+ test ( 'grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
388+ const chef = '👨🍳' ;
389+ const family = '👨👩👧👦' ;
390+ const womanTech = '👩🏽💻' ;
391+ assertPatch ( chef , family ) ;
392+ assertPatch ( family , chef ) ;
393+ assertPatch ( womanTech , chef ) ;
394+ assertPatch ( 'hello' , 'hello' + chef ) ;
395+ assertPatch ( 'hello' , chef + 'hello' ) ;
396+ assertPatch ( 'hello world' , 'hello' + family + 'world' ) ;
397+ assertPatch ( 'hello' + chef , 'hello' ) ;
398+ assertPatch ( chef + 'hello' , 'hello' ) ;
399+ assertPatch ( 'hello' + family + 'world' , 'helloworld' ) ;
400+ assertPatch ( chef + family , family + chef ) ;
401+ assertPatch ( 'a' + chef + 'b' + family + 'c' , 'x' + family + 'y' + chef + 'z' ) ;
402+ assertPatch ( 'The ' + chef + ' cooks' , 'A ' + chef + ' bakes' ) ;
403+ assertPatch ( 'Team: ' + family , 'Group: ' + womanTech ) ;
404+ } ) ;
405+
406+ test ( 'flag emojis (regional indicators)' , ( ) => {
407+ const ruFlag = '🇷🇺' ;
408+ const chFlag = '🇨🇳' ;
409+ const inFlag = '🇮🇳' ;
410+ assertPatch ( ruFlag , chFlag ) ;
411+ assertPatch ( chFlag , inFlag ) ;
412+ assertPatch ( 'Made in ' + ruFlag , 'Made in ' + chFlag ) ;
413+ assertPatch ( ruFlag + ' USA' , chFlag + ' UK' ) ;
414+ assertPatch ( 'Hello ' + ruFlag + ' world' , 'Hello ' + inFlag + ' world' ) ;
415+ assertPatch ( ruFlag + chFlag , chFlag + ruFlag ) ;
416+ assertPatch ( 'Flags: ' + ruFlag + chFlag + inFlag , 'Flags: ' + inFlag + chFlag + ruFlag ) ;
417+ } ) ;
418+
419+ test ( 'combining diacritical marks' , ( ) => {
420+ const combining1 = 'e\u0301' ;
421+ const combining2 = 'e\u0300' ;
422+ const precomposed = 'é' ;
423+ assertPatch ( combining1 , combining2 ) ;
424+ assertPatch ( combining1 , precomposed ) ;
425+ assertPatch ( precomposed , combining1 ) ;
426+ assertPatch ( 'cafe\u0301' , 'café' ) ;
427+ assertPatch ( 'naïve' , 'naive' ) ;
428+ assertPatch ( 'résumé' , 'resume' ) ;
429+ const multiCombining = 'a\u0301\u0302\u0303' ;
430+ assertPatch ( 'test' + multiCombining , 'test' ) ;
431+ assertPatch ( 'test' , 'test' + multiCombining ) ;
432+ } ) ;
433+
434+ test ( 'variation selectors' , ( ) => {
435+ const heartText = '❤\uFE0E' ; // text style
436+ const heartEmoji = '❤\uFE0F' ; // emoji style
437+ assertPatch ( heartText , heartEmoji ) ;
438+ assertPatch ( heartEmoji , heartText ) ;
439+ assertPatch ( 'I ' + heartText + ' code' , 'I ' + heartEmoji + ' code' ) ;
440+ assertPatch ( 'Love ' + heartEmoji , 'Love ' + heartText ) ;
441+ } ) ;
442+
443+ test ( 'complex grapheme clusters in real scenarios' , ( ) => {
444+ const chef = '👨🍳' ;
445+ const family = '👨👩👧👦' ;
446+ const womanTech = '👩🏽💻' ;
447+ const usFlag = '🇺🇸' ;
448+ assertPatch ( 'Hey ' + chef + ', dinner ready?' , 'Hi ' + womanTech + ', code ready?' ) ;
449+ assertPatch ( family + ' going to ' + usFlag , family + ' staying home' ) ;
450+ assertPatch (
451+ 'The ' + chef + ' from ' + usFlag + ' is amazing' ,
452+ 'A ' + womanTech + ' from ' + usFlag + ' is brilliant' ,
453+ ) ;
454+ } ) ;
455+
243456 test ( 'same strings' , ( ) => {
244457 assertPatch ( '' , '' ) ;
245458 assertPatch ( '1' , '1' ) ;
@@ -331,6 +544,33 @@ describe('diffEdit()', () => {
331544 assertDiffEdit ( 'aaa' , 'bbb' , 'ccc' ) ;
332545 assertDiffEdit ( '1' , '2' , '3' ) ;
333546 } ) ;
547+
548+ test ( 'handles grapheme cluster inserts and deletes' , ( ) => {
549+ const chef = '👨🍳' ;
550+ const family = '👨👩👧👦' ;
551+ const womanTech = '👩🏽💻' ;
552+ const usFlag = '🇺🇸' ;
553+
554+ // Insert grapheme clusters
555+ assertDiffEdit ( '' , chef , '' ) ;
556+ assertDiffEdit ( 'Hello ' , chef , '' ) ;
557+ assertDiffEdit ( '' , chef , ' world' ) ;
558+ assertDiffEdit ( 'Hello ' , chef , ' world' ) ;
559+ assertDiffEdit ( 'Team: ' , family , ' rocks!' ) ;
560+
561+ // Insert multiple grapheme clusters
562+ assertDiffEdit ( '' , chef + family , '' ) ;
563+ assertDiffEdit ( 'Coders: ' , womanTech + chef , ' win' ) ;
564+
565+ // Insert with flags
566+ assertDiffEdit ( 'Made in ' , usFlag , '' ) ;
567+ assertDiffEdit ( '' , usFlag , ' USA' ) ;
568+
569+ // Combining characters
570+ const combining = 'e\u0301' ;
571+ assertDiffEdit ( 'caf' , combining , '' ) ;
572+ assertDiffEdit ( '' , combining , ' accent' ) ;
573+ } ) ;
334574} ) ;
335575
336576describe ( 'overlap()' , ( ) => {
@@ -353,6 +593,21 @@ describe('overlap()', () => {
353593 expect ( overlap ( 'abc' , 'abc' ) ) . toEqual ( 3 ) ;
354594 expect ( overlap ( 'a' , 'a' ) ) . toEqual ( 1 ) ;
355595 } ) ;
596+
597+ test ( 'handles grapheme clusters' , ( ) => {
598+ const chef = '👨🍳' ;
599+ const family = '👨👩👧👦' ;
600+
601+ // Overlap with grapheme clusters
602+ expect ( overlap ( 'hello' + chef , chef + 'world' ) ) . toEqual ( 5 ) ;
603+ expect ( overlap ( 'abc' + family , family + 'xyz' ) ) . toEqual ( 11 ) ;
604+
605+ // No overlap when grapheme differs
606+ expect ( overlap ( 'hello' + chef , family + 'world' ) ) . toEqual ( 0 ) ;
607+
608+ // Text overlap with grapheme clusters
609+ expect ( overlap ( 'prefix' + chef , chef + 'suffix' ) ) . toEqual ( 5 ) ;
610+ } ) ;
356611} ) ;
357612
358613describe ( 'Unicode edge cases' , ( ) => {
@@ -404,6 +659,18 @@ describe('Unicode edge cases', () => {
404659 assertPatch ( nfd , nfc ) ;
405660 assertPatch ( `hello ${ nfc } ` , `hello ${ nfd } ` ) ;
406661 } ) ;
662+
663+ test ( 'handles complex emoji with ZWJ sequences' , ( ) => {
664+ const chefEmoji = '👨🍳' ; // chef emoji (man + ZWJ + cooking)
665+ const src = chefEmoji ;
666+ const dst = 'chef' + chefEmoji ;
667+ const patch = normalize ( diff ( src , dst ) ) ;
668+ assertPatch ( src , dst , patch ) ;
669+ expect ( patch ) . toEqual ( [
670+ [ PATCH_OP_TYPE . INS , 'chef' ] ,
671+ [ PATCH_OP_TYPE . EQL , chefEmoji ] ,
672+ ] ) ;
673+ } ) ;
407674} ) ;
408675
409676describe ( 'Algorithm edge cases' , ( ) => {
0 commit comments