From 66a27828cfe6b72cb0df8a77cc54c5b4820825e7 Mon Sep 17 00:00:00 2001 From: streamich Date: Fri, 24 Oct 2025 22:10:59 +0200 Subject: [PATCH 1/4] =?UTF-8?q?test(util):=20=F0=9F=92=8D=20add=20failing?= =?UTF-8?q?=20sfx()=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/util/diff/__tests__/str.spec.ts | 33 ++++++++++++++++++- packages/json-joy/src/util/diff/str.ts | 2 +- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/packages/json-joy/src/util/diff/__tests__/str.spec.ts b/packages/json-joy/src/util/diff/__tests__/str.spec.ts index 417c2d493f..c319e39f49 100644 --- a/packages/json-joy/src/util/diff/__tests__/str.spec.ts +++ b/packages/json-joy/src/util/diff/__tests__/str.spec.ts @@ -1,6 +1,37 @@ -import {PATCH_OP_TYPE, type Patch, diff, diffEdit, overlap, normalize, apply, src, dst, invert} from '../str'; +import {PATCH_OP_TYPE, type Patch, diff, diffEdit, overlap, normalize, apply, src, dst, invert, pfx, sfx} from '../str'; import {assertPatch} from './util'; +describe('pfx()', () => { + test('finds common prefixes', () => { + expect(pfx('abc', 'b')).toEqual(0); + expect(pfx('abc', 'a')).toEqual(1); + expect(pfx('abc', 'ab')).toEqual(2); + expect(pfx('abc', 'abc')).toEqual(3); + expect(pfx('abc', 'abcd')).toEqual(3); + expect(pfx('abc', 'abcde')).toEqual(3); + expect(pfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect(pfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณchef')).toEqual(5); + expect(pfx('๐Ÿ‘จโ€๐Ÿณchef', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect(pfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect('๐Ÿ‘จโ€๐Ÿณchef'.slice(0, 5)).toBe('๐Ÿ‘จโ€๐Ÿณ'); + }); +}); + +describe('sfx()', () => { + test('finds common suffixes', () => { + expect(sfx('abc', 'b')).toEqual(0); + expect(sfx('abc', 'c')).toEqual(1); + expect(sfx('abc', 'bc')).toEqual(2); + expect(sfx('abc', 'abc')).toEqual(3); + expect(sfx('abc', '_abc')).toEqual(3); + expect(sfx('abc', 'abcd')).toEqual(0); + expect(sfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + // expect(sfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณchef')).toEqual(5); + // expect(sfx('๐Ÿ‘จโ€๐Ÿณchef', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + // expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + }); +}); + describe('normalize()', () => { test('joins consecutive same type operations', () => { expect( diff --git a/packages/json-joy/src/util/diff/str.ts b/packages/json-joy/src/util/diff/str.ts index 07e9b2d605..8cdc245d29 100644 --- a/packages/json-joy/src/util/diff/str.ts +++ b/packages/json-joy/src/util/diff/str.ts @@ -388,7 +388,7 @@ const diffNoCommonAffix = (src: string, dst: string): Patch => { * @param txt2 Second string. * @return The number of characters common to the start of each string. */ -export const pfx = (txt1: string, txt2: string) => { +export const pfx = (txt1: string, txt2: string): number => { if (!txt1 || !txt2 || txt1.charAt(0) !== txt2.charAt(0)) return 0; let min = 0; let max = Math.min(txt1.length, txt2.length); From dd5c5c95b8ba005aef7cb979002a3ac08c3dfc14 Mon Sep 17 00:00:00 2001 From: streamich Date: Fri, 24 Oct 2025 22:33:08 +0200 Subject: [PATCH 2/4] =?UTF-8?q?fix(util):=20=F0=9F=90=9B=20check=20for=20g?= =?UTF-8?q?rapheme=20cluster=20in=20sfx()=20computation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/util/diff/__tests__/str.spec.ts | 20 ++++++++++-- packages/json-joy/src/util/diff/str.ts | 32 +++++++++++++++++-- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/packages/json-joy/src/util/diff/__tests__/str.spec.ts b/packages/json-joy/src/util/diff/__tests__/str.spec.ts index c319e39f49..dc08a9a196 100644 --- a/packages/json-joy/src/util/diff/__tests__/str.spec.ts +++ b/packages/json-joy/src/util/diff/__tests__/str.spec.ts @@ -26,9 +26,11 @@ describe('sfx()', () => { expect(sfx('abc', '_abc')).toEqual(3); expect(sfx('abc', 'abcd')).toEqual(0); expect(sfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); - // expect(sfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณchef')).toEqual(5); - // expect(sfx('๐Ÿ‘จโ€๐Ÿณchef', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); - // expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect(sfx('๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณchef')).toEqual(0); + expect(sfx('๐Ÿ‘จโ€๐Ÿณchef', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(0); + expect(sfx('๐Ÿ‘จโ€๐Ÿณ', 'chef๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect(sfx('chef๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); + expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); }); }); @@ -435,6 +437,18 @@ describe('Unicode edge cases', () => { assertPatch(nfd, nfc); assertPatch(`hello ${nfc}`, `hello ${nfd}`); }); + + test('handles complex emoji with ZWJ sequences', () => { + const chefEmoji = '๐Ÿ‘จโ€๐Ÿณ'; // chef emoji (man + ZWJ + cooking) + const src = chefEmoji; + const dst = 'chef' + chefEmoji; + const patch = normalize(diff(src, dst)); + assertPatch(src, dst, patch); + expect(patch).toEqual([ + [PATCH_OP_TYPE.INS, 'chef'], + [PATCH_OP_TYPE.EQL, chefEmoji], + ]); + }); }); describe('Algorithm edge cases', () => { diff --git a/packages/json-joy/src/util/diff/str.ts b/packages/json-joy/src/util/diff/str.ts index 8cdc245d29..c76ff0e6ea 100644 --- a/packages/json-joy/src/util/diff/str.ts +++ b/packages/json-joy/src/util/diff/str.ts @@ -427,9 +427,35 @@ export const sfx = (txt1: string, txt2: string): number => { } else max = mid; mid = Math.floor((max - min) / 2 + min); } - const code = txt1.charCodeAt(txt1.length - mid); - const isSurrogatePairEnd = code >= 0xd800 && code <= 0xdbff; - if (isSurrogatePairEnd) mid--; + // Check if we're splitting a surrogate pair or combining character sequence + // We need to check the character BEFORE the matched suffix to see if we're + // splitting a grapheme cluster. + if (mid > 0 && mid < txt1.length) { + const boundaryPos = txt1.length - mid - 1; + const code = txt1.charCodeAt(boundaryPos); + const isHighSurrogate = code >= 0xd800 && code <= 0xdbff; + const isCombining = + code === 0x200d || // ZWJ + (code >= 0xfe00 && code <= 0xfe0f) || // Variation selectors + (code >= 0x0300 && code <= 0x036f); // Combining diacritical marks + + if (isHighSurrogate || isCombining) { + // We're splitting a grapheme cluster. Walk backwards to include the full cluster. + mid--; + while (mid > 0) { + const pos = txt1.length - mid - 1; + if (pos < 0) break; + const prevCode = txt1.charCodeAt(pos); + const isPrevHighSurrogate = prevCode >= 0xd800 && prevCode <= 0xdbff; + const isPrevCombining = + prevCode === 0x200d || + (prevCode >= 0xfe00 && prevCode <= 0xfe0f) || + (prevCode >= 0x0300 && prevCode <= 0x036f); + if (!isPrevHighSurrogate && !isPrevCombining) break; + mid--; + } + } + } return mid; }; From 28145d35e8cfa20809db41cca7849b8b1003c9b2 Mon Sep 17 00:00:00 2001 From: streamich Date: Sat, 25 Oct 2025 00:04:09 +0200 Subject: [PATCH 3/4] =?UTF-8?q?test:=20=F0=9F=92=8D=20add=20more=20diff=20?= =?UTF-8?q?tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/util/diff/__tests__/str.spec.ts | 228 ++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/packages/json-joy/src/util/diff/__tests__/str.spec.ts b/packages/json-joy/src/util/diff/__tests__/str.spec.ts index dc08a9a196..d5829a1f08 100644 --- a/packages/json-joy/src/util/diff/__tests__/str.spec.ts +++ b/packages/json-joy/src/util/diff/__tests__/str.spec.ts @@ -15,6 +15,55 @@ describe('pfx()', () => { expect(pfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); expect('๐Ÿ‘จโ€๐Ÿณchef'.slice(0, 5)).toBe('๐Ÿ‘จโ€๐Ÿณ'); }); + + test('handles grapheme clusters with ZWJ (Zero Width Joiner)', () => { + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + expect(pfx(family, family)).toEqual(11); + expect(pfx(family + 'abc', family)).toEqual(11); + expect(pfx(family + 'abc', family + 'xyz')).toEqual(11); + expect(pfx('prefix' + family, 'prefix' + family)).toEqual(6 + 11); + const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; + expect(pfx(womanTech, womanTech)).toEqual(7); + expect(pfx(womanTech + 'code', womanTech)).toEqual(7); + expect(pfx('hello' + womanTech, 'hello' + womanTech)).toEqual(5 + 7); + }); + + test('handles flag emojis (regional indicators)', () => { + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + const ukFlag = '๐Ÿ‡ฌ๐Ÿ‡ง'; + expect(pfx(usFlag, usFlag)).toEqual(4); + expect(pfx(usFlag + 'USA', usFlag)).toEqual(4); + expect(pfx(usFlag, ukFlag)).toEqual(0); + expect(pfx('hello' + usFlag, 'hello' + usFlag)).toEqual(5 + 4); + }); + + test('handles combining diacritical marks', () => { + const combining = 'e\u0301'; // e + combining acute accent + expect(pfx(combining, combining)).toEqual(2); + expect(pfx(combining + 'llo', combining)).toEqual(2); + expect(pfx('hello' + combining, 'hello' + combining)).toEqual(5 + 2); + + // Multiple combining marks + const multiCombining = 'a\u0301\u0302\u0303'; + expect(pfx(multiCombining, multiCombining)).toEqual(4); + }); + + test('handles variation selectors', () => { + const heartText = 'โค\uFE0E'; // text style + const heartEmoji = 'โค\uFE0F'; // emoji style + expect(pfx(heartText, heartText)).toEqual(2); + expect(pfx(heartEmoji, heartEmoji)).toEqual(2); + expect(pfx(heartText, heartEmoji)).toEqual(1); // Only the base character matches + }); + + test('handles mixed grapheme clusters', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + const combined = chef + family; + expect(pfx(combined, combined)).toEqual(16); + expect(pfx(combined + 'text', combined)).toEqual(16); + expect(pfx('abc' + combined, 'abc' + combined)).toEqual(3 + 16); + }); }); describe('sfx()', () => { @@ -32,6 +81,68 @@ describe('sfx()', () => { expect(sfx('chef๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); }); + + test('handles grapheme clusters with ZWJ (Zero Width Joiner)', () => { + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + expect(sfx(family, family)).toEqual(11); + expect(sfx('abc' + family, family)).toEqual(11); + expect(sfx('xyz' + family, 'abc' + family)).toEqual(11); + expect(sfx(family + 'suffix', family + 'suffix')).toEqual(6 + 11); + const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; + expect(sfx(womanTech, womanTech)).toEqual(7); + expect(sfx('code' + womanTech, womanTech)).toEqual(7); + expect(sfx(womanTech + 'hello', womanTech + 'hello')).toEqual(5 + 7); + }); + + test('handles flag emojis (regional indicators)', () => { + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + const ukFlag = '๐Ÿ‡ฌ๐Ÿ‡ง'; + expect(sfx(usFlag, usFlag)).toEqual(4); + expect(sfx('USA' + usFlag, usFlag)).toEqual(4); + expect(sfx(usFlag, ukFlag)).toEqual(0); + expect(sfx(usFlag + 'hello', usFlag + 'hello')).toEqual(5 + 4); + }); + + test('handles combining diacritical marks', () => { + const combining = 'e\u0301'; // e + combining acute accent + expect(sfx(combining, combining)).toEqual(2); + expect(sfx('ell' + combining, combining)).toEqual(2); + expect(sfx(combining + 'hello', combining + 'hello')).toEqual(5 + 2); + const multiCombining = 'a\u0301\u0302\u0303'; // a with multiple accents + expect(sfx(multiCombining, multiCombining)).toEqual(4); + expect(sfx('text' + multiCombining, multiCombining)).toEqual(4); + }); + + test('handles variation selectors', () => { + const heartText = 'โค\uFE0E'; // text style + const heartEmoji = 'โค\uFE0F'; // emoji style + expect(sfx(heartText, heartText)).toEqual(2); + expect(sfx(heartEmoji, heartEmoji)).toEqual(2); + expect(sfx(heartText, heartEmoji)).toEqual(0); + expect(sfx('love' + heartEmoji, heartEmoji)).toEqual(2); + }); + + test('handles mixed grapheme clusters', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + const combined = family + chef; + expect(sfx(combined, combined)).toEqual(16); + expect(sfx('text' + combined, combined)).toEqual(16); + expect(sfx(combined + 'abc', combined + 'abc')).toEqual(3 + 16); + }); + + test('does not split grapheme clusters at boundaries', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + + // Ensure we don't split in the middle of a grapheme cluster + expect(sfx('x' + chef, chef)).toEqual(5); // full chef emoji + expect(sfx('xy' + family, family)).toEqual(11); // full family emoji + + // When the suffix is part of a larger grapheme, it should not match partially + expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘ฉ', '๐Ÿ‘ฉ')).toEqual(2); // Just the woman emoji at end + expect(sfx('text๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); // Full chef emoji + }); }); describe('normalize()', () => { @@ -273,6 +384,81 @@ describe('diff()', () => { assertPatch('a๐Ÿ™ƒb', 'a๐Ÿ‘‹b'); }); + test('grapheme clusters with ZWJ (Zero Width Joiner)', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; + assertPatch(chef, family); + assertPatch(family, chef); + assertPatch(womanTech, chef); + assertPatch('hello', 'hello' + chef); + assertPatch('hello', chef + 'hello'); + assertPatch('hello world', 'hello' + family + 'world'); + assertPatch('hello' + chef, 'hello'); + assertPatch(chef + 'hello', 'hello'); + assertPatch('hello' + family + 'world', 'helloworld'); + assertPatch(chef + family, family + chef); + assertPatch('a' + chef + 'b' + family + 'c', 'x' + family + 'y' + chef + 'z'); + assertPatch('The ' + chef + ' cooks', 'A ' + chef + ' bakes'); + assertPatch('Team: ' + family, 'Group: ' + womanTech); + }); + + test('flag emojis (regional indicators)', () => { + const ruFlag = '๐Ÿ‡ท๐Ÿ‡บ'; + const chFlag = '๐Ÿ‡จ๐Ÿ‡ณ'; + const inFlag = '๐Ÿ‡ฎ๐Ÿ‡ณ'; + assertPatch(ruFlag, chFlag); + assertPatch(chFlag, inFlag); + assertPatch('Made in ' + ruFlag, 'Made in ' + chFlag); + assertPatch(ruFlag + ' USA', chFlag + ' UK'); + assertPatch('Hello ' + ruFlag + ' world', 'Hello ' + inFlag + ' world'); + assertPatch(ruFlag + chFlag, chFlag + ruFlag); + assertPatch('Flags: ' + ruFlag + chFlag + inFlag, 'Flags: ' + inFlag + chFlag + ruFlag); + }); + + test('combining diacritical marks', () => { + const combining1 = 'e\u0301'; + const combining2 = 'e\u0300'; + const precomposed = 'รฉ'; + assertPatch(combining1, combining2); + assertPatch(combining1, precomposed); + assertPatch(precomposed, combining1); + assertPatch('cafe\u0301', 'cafรฉ'); + assertPatch('naรฏve', 'naive'); + assertPatch('rรฉsumรฉ', 'resume'); + const multiCombining = 'a\u0301\u0302\u0303'; + assertPatch('test' + multiCombining, 'test'); + assertPatch('test', 'test' + multiCombining); + }); + + test('variation selectors', () => { + const heartText = 'โค\uFE0E'; // text style + const heartEmoji = 'โค\uFE0F'; // emoji style + assertPatch(heartText, heartEmoji); + assertPatch(heartEmoji, heartText); + assertPatch('I ' + heartText + ' code', 'I ' + heartEmoji + ' code'); + assertPatch('Love ' + heartEmoji, 'Love ' + heartText); + }); + + test('complex grapheme clusters in real scenarios', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + assertPatch( + 'Hey ' + chef + ', dinner ready?', + 'Hi ' + womanTech + ', code ready?' + ); + assertPatch( + family + ' going to ' + usFlag, + family + ' staying home' + ); + assertPatch( + 'The ' + chef + ' from ' + usFlag + ' is amazing', + 'A ' + womanTech + ' from ' + usFlag + ' is brilliant' + ); + }); + test('same strings', () => { assertPatch('', ''); assertPatch('1', '1'); @@ -364,6 +550,33 @@ describe('diffEdit()', () => { assertDiffEdit('aaa', 'bbb', 'ccc'); assertDiffEdit('1', '2', '3'); }); + + test('handles grapheme cluster inserts and deletes', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + + // Insert grapheme clusters + assertDiffEdit('', chef, ''); + assertDiffEdit('Hello ', chef, ''); + assertDiffEdit('', chef, ' world'); + assertDiffEdit('Hello ', chef, ' world'); + assertDiffEdit('Team: ', family, ' rocks!'); + + // Insert multiple grapheme clusters + assertDiffEdit('', chef + family, ''); + assertDiffEdit('Coders: ', womanTech + chef, ' win'); + + // Insert with flags + assertDiffEdit('Made in ', usFlag, ''); + assertDiffEdit('', usFlag, ' USA'); + + // Combining characters + const combining = 'e\u0301'; + assertDiffEdit('caf', combining, ''); + assertDiffEdit('', combining, ' accent'); + }); }); describe('overlap()', () => { @@ -386,6 +599,21 @@ describe('overlap()', () => { expect(overlap('abc', 'abc')).toEqual(3); expect(overlap('a', 'a')).toEqual(1); }); + + test('handles grapheme clusters', () => { + const chef = '๐Ÿ‘จโ€๐Ÿณ'; + const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; + + // Overlap with grapheme clusters + expect(overlap('hello' + chef, chef + 'world')).toEqual(5); + expect(overlap('abc' + family, family + 'xyz')).toEqual(11); + + // No overlap when grapheme differs + expect(overlap('hello' + chef, family + 'world')).toEqual(0); + + // Text overlap with grapheme clusters + expect(overlap('prefix' + chef, chef + 'suffix')).toEqual(5); + }); }); describe('Unicode edge cases', () => { From 84abf6cc9a85f7813d085c1ac6dbcc070c42b4f4 Mon Sep 17 00:00:00 2001 From: streamich Date: Sat, 25 Oct 2025 00:06:09 +0200 Subject: [PATCH 4/4] =?UTF-8?q?style:=20=F0=9F=92=84=20run=20formatter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/util/diff/__tests__/str.spec.ts | 34 ++++++++----------- packages/json-joy/src/util/diff/str.ts | 6 ++-- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/packages/json-joy/src/util/diff/__tests__/str.spec.ts b/packages/json-joy/src/util/diff/__tests__/str.spec.ts index d5829a1f08..8e606d490d 100644 --- a/packages/json-joy/src/util/diff/__tests__/str.spec.ts +++ b/packages/json-joy/src/util/diff/__tests__/str.spec.ts @@ -42,7 +42,7 @@ describe('pfx()', () => { expect(pfx(combining, combining)).toEqual(2); expect(pfx(combining + 'llo', combining)).toEqual(2); expect(pfx('hello' + combining, 'hello' + combining)).toEqual(5 + 2); - + // Multiple combining marks const multiCombining = 'a\u0301\u0302\u0303'; expect(pfx(multiCombining, multiCombining)).toEqual(4); @@ -134,11 +134,11 @@ describe('sfx()', () => { test('does not split grapheme clusters at boundaries', () => { const chef = '๐Ÿ‘จโ€๐Ÿณ'; const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; - + // Ensure we don't split in the middle of a grapheme cluster expect(sfx('x' + chef, chef)).toEqual(5); // full chef emoji expect(sfx('xy' + family, family)).toEqual(11); // full family emoji - + // When the suffix is part of a larger grapheme, it should not match partially expect(sfx('๐Ÿ‘จโ€๐Ÿณ๐Ÿ‘ฉ', '๐Ÿ‘ฉ')).toEqual(2); // Just the woman emoji at end expect(sfx('text๐Ÿ‘จโ€๐Ÿณ', '๐Ÿ‘จโ€๐Ÿณ')).toEqual(5); // Full chef emoji @@ -445,17 +445,11 @@ describe('diff()', () => { const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; - assertPatch( - 'Hey ' + chef + ', dinner ready?', - 'Hi ' + womanTech + ', code ready?' - ); - assertPatch( - family + ' going to ' + usFlag, - family + ' staying home' - ); + assertPatch('Hey ' + chef + ', dinner ready?', 'Hi ' + womanTech + ', code ready?'); + assertPatch(family + ' going to ' + usFlag, family + ' staying home'); assertPatch( 'The ' + chef + ' from ' + usFlag + ' is amazing', - 'A ' + womanTech + ' from ' + usFlag + ' is brilliant' + 'A ' + womanTech + ' from ' + usFlag + ' is brilliant', ); }); @@ -556,22 +550,22 @@ describe('diffEdit()', () => { const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; const womanTech = '๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป'; const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; - + // Insert grapheme clusters assertDiffEdit('', chef, ''); assertDiffEdit('Hello ', chef, ''); assertDiffEdit('', chef, ' world'); assertDiffEdit('Hello ', chef, ' world'); assertDiffEdit('Team: ', family, ' rocks!'); - + // Insert multiple grapheme clusters assertDiffEdit('', chef + family, ''); assertDiffEdit('Coders: ', womanTech + chef, ' win'); - + // Insert with flags assertDiffEdit('Made in ', usFlag, ''); assertDiffEdit('', usFlag, ' USA'); - + // Combining characters const combining = 'e\u0301'; assertDiffEdit('caf', combining, ''); @@ -603,14 +597,14 @@ describe('overlap()', () => { test('handles grapheme clusters', () => { const chef = '๐Ÿ‘จโ€๐Ÿณ'; const family = '๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ'; - + // Overlap with grapheme clusters expect(overlap('hello' + chef, chef + 'world')).toEqual(5); expect(overlap('abc' + family, family + 'xyz')).toEqual(11); - + // No overlap when grapheme differs expect(overlap('hello' + chef, family + 'world')).toEqual(0); - + // Text overlap with grapheme clusters expect(overlap('prefix' + chef, chef + 'suffix')).toEqual(5); }); @@ -666,7 +660,7 @@ describe('Unicode edge cases', () => { assertPatch(`hello ${nfc}`, `hello ${nfd}`); }); - test('handles complex emoji with ZWJ sequences', () => { + test('handles complex emoji with ZWJ sequences', () => { const chefEmoji = '๐Ÿ‘จโ€๐Ÿณ'; // chef emoji (man + ZWJ + cooking) const src = chefEmoji; const dst = 'chef' + chefEmoji; diff --git a/packages/json-joy/src/util/diff/str.ts b/packages/json-joy/src/util/diff/str.ts index c76ff0e6ea..26cd4fcddc 100644 --- a/packages/json-joy/src/util/diff/str.ts +++ b/packages/json-joy/src/util/diff/str.ts @@ -434,11 +434,11 @@ export const sfx = (txt1: string, txt2: string): number => { const boundaryPos = txt1.length - mid - 1; const code = txt1.charCodeAt(boundaryPos); const isHighSurrogate = code >= 0xd800 && code <= 0xdbff; - const isCombining = + const isCombining = code === 0x200d || // ZWJ (code >= 0xfe00 && code <= 0xfe0f) || // Variation selectors (code >= 0x0300 && code <= 0x036f); // Combining diacritical marks - + if (isHighSurrogate || isCombining) { // We're splitting a grapheme cluster. Walk backwards to include the full cluster. mid--; @@ -447,7 +447,7 @@ export const sfx = (txt1: string, txt2: string): number => { if (pos < 0) break; const prevCode = txt1.charCodeAt(pos); const isPrevHighSurrogate = prevCode >= 0xd800 && prevCode <= 0xdbff; - const isPrevCombining = + const isPrevCombining = prevCode === 0x200d || (prevCode >= 0xfe00 && prevCode <= 0xfe0f) || (prevCode >= 0x0300 && prevCode <= 0x036f);