diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts index d54a3ce..712bc7c 100644 --- a/src/lib/zplParser.test.ts +++ b/src/lib/zplParser.test.ts @@ -257,6 +257,69 @@ describe('parseZPL — ^FH hex escape', () => { const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_41BC^FS^XZ', 8); expect(props(objects[0]).content).toBe('ABC'); }); + + it('decodes UTF-8 multibyte escapes (German umlauts)', () => { + // _C3_A4 = ä, _C3_B6 = ö, _C3_BC = ü + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4_C3_B6_C3_BC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('äöü'); + }); + + it('decodes UTF-8 multibyte escapes (Nordic)', () => { + // _C3_A6 = æ, _C3_B8 = ø, _C3_A5 = å + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A6_C3_B8_C3_A5^FS^XZ', 8); + expect(props(objects[0]).content).toBe('æøå'); + }); + + it('decodes 3-byte UTF-8 escapes (Euro sign)', () => { + // _E2_82_AC = € + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_E2_82_AC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('€'); + }); + + it('decodes mixed ASCII and UTF-8 escapes in one field', () => { + // _48 = H, _69 = i, a literal (unescaped) space, then _C3_A4 = ä + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_48_69 _C3_A4^FS^XZ', 8); + expect(props(objects[0]).content).toBe('Hi ä'); + }); + + it('replaces invalid UTF-8 byte sequences with U+FFFD', () => { + // _C3 is the lead byte of a 2-byte sequence; with no continuation byte it is truncated + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8); + expect(props(objects[0]).content).toBe('�'); + }); + + it('decodes ^CI27 (Windows-1252) single-byte escapes', () => { + // _E4 / _F6 / _FC = ä / ö / ü in CP1252 (as UTF-8 each byte would be invalid → U+FFFD) + const { objects } = parseZPL('^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4_F6_FC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('äöü'); + }); + + it('switches encoding mid-label on ^CI', () => { + // first field UTF-8 (default), second field CP1252 + const zpl = + '^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4^FS' + +
'^CI27^FH_^FO0,50^A0N,30,0^FD_E4^FS^XZ'; + const { objects } = parseZPL(zpl, 8); + expect(props(objects[0]).content).toBe('ä'); + expect(props(objects[1]).content).toBe('ä'); + }); + + it('reports unsupported ^CI N as partial import', () => { + // ^CI50 is not a real Zebra encoding — falls back to UTF-8 default + const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8); + expect(importReport.partial).toContain('^CI50'); + }); + + it('resets decoder to UTF-8 default on unsupported ^CI', () => { + // After ^CI27 sets CP1252, an unknown ^CI50 must fall back to UTF-8 + // (not keep CP1252) so behaviour is predictable. + const zpl = + '^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4^FS' + + '^CI50^FH_^FO0,50^A0N,30,0^FD_C3_A4^FS^XZ'; + const { objects } = parseZPL(zpl, 8); + expect(props(objects[0]).content).toBe('ä'); // CP1252 + expect(props(objects[1]).content).toBe('ä'); // UTF-8 (after reset) + }); }); // ── ^FB field block ─────────────────────────────────────────────────────────── diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts index d605056..02a6c1f 100644 --- a/src/lib/zplParser.ts +++ b/src/lib/zplParser.ts @@ -89,12 +89,49 @@ function makeObj( } as unknown as LabelObject; } -/** Decode ^FH hex escapes: replaces {delimiter}XX with the character for hex XX */ -function decodeFH(text: string, delimiter: string): string { +/** + * Map a ^CI N parameter to a TextDecoder label. Most labels printed by this + * app use ^CI28 (UTF-8); ^CI27 is Windows-1252; legacy ^CI0..13 are decoded as + * Windows-1252 too. NOTE(review): the ZPL guide appears to base ^CI0..13 on + * code page 850, so bytes >= 0x80 may differ from Windows-1252 — confirm. + * Any other N (e.g. multi-byte UTF-16/32 variants) returns the UTF-8 label + * with `supported: false`, to be surfaced via importReport.partial.
+ */ +function ciToEncoding(n: number): { label: string; supported: boolean } { + if (n === 28) return { label: "utf-8", supported: true }; + if (n === 27) return { label: "windows-1252", supported: true }; + if (n >= 0 && n <= 13) return { label: "windows-1252", supported: true }; + return { label: "utf-8", supported: false }; +} + +const decoderCache = new Map(); +function getDecoder(label: string): TextDecoder { + let dec = decoderCache.get(label); + if (!dec) { + dec = new TextDecoder(label); + decoderCache.set(label, dec); + } + return dec; +} + +/** + * Decode ^FH hex escapes: replaces each run of {delimiter}XX pairs with the + * string the supplied `decoder` produces for the byte sequence XX XX …. A + * single non-ASCII glyph may span several pairs (e.g. `_C3_A4` → `ä` under + * UTF-8), so contiguous pairs are collected into one Uint8Array and decoded + * in one pass per run. With a non-fatal decoder, invalid bytes become U+FFFD. + */ +function decodeFH(text: string, delimiter: string, decoder: TextDecoder): string { const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); - return text.replace(new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"), (_, hex) => - String.fromCharCode(parseInt(hex, 16)), - ); + const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g"); + const stride = delimiter.length + 2; + return text.replace(runRe, (run) => { + const bytes = new Uint8Array(run.length / stride); + for (let i = 0, b = 0; i < run.length; i += stride, b++) { + bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16); + } + return decoder.decode(bytes); + }); } /** @@ -242,6 +279,11 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { let fhActive = false; let fhDelimiter = "_"; + // ^CI state: the TextDecoder used for ^FH byte decoding. Starts as UTF-8 to + // match our generator output; ^CI27 / ^CI0..13 switch to a single-byte + // decoder, and any other ^CI N (including unsupported ones) selects UTF-8.
+ let fhDecoder = getDecoder("utf-8"); + + // ^FT vs ^FO: store position type so we can reproduce exactly in re-export. let positionIsFT = false; @@ -285,7 +327,7 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { const flushField = () => { if (!fieldType || pendingFD === null) return; - const content = fhActive ? decodeFH(pendingFD, fhDelimiter) : pendingFD; + const content = fhActive ? decodeFH(pendingFD, fhDelimiter, fhDecoder) : pendingFD; const posType: "FT" | "FO" = positionIsFT ? "FT" : "FO"; const comment = takeComment(); @@ -1170,9 +1212,17 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { // assembled text reaches the next field object as one multi-line comment. FX: appendComment, + // ^CI N: character set / encoding for ^FH byte decoding. Mapped to a + // TextDecoder; unsupported variants (UTF-16/32, code page 850) reset the + // decoder to the UTF-8 default and surface as a partial import. + CI: (p) => { + const enc = ciToEncoding(int(p[0])); + fhDecoder = getDecoder(enc.label); + if (!enc.supported) partialCmds.add(`^CI${int(p[0])}`); + }, + // These commands carry no canvas-design information and are silently // discarded so they do not pollute importReport.unknown. - CI: noop, // character set encoding (^CI28 = UTF-8 is the browser default) FN: noop, // field number — variable data placeholder (template feature) FV: noop, // field variable — supplies data for ^FN at print time FC: noop, // field clock — inserts date/time (requires printer RTC)