u8array · u8array · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts
@@ -257,6 +257,69 @@ describe('parseZPL — ^FH hex escape', () => {
     const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_41BC^FS^XZ', 8);
     expect(props(objects[0]).content).toBe('ABC');
   });
+
+  it('decodes UTF-8 multibyte escapes (German umlauts)', () => {
+    // _C3_A4 = ä, _C3_B6 = ö, _C3_BC = ü
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4_C3_B6_C3_BC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('äöü');
+  });
+
+  it('decodes UTF-8 multibyte escapes (Nordic)', () => {
+    // _C3_A6 = æ, _C3_B8 = ø, _C3_A5 = å
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A6_C3_B8_C3_A5^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('æøå');
+  });
+
+  it('decodes 3-byte UTF-8 escapes (Euro sign)', () => {
+    // _E2_82_AC = €
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_E2_82_AC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('€');
+  });
+
+  it('decodes mixed ASCII and UTF-8 escapes in one field', () => {
+    // _48 = H, _69 = i, then ä
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_48_69 _C3_A4^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('Hi ä');
+  });
+
+  it('replaces invalid UTF-8 byte sequences with U+FFFD', () => {
+    // _C3 alone is a truncated 2-byte sequence
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('�');
+  });
+
+  it('decodes ^CI27 (Windows-1252) single-byte escapes', () => {
+    // _E4 = 0xE4 = ä in CP1252 (in UTF-8 this would be invalid → U+FFFD)
+    const { objects } = parseZPL('^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4_F6_FC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('äöü');
+  });
+
+  it('switches encoding mid-label on ^CI', () => {
+    // first field UTF-8 (default), second field CP1252
+    const zpl =
+      '^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4^FS' +
+      '^CI27^FH_^FO0,50^A0N,30,0^FD_E4^FS^XZ';
+    const { objects } = parseZPL(zpl, 8);
+    expect(props(objects[0]).content).toBe('ä');
+    expect(props(objects[1]).content).toBe('ä');
+  });
+
+  it('reports unsupported ^CI N as partial import', () => {
+    // ^CI50 is not a real Zebra encoding — falls back to UTF-8 default
+    const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8);
+    expect(importReport.partial).toContain('^CI50');
+  });
+
+  it('resets decoder to UTF-8 default on unsupported ^CI', () => {
+    // After ^CI27 sets CP1252, an unknown ^CI50 must fall back to UTF-8
+    // (not keep CP1252) so behaviour is predictable.
+    const zpl =
+      '^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4^FS' +
+      '^CI50^FH_^FO0,50^A0N,30,0^FD_C3_A4^FS^XZ';
+    const { objects } = parseZPL(zpl, 8);
+    expect(props(objects[0]).content).toBe('ä');  // CP1252
+    expect(props(objects[1]).content).toBe('ä');  // UTF-8 (after reset)
+  });
 });
 
 // ── ^FB field block ───────────────────────────────────────────────────────────

diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts
@@ -89,12 +89,49 @@ function makeObj(
   } as unknown as LabelObject;
 }
 
-/** Decode ^FH hex escapes: replaces {delimiter}XX with the character for hex XX */
-function decodeFH(text: string, delimiter: string): string {
+/**
+ * Map a ^CI N parameter to a TextDecoder label. Most labels printed by this
+ * app use ^CI28 (UTF-8); ^CI27 is Windows-1252 (Zebra default for many EU
+ * setups); legacy ^CI0..13 are 7-bit-ASCII-compatible code-page variants for
+ * which Windows-1252 is a safe superset for the purposes of `^FH` decoding.
+ * Unsupported encodings (multi-byte UTF-16/32 variants, code page 850, …)
+ * fall back to UTF-8 with the command surfaced via importReport.partial.
+ */
+function ciToEncoding(n: number): { label: string; supported: boolean } {
+  if (n === 28) return { label: "utf-8", supported: true };
+  if (n === 27) return { label: "windows-1252", supported: true };
+  if (n >= 0 && n <= 13) return { label: "windows-1252", supported: true };
+  return { label: "utf-8", supported: false };
+}
+
+const decoderCache = new Map<string, TextDecoder>();
+function getDecoder(label: string): TextDecoder {
+  let dec = decoderCache.get(label);
+  if (!dec) {
+    dec = new TextDecoder(label);
+    decoderCache.set(label, dec);
+  }
+  return dec;
+}
+
+/**
+ * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the string for
+ * the byte sequence XX XX … under the active ^CI encoding. A single non-ASCII
+ * glyph may span multiple escape pairs (e.g. `_C3_A4` → `ä` under UTF-8), so
+ * we collect contiguous pairs into a Uint8Array and run one TextDecoder pass
+ * per run. Invalid byte sequences become U+FFFD (decoder default).
+ */
+function decodeFH(text: string, delimiter: string, decoder: TextDecoder): string {
   const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-  return text.replace(new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"), (_, hex) =>
-    String.fromCharCode(parseInt(hex, 16)),
-  );
+  const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g");
+  const stride = delimiter.length + 2;
+  return text.replace(runRe, (run) => {
+    const bytes = new Uint8Array(run.length / stride);
+    for (let i = 0, b = 0; i < run.length; i += stride, b++) {
+      bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16);
+    }
+    return decoder.decode(bytes);
+  });
 }
 
 /**
@@ -242,6 +279,11 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
   let fhActive = false;
   let fhDelimiter = "_";
 
+  // ^CI state (character set / encoding for ^FH byte decoding). Default UTF-8
+  // matches our generator output; legacy ZPL using ^CI27 / ^CI0..13 sets a
+  // single-byte decoder before ^FH escapes are processed.
+  let fhDecoder = getDecoder("utf-8");
+
   // ^FT vs ^FO: store position type so we can reproduce exactly in re-export.
   let positionIsFT = false;
 
@@ -285,7 +327,7 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
 
   const flushField = () => {
     if (!fieldType || pendingFD === null) return;
-    const content = fhActive ? decodeFH(pendingFD, fhDelimiter) : pendingFD;
+    const content = fhActive ? decodeFH(pendingFD, fhDelimiter, fhDecoder) : pendingFD;
     const posType: "FT" | "FO" = positionIsFT ? "FT" : "FO";
     const comment = takeComment();
 
@@ -1170,9 +1212,17 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
     // assembled text reaches the next field object as one multi-line comment.
     FX: appendComment,
 
+    // ^CI N: character set / encoding for ^FH byte decoding. Mapped to a
+    // TextDecoder; unsupported variants (UTF-16/32, code page 850) keep the
+    // current decoder and surface as a partial import.
+    CI: (p) => {
+      const enc = ciToEncoding(int(p[0]));
+      fhDecoder = getDecoder(enc.label);
+      if (!enc.supported) partialCmds.add(`^CI${int(p[0])}`);
+    },
+
     // These commands carry no canvas-design information and are silently
     // discarded so they do not pollute importReport.unknown.
-    CI: noop, // character set encoding (^CI28 = UTF-8 is the browser default)
     FN: noop, // field number — variable data placeholder (template feature)
     FV: noop, // field variable — supplies data for ^FN at print time
     FC: noop, // field clock — inserts date/time (requires printer RTC)