|
| 1 | +import { spawnSync } from "child_process"; |
| 2 | + |
// Import clause spliced into every generated script. The same text serves as
// an ESM named-import list and as a CommonJS destructuring pattern.
const IMPORT = `{ Tokenizer }`;
// Package specifier that each generated script imports or requires.
const MODULE_NAME = "@huggingface/tokenizers";
| 5 | + |
// Script body shared by every module-format test. It is passed as text to a
// child `node` process, so the inner backticks and `${...}` are escaped here
// and only become a template literal when the child evaluates the script.
// The script downloads tokenizer.json and tokenizer_config.json for a tiny
// test model, builds a Tokenizer from them, and prints the tokenize / encode /
// decode results for 'Hello World' on three separate console.log calls.
const CODE_BODY = `
const modelId = "hf-internal-testing/tiny-random-LlamaForCausalLM";
const tokenizerJson = await fetch(\`https://huggingface.co/\${modelId}/resolve/main/tokenizer.json\`).then(res => res.json());
const tokenizerConfig = await fetch(\`https://huggingface.co/\${modelId}/resolve/main/tokenizer_config.json\`).then(res => res.json());

// Create tokenizer
const tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);

// Tokenize text
const tokens = tokenizer.tokenize('Hello World');
const encoded = tokenizer.encode('Hello World');
const decoded = tokenizer.decode(encoded);

console.log(tokens);
console.log(encoded);
console.log(decoded);
`;
| 23 | + |
// Exact stdout expected from the generated script: one line per console.log
// call (tokens, encoded ids, decoded text), each terminated by a newline.
const TARGET_OUTPUT = "[ '▁Hello', '▁World' ]\n[ 1, 15043, 2787 ]\n<s> Hello World\n";
| 25 | + |
/**
 * Wraps a script in an immediately-invoked async function so that `await`
 * can be used where top-level await is not available.
 *
 * @param code Script text to place inside the function body.
 * @returns The script text wrapped as `(async function() { ... })();`.
 */
const wrap_async_iife = (code: string): string => `(async function() { ${code} })();`;
| 27 | + |
/**
 * Runs `code` in a child `node` process via `-e` and asserts that it prints
 * exactly TARGET_OUTPUT, writes nothing to stderr, and exits with status 0.
 *
 * @param code Script text handed to `node -e`.
 * @param module When true, `--input-type=module` is added so the script is
 *   evaluated as an ES module instead of the default CommonJS.
 * @throws The error reported by `spawnSync` when the child process could not
 *   be run at all.
 */
const check = (code: string, module = false): void => {
  const args = ["-e", code];
  if (module) args.push("--input-type=module");
  const { error, status, stdout, stderr } = spawnSync("node", args);
  // A failed launch leaves the output buffers unset; rethrow the real cause
  // rather than crashing on `.toString()` below.
  if (error) throw error;
  expect(stderr.toString()).toEqual(""); // No warnings or errors are printed
  expect(stdout.toString()).toEqual(TARGET_OUTPUT); // The output should match
  expect(status).toEqual(0); // The process should exit cleanly
};
| 36 | + |
// Runs the same script against the published package through each supported
// way of loading it, to confirm that every entry point of the bundle works.
describe("Testing the bundle", () => {
  it("ECMAScript Module (ESM)", () => {
    // Static import; the script is run as a module, so CODE_BODY can use
    // top-level await directly.
    check(`import ${IMPORT} from "${MODULE_NAME}";${CODE_BODY}`, true);
  });

  it("CommonJS (CJS) with require", () => {
    // The require call stays outside the wrapper; only the awaiting body is
    // placed inside the async function.
    check(`const ${IMPORT} = require("${MODULE_NAME}");${wrap_async_iife(CODE_BODY)}`);
  });

  it("CommonJS (CJS) with dynamic import", () => {
    // The dynamic import is awaited, so it must sit inside the async wrapper
    // together with the script body.
    check(wrap_async_iife(`const ${IMPORT} = await import("${MODULE_NAME}");${CODE_BODY}`));
  });
});
0 commit comments