Skip to content

Commit bfd24cb

Browse files
authored
Merge pull request #1 from huggingface/types
package.json updates & move built types to separate folder
2 parents c139d2b + 5a1687f commit bfd24cb

File tree

7 files changed

+104
-41
lines changed

7 files changed

+104
-41
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ node_modules
1212
dist
1313
dist-ssr
1414
*.local
15+
types
1516

1617
# Editor directories and files
1718
.vscode/*

.prettierrc

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
{
2+
"overrides": [
3+
{
4+
"files": ["tests/**/*.ts"],
5+
"options": {
6+
"printWidth": 10000000
7+
}
8+
}
9+
]
10+
}

package.json

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,33 @@
11
{
22
"name": "@huggingface/tokenizers",
33
"version": "0.0.1",
4-
"description": "",
5-
"main": "dist/tokenizers.cjs",
4+
"description": "🤗 Tokenizers.js: A pure JS/TS implementation of today's most used tokenizers",
65
"type": "module",
7-
"module": "dist/tokenizers.mjs",
8-
"types": "dist/index.d.ts",
6+
"main": "dist/tokenizers.min.mjs",
7+
"browser": "dist/tokenizers.min.mjs",
8+
"module": "dist/tokenizers.min.mjs",
9+
"types": "types/index.d.ts",
910
"exports": {
1011
".": {
11-
"types": "./dist/index.d.ts",
12+
"types": "./types/index.d.ts",
1213
"node": {
13-
"require": "./dist/tokenizers.cjs",
14-
"import": "./dist/tokenizers.mjs"
14+
"require": "./dist/tokenizers.min.cjs",
15+
"import": "./dist/tokenizers.min.mjs"
1516
},
1617
"browser": {
17-
"import": "./dist/tokenizers.mjs"
18+
"import": "./dist/tokenizers.min.mjs"
1819
},
19-
"default": "./dist/tokenizers.mjs"
20+
"default": "./dist/tokenizers.min.mjs"
2021
}
2122
},
2223
"files": [
2324
"dist",
24-
"README.md"
25+
"types",
26+
"README.md",
27+
"LICENSE"
2528
],
2629
"scripts": {
27-
"clean": "rimraf dist",
30+
"clean": "rimraf dist types",
2831
"build": "npm run clean && node scripts/build.mjs",
2932
"dev": "npm run clean && node scripts/dev.mjs",
3033
"lint": "eslint src --ext .ts,.tsx",

scripts/build.mjs

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,11 @@
1-
import { build } from "esbuild";
1+
import { build as esbuild } from "esbuild";
22
import { execSync } from "node:child_process";
33
import { readFileSync } from "node:fs";
44
import { gzipSync } from "node:zlib";
55

66
console.log("Generating TypeScript declarations...");
77
execSync("tsc -p tsconfig.build.json", { stdio: "inherit" });
88

9-
const config = {
10-
bundle: true,
11-
minify: true,
12-
minifySyntax: true,
13-
treeShaking: true,
14-
logLevel: "silent",
15-
entryPoints: ["src/index.ts"],
16-
platform: "neutral",
17-
metafile: true,
18-
};
19-
209
const formatSize = (bytes) => {
2110
if (bytes < 1024) return `${bytes}b`;
2211
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}kb`;
@@ -34,16 +23,27 @@ const reportSize = (outfile) => {
3423
console.log(`⚡ Done\n`);
3524
};
3625

37-
await build({
38-
...config,
39-
format: "esm",
40-
outfile: "dist/tokenizers.mjs",
41-
});
42-
reportSize("dist/tokenizers.mjs");
26+
const build = async (outfile) => {
27+
const format = outfile.endsWith(".mjs") ? "esm" : "cjs";
28+
const minifyOptions = /\.min\.[cm]js$/.test(outfile)
29+
? { minify: true, minifySyntax: true }
30+
: {};
31+
32+
await esbuild({
33+
bundle: true,
34+
treeShaking: true,
35+
logLevel: "silent",
36+
entryPoints: ["src/index.ts"],
37+
platform: "neutral",
38+
metafile: true,
39+
format,
40+
outfile,
41+
...minifyOptions,
42+
});
43+
reportSize(outfile);
44+
}
4345

44-
await build({
45-
...config,
46-
format: "cjs",
47-
outfile: "dist/tokenizers.cjs",
48-
});
49-
reportSize("dist/tokenizers.cjs");
46+
await build("dist/tokenizers.mjs");
47+
await build("dist/tokenizers.cjs");
48+
await build("dist/tokenizers.min.mjs");
49+
await build("dist/tokenizers.min.cjs");

tests/bundle.test.ts

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
import { spawnSync } from "child_process";
2+
3+
const IMPORT = `{ Tokenizer }`;
4+
const MODULE_NAME = "@huggingface/tokenizers";
5+
6+
const CODE_BODY = `
7+
const modelId = "hf-internal-testing/tiny-random-LlamaForCausalLM";
8+
const tokenizerJson = await fetch(\`https://huggingface.co/\${modelId}/resolve/main/tokenizer.json\`).then(res => res.json());
9+
const tokenizerConfig = await fetch(\`https://huggingface.co/\${modelId}/resolve/main/tokenizer_config.json\`).then(res => res.json());
10+
11+
// Create tokenizer
12+
const tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);
13+
14+
// Tokenize text
15+
const tokens = tokenizer.tokenize('Hello World');
16+
const encoded = tokenizer.encode('Hello World');
17+
const decoded = tokenizer.decode(encoded);
18+
19+
console.log(tokens);
20+
console.log(encoded);
21+
console.log(decoded);
22+
`;
23+
24+
const TARGET_OUTPUT = "[ '▁Hello', '▁World' ]\n[ 1, 15043, 2787 ]\n<s> Hello World\n";
25+
26+
const wrap_async_iife = (code: string) => `(async function() { ${code} })();`;
27+
28+
const check = (code: string, module = false) => {
29+
const args = ["-e", code];
30+
if (module) args.push("--input-type=module");
31+
const { status, stdout, stderr } = spawnSync("node", args);
32+
expect(stderr.toString()).toEqual(""); // No warnings or errors are printed
33+
expect(stdout.toString()).toEqual(TARGET_OUTPUT); // The output should match
34+
expect(status).toEqual(0); // The process should exit cleanly
35+
};
36+
37+
describe("Testing the bundle", () => {
38+
it("ECMAScript Module (ESM)", () => {
39+
check(`import ${IMPORT} from "${MODULE_NAME}";${CODE_BODY}`, true);
40+
});
41+
42+
it("CommonJS (CJS) with require", () => {
43+
check(`const ${IMPORT} = require("${MODULE_NAME}");${wrap_async_iife(CODE_BODY)}`);
44+
});
45+
46+
it("CommonJS (CJS) with dynamic import", () => {
47+
check(`${wrap_async_iife(`const ${IMPORT} = await import("${MODULE_NAME}");${CODE_BODY}`)}`);
48+
});
49+
});

tsconfig.build.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,10 @@
33
"extends": "./tsconfig.json",
44
"compilerOptions": {
55
"declaration": true,
6-
"declarationDir": "dist",
6+
"declarationDir": "types",
77
"emitDeclarationOnly": true,
8-
"outDir": "dist",
8+
"outDir": "types",
99
"noEmit": false
1010
},
11-
"include": ["src/index.ts", "types"]
11+
"include": ["src/index.ts"]
1212
}

tsconfig.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
"esModuleInterop": true,
99
"skipLibCheck": true,
1010
"declaration": true,
11-
"declarationDir": "dist",
12-
"outDir": "dist",
11+
"declarationDir": "types",
12+
"outDir": "types",
1313
"strict": true,
1414
"sourceMap": true,
1515
"strictNullChecks": false,
@@ -21,5 +21,5 @@
2121
"@static/*": ["src/static/*"]
2222
}
2323
},
24-
"include": ["src/**/*", "types"]
24+
"include": ["src/**/*"]
2525
}

0 commit comments

Comments (0)