diff --git a/.changeset/odd-insects-vanish.md b/.changeset/odd-insects-vanish.md index 542af1e97e0009f5209622c21805d02a80a5ebad..ac0fc1a82bf74bb6792a2b4f79091709cdaface1 100644 --- a/.changeset/odd-insects-vanish.md +++ b/.changeset/odd-insects-vanish.md @@ -2,4 +2,4 @@ "@llamaindex/env": patch --- -Fix TypeError: Module "..." needs an import attribute of "type: json" +Use tiktoken instead of tiktoken/lite and disable WASM tiktoken for non-Node environments diff --git a/packages/env/src/index.workerd.ts b/packages/env/src/index.workerd.ts index 1b0d683862ae150bc646dcd79552826f85f2d9db..4f4a5a9f4992ddda585157e13ea4eefdfd8f6694 100644 --- a/packages/env/src/index.workerd.ts +++ b/packages/env/src/index.workerd.ts @@ -13,4 +13,4 @@ export function getEnv(name: string): string | undefined { return INTERNAL_ENV[name]; } -export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/node.js"; +export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js"; diff --git a/packages/env/src/tokenizers/node.ts b/packages/env/src/tokenizers/node.ts index 4a6e6c9f7b935302db9626662986fbb7159bba5f..0485194aa88a78b5ab0316eb692dead78f95dcc7 100644 --- a/packages/env/src/tokenizers/node.ts +++ b/packages/env/src/tokenizers/node.ts @@ -1,18 +1,14 @@ // Note: This is using th WASM implementation of tiktoken which is 60x faster -import cl100k_base from "tiktoken/encoders/cl100k_base.json" with { type: "json" }; -import { Tiktoken } from "tiktoken/lite"; import type { Tokenizer } from "./types.js"; import { Tokenizers } from "./types.js"; +import { get_encoding } from "tiktoken"; + class TokenizerSingleton { private defaultTokenizer: Tokenizer; constructor() { - const encoding = new Tiktoken( - cl100k_base.bpe_ranks, - cl100k_base.special_tokens, - cl100k_base.pat_str, - ); + const encoding = get_encoding("cl100k_base"); this.defaultTokenizer = { encode: (text: string) => {