Skip to content
Snippets Groups Projects
Unverified Commit 20498a2b authored by Marcus Schiesser's avatar Marcus Schiesser Committed by GitHub
Browse files

fix: use tiktoken instead of tiktoken/lite (#967)

parent 0730140e
No related branches found
No related tags found
No related merge requests found
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
"@llamaindex/env": patch "@llamaindex/env": patch
--- ---
Fix TypeError: Module "..." needs an import attribute of "type: json" Use tiktoken instead of tiktoken/lite and disable WASM tiktoken for non-Node environments
...@@ -13,4 +13,4 @@ export function getEnv(name: string): string | undefined { ...@@ -13,4 +13,4 @@ export function getEnv(name: string): string | undefined {
return INTERNAL_ENV[name]; return INTERNAL_ENV[name];
} }
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/node.js"; export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
// Note: This is using the WASM implementation of tiktoken which is 60x faster // Note: This is using the WASM implementation of tiktoken which is 60x faster
import cl100k_base from "tiktoken/encoders/cl100k_base.json" with { type: "json" };
import { Tiktoken } from "tiktoken/lite";
import type { Tokenizer } from "./types.js"; import type { Tokenizer } from "./types.js";
import { Tokenizers } from "./types.js"; import { Tokenizers } from "./types.js";
import { get_encoding } from "tiktoken";
class TokenizerSingleton { class TokenizerSingleton {
private defaultTokenizer: Tokenizer; private defaultTokenizer: Tokenizer;
constructor() { constructor() {
const encoding = new Tiktoken( const encoding = get_encoding("cl100k_base");
cl100k_base.bpe_ranks,
cl100k_base.special_tokens,
cl100k_base.pat_str,
);
this.defaultTokenizer = { this.defaultTokenizer = {
encode: (text: string) => { encode: (text: string) => {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment