Skip to content
Snippets Groups Projects
Commit 8a1385b9 authored by Yi Ding
Browse files

migrated to tiktoken lite

Hopefully fixes the Windows issue
parent a52143b0
Branches
Tags
No related merge requests found
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
"pdf-parse": "^1.1.1", "pdf-parse": "^1.1.1",
"rake-modified": "^1.0.8", "rake-modified": "^1.0.8",
"replicate": "^0.18.1", "replicate": "^0.18.1",
"tiktoken-node": "^0.0.6", "tiktoken": "^1.0.10",
"uuid": "^9.0.1", "uuid": "^9.0.1",
"wink-nlp": "^1.14.3" "wink-nlp": "^1.14.3"
}, },
......
import cl100k_base from "tiktoken/encoders/cl100k_base.json";
import { Tiktoken } from "tiktoken/lite";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Event, EventTag, EventType } from "./callbacks/CallbackManager"; import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
...@@ -6,14 +9,30 @@ import { Event, EventTag, EventType } from "./callbacks/CallbackManager"; ...@@ -6,14 +9,30 @@ import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
*/ */
class GlobalsHelper { class GlobalsHelper {
defaultTokenizer: { defaultTokenizer: {
encode: (text: string) => number[]; encode: (text: string) => Uint32Array;
decode: (tokens: number[]) => string; decode: (tokens: Uint32Array) => string;
} | null = null; } | null = null;
/**
 * Builds the default tokenizer from the bundled `cl100k_base` encoder data
 * and stores it on `this.defaultTokenizer`.
 *
 * The stored object exposes:
 * - `encode(text)`: forwards to the Tiktoken instance's `encode`.
 * - `decode(tokens)`: passes the output of the Tiktoken instance's `decode`
 *   through a `TextDecoder` (default encoding) to produce a string.
 */
private initDefaultTokenizer() {
  const encoding = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str,
  );
  // A single decoder is shared by every decode call: without `{ stream: true }`
  // TextDecoder.decode() keeps no state between calls, so allocating a new
  // instance per call is unnecessary.
  const textDecoder = new TextDecoder();
  this.defaultTokenizer = {
    encode: (text: string) => encoding.encode(text),
    decode: (tokens: Uint32Array) =>
      textDecoder.decode(encoding.decode(tokens)),
  };
}
tokenizer() { tokenizer() {
if (!this.defaultTokenizer) { if (!this.defaultTokenizer) {
const tiktoken = require("tiktoken-node"); this.initDefaultTokenizer();
this.defaultTokenizer = tiktoken.getEncoding("gpt2");
} }
return this.defaultTokenizer!.encode.bind(this.defaultTokenizer); return this.defaultTokenizer!.encode.bind(this.defaultTokenizer);
...@@ -21,8 +40,7 @@ class GlobalsHelper { ...@@ -21,8 +40,7 @@ class GlobalsHelper {
tokenizerDecoder() { tokenizerDecoder() {
if (!this.defaultTokenizer) { if (!this.defaultTokenizer) {
const tiktoken = require("tiktoken-node"); this.initDefaultTokenizer();
this.defaultTokenizer = tiktoken.getEncoding("gpt2");
} }
return this.defaultTokenizer!.decode.bind(this.defaultTokenizer); return this.defaultTokenizer!.decode.bind(this.defaultTokenizer);
......
...@@ -34,7 +34,7 @@ export class PromptHelper { ...@@ -34,7 +34,7 @@ export class PromptHelper {
numOutput = DEFAULT_NUM_OUTPUTS; numOutput = DEFAULT_NUM_OUTPUTS;
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO; chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
chunkSizeLimit?: number; chunkSizeLimit?: number;
tokenizer: (text: string) => number[]; tokenizer: (text: string) => Uint32Array;
separator = " "; separator = " ";
constructor( constructor(
...@@ -42,7 +42,7 @@ export class PromptHelper { ...@@ -42,7 +42,7 @@ export class PromptHelper {
numOutput = DEFAULT_NUM_OUTPUTS, numOutput = DEFAULT_NUM_OUTPUTS,
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO, chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
chunkSizeLimit?: number, chunkSizeLimit?: number,
tokenizer?: (text: string) => number[], tokenizer?: (text: string) => Uint32Array,
separator = " ", separator = " ",
) { ) {
this.contextWindow = contextWindow; this.contextWindow = contextWindow;
......
...@@ -63,6 +63,9 @@ export interface VectorStore { ...@@ -63,6 +63,9 @@ export interface VectorStore {
client(): any; client(): any;
add(embeddingResults: BaseNode[]): Promise<string[]>; add(embeddingResults: BaseNode[]): Promise<string[]>;
delete(refDocId: string, deleteKwargs?: any): Promise<void>; delete(refDocId: string, deleteKwargs?: any): Promise<void>;
query(query: VectorStoreQuery, kwargs?: any): Promise<VectorStoreQueryResult>; query(
query: VectorStoreQuery,
options?: any,
): Promise<VectorStoreQueryResult>;
persist(persistPath: string, fs?: GenericFileSystem): Promise<void>; persist(persistPath: string, fs?: GenericFileSystem): Promise<void>;
} }
...@@ -9,7 +9,8 @@ ...@@ -9,7 +9,8 @@
"noEmit": true, "noEmit": true,
"strict": true, "strict": true,
"lib": ["es2015", "dom"], "lib": ["es2015", "dom"],
"target": "ES2015" "target": "ES2015",
"resolveJsonModule": true
}, },
"exclude": ["node_modules"] "exclude": ["node_modules"]
} }
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment