Skip to content
Snippets Groups Projects
Commit 6f2cb31d authored by Yi Ding
Browse files

fixed tokenizer decoder

parent cc881417
Branches
Tags llamaindex@0.0.10
No related merge requests found
---
"llamaindex": patch
---
Fixed tokenizer decoder
......@@ -5,19 +5,27 @@ import { v4 as uuidv4 } from "uuid";
* Helper class singleton
*/
class GlobalsHelper {
defaultTokenizer: ((text: string) => string[]) | null = null;
defaultTokenizer: {
encode: (text: string) => number[];
decode: (tokens: number[]) => string;
} | null = null;
tokenizer() {
if (this.defaultTokenizer) {
return this.defaultTokenizer;
if (!this.defaultTokenizer) {
const tiktoken = require("tiktoken-node");
this.defaultTokenizer = tiktoken.getEncoding("gpt2");
}
const tiktoken = require("tiktoken-node");
let enc = new tiktoken.getEncoding("gpt2");
this.defaultTokenizer = (text: string) => {
return enc.encode(text);
};
return this.defaultTokenizer;
return this.defaultTokenizer!.encode.bind(this.defaultTokenizer);
}
/**
 * Returns the shared tokenizer's `decode` function, bound to the tokenizer
 * instance so it can be passed around as a standalone callback.
 *
 * The tokenizer is created on first use from the "gpt2" encoding of
 * `tiktoken-node` and cached on `defaultTokenizer` for later calls.
 *
 * @returns A function that turns an array of token ids back into text.
 */
tokenizerDecoder() {
  let encoding = this.defaultTokenizer;
  if (!encoding) {
    // Nothing cached yet: load the encoding now and remember it on the instance.
    const tiktoken = require("tiktoken-node");
    encoding = tiktoken.getEncoding("gpt2");
    this.defaultTokenizer = encoding;
  }
  // Bind so `decode` keeps its receiver when it is called detached.
  return encoding!.decode.bind(encoding);
}
createEvent({
......
......@@ -34,7 +34,7 @@ export class PromptHelper {
numOutput = DEFAULT_NUM_OUTPUTS;
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
chunkSizeLimit?: number;
tokenizer: (text: string) => string[];
tokenizer: (text: string) => number[];
separator = " ";
constructor(
......@@ -42,7 +42,7 @@ export class PromptHelper {
numOutput = DEFAULT_NUM_OUTPUTS,
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
chunkSizeLimit?: number,
tokenizer?: (text: string) => string[],
tokenizer?: (text: string) => number[],
separator = " "
) {
this.contextWindow = contextWindow;
......
......@@ -60,7 +60,7 @@ export class SentenceSplitter {
if (tokenizer == undefined || tokenizerDecoder == undefined) {
tokenizer = globalsHelper.tokenizer();
tokenizerDecoder = globalsHelper.tokenizer;
tokenizerDecoder = globalsHelper.tokenizerDecoder();
}
this.tokenizer = tokenizer;
this.tokenizerDecoder = tokenizerDecoder;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment