diff --git a/package.json b/package.json
index 5bf808928028a6b3065e1ed206c16e13b796e777..e177cc22af4e303b477911fd422ae619ee7a2135 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,9 @@
     "prettier": "^2.5.1",
     "prettier-plugin-tailwindcss": "^0.3.0",
     "ts-jest": "^29.1.0",
-    "turbo": "latest"
+    "turbo": "latest",
+    "wink-nlp": "latest",
+    "tiktoken-node": "latest"
   },
   "packageManager": "pnpm@7.15.0",
   "name": "llamascript"
diff --git a/packages/core/package.json b/packages/core/package.json
index fe9910ac054a566b0d6ddffaf372ec97495539ad..6df77fdcd9a96187d27c0baffa920f2181072dbe 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -5,6 +5,9 @@
     "js-tiktoken": "^1.0.7",
     "openai": "^3.3.0"
   },
+  "devDependencies": {
+    "@types/node": "^18"
+  },
   "main": "src/index.ts",
   "types": "src/index.ts",
   "scripts": {
diff --git a/packages/core/src/TextSplitter.ts b/packages/core/src/TextSplitter.ts
index ee86d1e80cb76ac693404d446eebbc2a90133c1d..ba188df5b11a353aa2d127361e0e40fe8557d7e0 100644
--- a/packages/core/src/TextSplitter.ts
+++ b/packages/core/src/TextSplitter.ts
@@ -2,145 +2,241 @@
 
 import { DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP } from "./constants";
 
-class TokenTextSplitter {
-  private _separator: string;
-  private _chunk_size: number;
-  private _chunk_overlap: number;
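+// A single chunk of output text, plus (optionally) how many characters it
+// overlaps with the previous chunk.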
+class TextSplit {
+  textChunk: string;
+  numCharOverlap: number | undefined;
+
+  constructor(
+    textChunk: string,
+    numCharOverlap: number | undefined = undefined
+  ) {
+    this.textChunk = textChunk;
+    this.numCharOverlap = numCharOverlap;
+  }
+}
+
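+// A sentence-level split paired with its token count, so later merge steps do
+// not need to re-tokenize it.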
+type SplitRep = [text: string, numTokens: number];
+
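+// Splits text into chunks of roughly chunkSize tokens, preferring paragraph and
+// sentence boundaries and carrying up to chunkOverlap tokens between chunks.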
+export class SentenceSplitter {
+  private chunkSize: number;
+  private chunkOverlap: number;
   private tokenizer: any;
-  private _backup_separators: string[];
-  private callback_manager: any;
+  private tokenizerDecoder: any;
+  private paragraphSeparator: string;
+  private chunkingTokenizerFn: any;
+  // private _callback_manager: any;
 
   constructor(
-    separator: string = " ",
-    chunk_size: number = DEFAULT_CHUNK_SIZE,
-    chunk_overlap: number = DEFAULT_CHUNK_OVERLAP,
+    chunkSize: number = DEFAULT_CHUNK_SIZE,
+    chunkOverlap: number = DEFAULT_CHUNK_OVERLAP,
     tokenizer: any = null,
-    backup_separators: string[] = ["\n"]
-    // callback_manager: any = null
+    tokenizerDecoder: any = null,
+    paragraphSeparator: string = "\n\n\n",
+    chunkingTokenizerFn: any = undefined
+    // callback_manager: any = undefined
   ) {
-    if (chunk_overlap > chunk_size) {
+    if (chunkOverlap > chunkSize) {
       throw new Error(
-        `Got a larger chunk overlap (${chunk_overlap}) than chunk size (${chunk_size}), should be smaller.`
+        `Got a larger chunk overlap (${chunkOverlap}) than chunk size (${chunkSize}), should be smaller.`
       );
     }
-    this._separator = separator;
-    this._chunk_size = chunk_size;
-    this._chunk_overlap = chunk_overlap;
-    this.tokenizer = tokenizer || globals_helper.tokenizer;
-    this._backup_separators = backup_separators;
-    // this.callback_manager = callback_manager || new CallbackManager([]);
+    this.chunkSize = chunkSize;
+    this.chunkOverlap = chunkOverlap;
+    // this._callback_manager = callback_manager || new CallbackManager([]);
+
+    if (chunkingTokenizerFn == undefined) {
+      // Default sentence splitter: break on sentence-ending punctuation
+      // (keeping any trailing quotes/brackets), falling back to the whole string.
+      const defaultChunkingTokenizerFn = (text: string) => {
+        const result = text.match(/[^.?!]+[.!?]+[\])'"`’”]*|.+/g);
+        return result;
+      };
+
+      chunkingTokenizerFn = defaultChunkingTokenizerFn;
+    }
+
+    if (tokenizer == undefined || tokenizerDecoder == undefined) {
+      // Fall back to tiktoken-node's GPT-2 encoding for counting and decoding tokens.
+      const tiktoken = require("tiktoken-node");
+      const enc = tiktoken.getEncoding("gpt2");
+      const defaultTokenizer = (text: string) => {
+        return enc.encode(text);
+      };
+      const defaultTokenizerDecoder = (tokens: number[]) => {
+        return enc.decode(tokens);
+      };
+      tokenizer = defaultTokenizer;
+      tokenizerDecoder = defaultTokenizerDecoder;
+    }
+    this.tokenizer = tokenizer;
+    this.tokenizerDecoder = tokenizerDecoder;
+
+    this.paragraphSeparator = paragraphSeparator;
+    this.chunkingTokenizerFn = chunkingTokenizerFn;
   }
 
-  private _reduceChunkSize(
-    start_idx: number,
-    cur_idx: number,
-    splits: string[]
-  ): number {
-    let current_doc_total = this.tokenizer(
-      splits.slice(start_idx, cur_idx).join(this._separator)
-    ).length;
-    while (current_doc_total > this._chunk_size) {
-      const percent_to_reduce =
-        (current_doc_total - this._chunk_size) / current_doc_total;
-      const num_to_reduce =
-        parseInt(percent_to_reduce.toString()) * (cur_idx - start_idx) + 1;
-      cur_idx -= num_to_reduce;
-      current_doc_total = this.tokenizer(
-        splits.slice(start_idx, cur_idx).join(this._separator)
-      ).length;
+  private getEffectiveChunkSize(extraInfoStr?: string): number {
+    // get "effective" chunk size by removing the metadata
+    let effectiveChunkSize;
+    if (extraInfoStr != undefined) {
+      const numExtraTokens = this.tokenizer(`${extraInfoStr}\n\n`).length + 1;
+      effectiveChunkSize = this.chunkSize - numExtraTokens;
+      if (effectiveChunkSize <= 0) {
+        throw new Error(
+          "Effective chunk size is non-positive after considering extraInfoStr"
+        );
+      }
+    } else {
+      effectiveChunkSize = this.chunkSize;
     }
-    return cur_idx;
+    return effectiveChunkSize;
   }
 
-  _preprocessSplits(splits: Array<string>, chunk_size: number): Array<string> {
-    const new_splits: Array<string> = [];
-    for (const split of splits) {
-      const num_cur_tokens = tokenizer(split).length;
-      if (num_cur_tokens <= chunk_size) {
-        new_splits.push(split);
+  getParagraphSplits(text: string, effectiveChunkSize?: number): string[] {
+    // get paragraph splits
+    let paragraphSplits: string[] = text.split(this.paragraphSeparator);
+    let idx = 0;
+    if (effectiveChunkSize == undefined) {
+      return paragraphSplits;
+    }
+
+    // merge paragraphs that are too small
+    while (idx < paragraphSplits.length) {
+      if (
+        idx < paragraphSplits.length - 1 &&
+        paragraphSplits[idx].length < effectiveChunkSize
+      ) {
+        paragraphSplits[idx] = [
+          paragraphSplits[idx],
+          paragraphSplits[idx + 1],
+        ].join(this.paragraphSeparator);
+        paragraphSplits.splice(idx + 1, 1);
       } else {
-        let cur_splits: Array<string> = [split];
-        if (backup_separators) {
-          for (const sep of backup_separators) {
-            if (split.includes(sep)) {
-              cur_splits = split.split(sep);
-              break;
-            }
-          }
-        } else {
-          cur_splits = [split];
-        }
+        idx += 1;
+      }
+    }
+    return paragraphSplits;
+  }
 
-        const cur_splits2: Array<string> = [];
-        for (const cur_split of cur_splits) {
-          const num_cur_tokens = tokenizer(cur_split).length;
-          if (num_cur_tokens <= chunk_size) {
-            cur_splits2.push(cur_split);
-          } else {
-            // split cur_split according to chunk size of the token numbers
-            const cur_split_chunks: Array<string> = [];
-            let end_idx = cur_split.length;
-            while (tokenizer(cur_split.slice(0, end_idx)).length > chunk_size) {
-              for (let i = 1; i < end_idx; i++) {
-                const tmp_split = cur_split.slice(0, end_idx - i);
-                if (tokenizer(tmp_split).length <= chunk_size) {
-                  cur_split_chunks.push(tmp_split);
-                  cur_splits2.push(cur_split.slice(end_idx - i, end_idx));
-                  end_idx = cur_split.length;
-                  break;
-                }
-              }
-            }
-            cur_split_chunks.push(cur_split);
-            cur_splits2.push(...cur_split_chunks);
-          }
-        }
-        new_splits.push(...cur_splits2);
+  getSentenceSplits(text: string, effectiveChunkSize?: number): string[] {
+    let paragraphSplits = this.getParagraphSplits(text, effectiveChunkSize);
+    // Next we split each paragraph using the chunking tokenizer fn.
+    let splits = [];
+    for (const parText of paragraphSplits) {
+      // the chunking tokenizer may return null (e.g. for an empty paragraph)
+      const sentenceSplits = this.chunkingTokenizerFn(parText) ?? [];
+      for (const sentenceSplit of sentenceSplits) {
+        splits.push(sentenceSplit.trim());
       }
     }
-    return new_splits;
+    return splits;
   }
 
-  _postprocessSplits(docs: TextSplit[]): TextSplit[] {
-    const new_docs: TextSplit[] = [];
-    for (const doc of docs) {
-      if (doc.text_chunk.replace(" ", "") == "") {
-        continue;
+  private processSentenceSplits(
+    sentenceSplits: string[],
+    effectiveChunkSize: number
+  ): SplitRep[] {
+    // Process sentence splits.
+    // Primarily check if any sentences exceed the chunk size; if they do,
+    // force-split them with the tokenizer.
+    let newSplits: SplitRep[] = [];
+    for (const split of sentenceSplits) {
+      const splitTokens = this.tokenizer(split);
+      const splitLen = splitTokens.length;
+      if (splitLen <= effectiveChunkSize) {
+        newSplits.push([split, splitLen]);
+      } else {
+        for (let i = 0; i < splitLen; i += effectiveChunkSize) {
+          const curSplit = this.tokenizerDecoder(
+            splitTokens.slice(i, i + effectiveChunkSize)
+          );
+          newSplits.push([curSplit, effectiveChunkSize]);
+        }
       }
-      new_docs.push(doc);
     }
-    return new_docs;
+    return newSplits;
   }
 
-  splitText(text: string, extra_info_str?: string): string[] {
-    const text_splits = this.splitTextWithOverlaps(text);
-    const chunks = text_splits.map((text_split) => text_split.text_chunk);
-    return chunks;
+  combineTextSplits(
+    newSentenceSplits: SplitRep[],
+    effectiveChunkSize: number
+  ): TextSplit[] {
+    // Go through sentence splits and combine them into chunks that fit
+    // within the chunk size.
+
+    // docs represents the final list of text chunks
+    let docs: TextSplit[] = [];
+    // curDocList is the current list of sentence splits that
+    // will be merged into the next chunk
+    let curDocList: string[] = [];
+    let bufferTokens = 0;
+    let curDocTokens = 0;
+    // curDocBuffer represents the current document buffer
+    let curDocBuffer: SplitRep[] = [];
+
+    for (let i = 0; i < newSentenceSplits.length; i++) {
+      // update buffer
+      curDocBuffer.push(newSentenceSplits[i]);
+      bufferTokens += newSentenceSplits[i][1] + 1;
+
+      // trim the buffer so it never holds more than chunkOverlap tokens;
+      // whatever remains is the overlap carried into the next chunk
+      while (bufferTokens > this.chunkOverlap) {
+        // remove the first element from curDocBuffer
+        let firstElement = curDocBuffer.shift();
+        if (firstElement == undefined) {
+          throw new Error("curDocBuffer should not be empty");
+        }
+        bufferTokens -= firstElement[1];
+        bufferTokens -= 1;
+      }
+
+      // if adding newSentenceSplits[i] to the current chunk would exceed
+      // effectiveChunkSize, flush the current chunk to docs first
+      if (curDocTokens + newSentenceSplits[i][1] > effectiveChunkSize) {
+        // push the current doc list to docs
+        docs.push(new TextSplit(curDocList.join(" ").trim()));
+        // re-seed the next chunk with the overlap buffer
+        curDocTokens = 0;
+        curDocList = [];
+        for (let j = 0; j < curDocBuffer.length; j++) {
+          curDocList.push(curDocBuffer[j][0]);
+          curDocTokens += curDocBuffer[j][1] + 1;
+        }
+      }
+
+      curDocList.push(newSentenceSplits[i][0]);
+      curDocTokens += newSentenceSplits[i][1] + 1;
+    }
+    docs.push(new TextSplit(curDocList.join(" ").trim()));
+    return docs;
   }
 
-  splitTextWithOverlaps(text: string) {}
+  splitTextWithOverlaps(text: string, extraInfoStr?: string): TextSplit[] {
+    // Split incoming text and return chunks with overlap size.
+    // Has a preference for complete sentences, phrases, and minimal overlap.
 
-  truncateText(text: string, separator: string, chunk_size: number): string {
     if (text == "") {
-      return "";
-    }
-    // First we naively split the large input into a bunch of smaller ones.
-    let splits: string[] = text.split(separator);
-    splits = preprocessSplits(splits, chunk_size);
-
-    let start_idx = 0;
-    let cur_idx = 0;
-    let cur_total = 0;
-    while (cur_idx < splits.length) {
-      let cur_token = splits[cur_idx];
-      let num_cur_tokens = Math.max(tokenizer(cur_token).length, 1);
-      if (cur_total + num_cur_tokens > chunk_size) {
-        cur_idx = reduce_chunk_size(start_idx, cur_idx, splits);
-        break;
-      }
-      cur_total += num_cur_tokens;
-      cur_idx += 1;
+      return [];
     }
-    return splits.slice(start_idx, cur_idx).join(separator);
+
+    let effectiveChunkSize = this.getEffectiveChunkSize(extraInfoStr);
+    let sentenceSplits = this.getSentenceSplits(text, effectiveChunkSize);
+
+    // Check if any sentences exceed the chunk size; if they do,
+    // force-split them with the tokenizer
+    let newSentenceSplits = this.processSentenceSplits(
+      sentenceSplits,
+      effectiveChunkSize
+    );
+
+    // combine sentence splits into chunks of text that can then be returned
+    let combinedTextSplits = this.combineTextSplits(
+      newSentenceSplits,
+      effectiveChunkSize
+    );
+
+    return combinedTextSplits;
+  }
+
+  splitText(text: string, extraInfoStr?: string): string[] {
+    const textSplits = this.splitTextWithOverlaps(text, extraInfoStr);
+    const chunks = textSplits.map((textSplit) => textSplit.textChunk);
+    return chunks;
   }
 }
diff --git a/packages/core/src/tests/TextSplitter.test.ts b/packages/core/src/tests/TextSplitter.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..1cfd7abf614ffc4db55507b4052e677893dc7166
--- /dev/null
+++ b/packages/core/src/tests/TextSplitter.test.ts
@@ -0,0 +1,43 @@
+import { SentenceSplitter } from "../TextSplitter";
+
+describe("SentenceSplitter", () => {
+  test("initializes", () => {
+    const sentenceSplitter = new SentenceSplitter();
+    expect(sentenceSplitter).toBeDefined();
+  });
+
+  test("splits paragraphs w/o effective chunk size", () => {
+    const sentenceSplitter = new SentenceSplitter(
+      undefined, undefined, undefined, undefined, "\n"
+    );
+    let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", undefined);
+    expect(splits).toEqual(["This is a paragraph.", "This is another paragraph."]);
+  });
+
+  test("splits paragraphs with effective chunk size", () => {
+    const sentenceSplitter = new SentenceSplitter(
+      undefined, undefined, undefined, undefined, "\n"
+    );
+    let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", 1000);
+    expect(splits).toEqual(["This is a paragraph.\nThis is another paragraph."]);
+  });
+  
+  test("splits sentences", () => {
+    const sentenceSplitter = new SentenceSplitter();
+    let splits = sentenceSplitter.getSentenceSplits("This is a sentence. This is another sentence.", undefined);
+    expect(splits).toEqual(["This is a sentence.", "This is another sentence."]);
+  });
+
+  test("overall split text", () => {
+    let sentenceSplitter = new SentenceSplitter(5, 0);
+    let splits = sentenceSplitter.splitText("This is a sentence. This is another sentence.");
+    expect(splits).toEqual(["This is a sentence.", "This is another sentence."]);
+
+    sentenceSplitter = new SentenceSplitter(1000);
+    splits = sentenceSplitter.splitText("This is a sentence. This is another sentence.");
+    expect(splits).toEqual(["This is a sentence. This is another sentence."]);
+  });
+
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0dd717a424f7be485e041adf75188102f36f7b99..af9f950080dba1b67693cb936e6dd25694066d5b 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -29,12 +29,18 @@ importers:
       prettier-plugin-tailwindcss:
         specifier: ^0.3.0
         version: 0.3.0(prettier@2.8.8)
+      tiktoken-node:
+        specifier: latest
+        version: 0.0.6
       ts-jest:
         specifier: ^29.1.0
         version: 29.1.0(@babel/core@7.22.5)(jest@29.5.0)(typescript@4.9.5)
       turbo:
         specifier: latest
         version: 1.10.3
+      wink-nlp:
+        specifier: latest
+        version: 1.14.1
 
   apps/docs:
     dependencies:
@@ -116,9 +122,6 @@ importers:
 
   packages/core:
     dependencies:
-      '@vespaiach/axios-fetch-adapter':
-        specifier: ^0.3.1
-        version: 0.3.1(axios@0.26.1)
       axios:
         specifier: ^0.26.1
         version: 0.26.1
@@ -128,6 +131,10 @@ importers:
       openai:
         specifier: ^3.3.0
         version: 3.3.0
+    devDependencies:
+      '@types/node':
+        specifier: ^18
+        version: 18.6.0
 
   packages/eslint-config-custom:
     dependencies:
@@ -1207,14 +1214,6 @@ packages:
       eslint-visitor-keys: 3.4.0
     dev: false
 
-  /@vespaiach/axios-fetch-adapter@0.3.1(axios@0.26.1):
-    resolution: {integrity: sha512-+1F52VWXmQHSRFSv4/H0wtnxfvjRMPK5531e880MIjypPdUSX6QZuoDgEVeCE1vjhzDdxCVX7rOqkub7StEUwQ==}
-    peerDependencies:
-      axios: '>=0.26.0'
-    dependencies:
-      axios: 0.26.1
-    dev: false
-
   /acorn-jsx@5.3.2(acorn@7.4.1):
     resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
     peerDependencies:
@@ -4763,6 +4762,11 @@ packages:
     resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
     dev: true
 
+  /tiktoken-node@0.0.6:
+    resolution: {integrity: sha512-MiprfzPhoKhCflzl0Jyds0VKibAgUGHfJLvBCAXPpum6Lru6ZoKQGsl8lJP0B94LPpby2B2WveOB2tZVfEZQOQ==}
+    engines: {node: '>= 14'}
+    dev: true
+
   /title-case@2.1.1:
     resolution: {integrity: sha512-EkJoZ2O3zdCz3zJsYCsxyq2OC5hrxR9mfdd5I+w8h/tmFfeOxJ+vvkxsKxdmN0WtS9zLdHEgfgVOiMVgv+Po4Q==}
     dependencies:
@@ -5114,6 +5118,10 @@ packages:
     dependencies:
       isexe: 2.0.0
 
+  /wink-nlp@1.14.1:
+    resolution: {integrity: sha512-RIdUZI3ei3OB6OY5f3jNo74fmsfPV7cfwiJ2fvBM1xzGnnl2CjRJmwGwsO04n0xl28vDTtxj6AlhIb74XQLoqQ==}
+    dev: true
+
   /word-wrap@1.2.3:
     resolution: {integrity: sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==}
     engines: {node: '>=0.10.0'}