diff --git a/packages/core/package.json b/packages/core/package.json
index 5d04303ac8ac5af8065f079c84256e2403facc66..881f4686d800fe5ac72464077730642ee4afc3b1 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -9,6 +9,7 @@
     "@notionhq/client": "^2.2.14",
     "@xenova/transformers": "^2.10.0",
     "assemblyai": "^4.0.0",
+    "compromise": "^14.10.1",
     "crypto-js": "^4.2.0",
     "file-type": "^18.7.0",
     "js-tiktoken": "^1.0.8",
diff --git a/packages/core/src/TextSplitter.ts b/packages/core/src/TextSplitter.ts
index 4931bcd0d9c04376ea67dc7f4ab5acf5d5cf4ce0..36708004c59ea5931dfdc17acc99336dee2bcc20 100644
--- a/packages/core/src/TextSplitter.ts
+++ b/packages/core/src/TextSplitter.ts
@@ -1,3 +1,4 @@
+import nlp from 'compromise'
 import { EOL } from 'node:os'
 // GitHub translated
 import { globalsHelper } from "./GlobalsHelper";
@@ -18,28 +19,32 @@ class TextSplit {
 
 type SplitRep = { text: string; numTokens: number };
 
-/**
- * Tokenizes sentences. Suitable for English and most European languages.
- * @param text
- * @returns
- */
-export const englishSentenceTokenizer = (text: string) => {
-  // The first part is a lazy match for any character.
-  return text.match(/.+?[.?!]+[\])'"`’”]*(?:\s|$)|.+/g);
+// Default tokenizer: splits text into sentences via compromise and returns each sentence's text.
+export const defaultSentenceTokenizer = (text: string): string[] =>
+  nlp(text).sentences().json().map((entry: any) => entry.text);
 
+// Refs: https://github.com/fxsjy/jieba/issues/575#issuecomment-359637511
+const resentencesp = /([﹒﹔﹖﹗.;。!?]["’”」』]{0,2}|:(?=["‘“「『]{1,2}|$))/;
 /**
- * Tokenizes sentences. Suitable for Chinese, Japanese, and Korean.
+ * Tokenizes sentences. Suitable for Chinese, Japanese, and Korean. Use instead of `defaultSentenceTokenizer`.
  * @param text
- * @returns
+ * @returns the sentences found in the input, each keeping its closing punctuation
  */
-export const cjkSentenceTokenizer = (text: string) => {
-  // Accepts english style sentence endings with space and
-  // CJK style sentence endings with no space.
-  return text.match(
-    /.+?[.?!]+[\])'"`’”]*(?:\s|$)|.+?[。?!]+[\])'"`’”]*(?:\s|$)?|.+/g,
-  );
-};
+export function cjkSentenceTokenizer(sentence: string): string[] {
+  const slist: string[] = [];
+  // split() with a capturing group yields [text, delimiter, text, ...],
+  // so odd indices are always delimiters. Re-testing each part with the
+  // regex would misread text ending in a colon as a delimiter (via `$`).
+  sentence.split(resentencesp).forEach((part, i) => {
+    if (!part) return; // skip empty fragments at the edges
+    if (i % 2 === 1 && slist.length > 0) {
+      slist[slist.length - 1] += part; // glue delimiter to its sentence
+    } else {
+      slist.push(part);
+    }
+  });
+  return slist;
+}
 
 export const defaultParagraphSeparator = EOL + EOL + EOL
 
@@ -57,7 +62,7 @@ export class SentenceSplitter {
   private tokenizer: any;
   private tokenizerDecoder: any;
   private paragraphSeparator: string;
-  private chunkingTokenizerFn: (text: string) => RegExpMatchArray | null;
+  private chunkingTokenizerFn: (text: string) => string[];
   private splitLongSentences: boolean;
 
   constructor(options?: {
@@ -66,7 +71,7 @@ export class SentenceSplitter {
     tokenizer?: any;
     tokenizerDecoder?: any;
     paragraphSeparator?: string;
-    chunkingTokenizerFn?: (text: string) => RegExpMatchArray | null;
+    chunkingTokenizerFn?: (text: string) => string[];
     splitLongSentences?: boolean;
   }) {
     const {
@@ -75,7 +80,7 @@ export class SentenceSplitter {
       tokenizer = null,
       tokenizerDecoder = null,
       paragraphSeparator = defaultParagraphSeparator,
-      chunkingTokenizerFn = undefined,
+      chunkingTokenizerFn,
       splitLongSentences = false,
     } = options ?? {};
 
@@ -93,7 +98,7 @@ export class SentenceSplitter {
       tokenizerDecoder ?? globalsHelper.tokenizerDecoder();
 
     this.paragraphSeparator = paragraphSeparator;
-    this.chunkingTokenizerFn = chunkingTokenizerFn ?? englishSentenceTokenizer;
+    this.chunkingTokenizerFn = chunkingTokenizerFn ?? defaultSentenceTokenizer;
     this.splitLongSentences = splitLongSentences;
   }
 
@@ -218,15 +223,16 @@ export class SentenceSplitter {
         curChunkTokens + newSentenceSplits[i].numTokens >
         effectiveChunkSize
       ) {
-        // push curent doc list to docs
-        docs.push(
-          new TextSplit(
-            curChunkSentences
-              .map((sentence) => sentence.text)
-              .join(" ")
-              .trim(),
-          ),
-        );
+        if (curChunkSentences.length > 0) {
+          // push current doc list to docs
+          docs.push(
+            new TextSplit(
+              curChunkSentences.map((sentence) => sentence.text).
+                join(" ").
+                trim(),
+            ),
+          );
+        }
 
         const lastChunkSentences = curChunkSentences;
 
diff --git a/packages/core/src/tests/TextSplitter.test.ts b/packages/core/src/tests/TextSplitter.test.ts
index 0b176293a363aca9af221ce8f2d9d4f6b07efc29..591fd493c1c96127458443cb4d41f1d02a878872 100644
--- a/packages/core/src/tests/TextSplitter.test.ts
+++ b/packages/core/src/tests/TextSplitter.test.ts
@@ -1,4 +1,4 @@
-import { SentenceSplitter, cjkSentenceTokenizer } from "../TextSplitter";
+import { cjkSentenceTokenizer, SentenceSplitter } from '../TextSplitter'
 
 describe("SentenceSplitter", () => {
   test("initializes", () => {
@@ -88,7 +88,12 @@ describe("SentenceSplitter", () => {
       chunkingTokenizerFn: cjkSentenceTokenizer,
     });
 
-    const splits = sentenceSplitter.splitText("这是一个句子!这是另一个句子。");
-    expect(splits).toEqual(["这是一个句子!", "这是另一个句子。"]);
+    const splits = sentenceSplitter.splitText("此后如竟没有炬火:我便是唯一的光。倘若有了炬火,出了太阳,我们自然心悦诚服的消失。不但毫无不平,而且还要随喜赞美这炬火或太阳;因为他照了人类,连我都在内。");
+    expect(splits).toEqual([
+      "此后如竟没有炬火:我便是唯一的光。",
+      "倘若有了炬火,出了太阳,我们自然心悦诚服的消失。",
+      "不但毫无不平,而且还要随喜赞美这炬火或太阳;",
+      "因为他照了人类,连我都在内。",
+    ]);
   });
 });
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index f107577c1d269910c891467bc58fec4d602bfbae..c68f43736929dfc09837a5ffaa3da695e24daf5f 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -17,7 +17,7 @@ importers:
         version: 2.26.2
       '@turbo/gen':
         specifier: ^1.10.16
-        version: 1.10.16(@types/node@20.10.5)(typescript@5.3.3)
+        version: 1.10.16(@types/node@18.19.2)(typescript@5.3.3)
       '@types/jest':
         specifier: ^29.5.10
         version: 29.5.10
@@ -32,7 +32,7 @@ importers:
         version: 8.0.3
       jest:
         specifier: ^29.7.0
-        version: 29.7.0(@types/node@20.10.5)
+        version: 29.7.0(@types/node@18.19.2)
       lint-staged:
         specifier: ^15.1.0
         version: 15.1.0
@@ -155,6 +155,9 @@ importers:
       assemblyai:
         specifier: ^4.0.0
         version: 4.0.0
+      compromise:
+        specifier: ^14.10.1
+        version: 14.10.1
       crypto-js:
         specifier: ^4.2.0
         version: 4.2.0
@@ -4289,7 +4292,7 @@ packages:
     resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==}
     dev: true
 
-  /@turbo/gen@1.10.16(@types/node@20.10.5)(typescript@5.3.3):
+  /@turbo/gen@1.10.16(@types/node@18.19.2)(typescript@5.3.3):
     resolution: {integrity: sha512-PzyluADjVuy5OcIi+/aRcD70OElQpRVRDdfZ9fH8G5Fv75lQcNrjd1bBGKmhjSw+g+eTEkXMGnY7s6gsCYjYTQ==}
     hasBin: true
     dependencies:
@@ -4301,7 +4304,7 @@ packages:
       minimatch: 9.0.3
       node-plop: 0.26.3
       proxy-agent: 6.3.1
-      ts-node: 10.9.1(@types/node@20.10.5)(typescript@5.3.3)
+      ts-node: 10.9.1(@types/node@18.19.2)(typescript@5.3.3)
       update-check: 1.5.4
       validate-npm-package-name: 5.0.0
     transitivePeerDependencies:
@@ -4626,12 +4629,6 @@ packages:
     dependencies:
       undici-types: 5.26.5
 
-  /@types/node@20.10.5:
-    resolution: {integrity: sha512-nNPsNE65wjMxEKI93yOP+NPGGBJz/PoN3kZsVLee0XMiJolxSekEVD8wRwBUBqkwc7UWop0edW50yrCQW4CyRw==}
-    dependencies:
-      undici-types: 5.26.5
-    dev: true
-
   /@types/node@20.9.0:
     resolution: {integrity: sha512-nekiGu2NDb1BcVofVcEKMIwzlx4NjHlcjhoxxKBNLtz15Y1z7MYf549DFvkHSId02Ax6kGwWntIBPC3l/JZcmw==}
     dependencies:
@@ -6513,6 +6510,15 @@ packages:
       - supports-color
     dev: false
 
+  /compromise@14.10.1:
+    resolution: {integrity: sha512-GX91lZfJsma34HHifGlmnoWdu45PreuRFjrccCSAZq+r7Jb0wdKxKZWhyi8OSPvZ0+xk7LclDakUnd/Np57ZRQ==}
+    engines: {node: '>=12.0.0'}
+    dependencies:
+      efrt: 2.7.0
+      grad-school: 0.0.5
+      suffix-thumb: 5.0.2
+    dev: false
+
   /concat-map@0.0.1:
     resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
 
@@ -6718,7 +6724,7 @@ packages:
       sha.js: 2.4.11
     dev: true
 
-  /create-jest@29.7.0(@types/node@20.10.5):
+  /create-jest@29.7.0(@types/node@18.19.2):
     resolution: {integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
     hasBin: true
@@ -6727,7 +6733,7 @@ packages:
       chalk: 4.1.2
       exit: 0.1.2
       graceful-fs: 4.2.11
-      jest-config: 29.7.0(@types/node@20.10.5)
+      jest-config: 29.7.0(@types/node@18.19.2)
       jest-util: 29.7.0
       prompts: 2.4.2
     transitivePeerDependencies:
@@ -7514,6 +7520,11 @@ packages:
     resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
     dev: false
 
+  /efrt@2.7.0:
+    resolution: {integrity: sha512-/RInbCy1d4P6Zdfa+TMVsf/ufZVotat5hCw3QXmWtjU+3pFEOvOQ7ibo3aIxyCJw2leIeAMjmPj+1SLJiCpdrQ==}
+    engines: {node: '>=12.0.0'}
+    dev: false
+
   /electron-to-chromium@1.4.530:
     resolution: {integrity: sha512-rsJ9O8SCI4etS8TBsXuRfHa2eZReJhnGf5MHZd3Vo05PukWHKXhk3VQGbHHnDLa8nZz9woPCpLCMQpLGgkGNRA==}
 
@@ -9094,6 +9105,11 @@ packages:
   /graceful-fs@4.2.11:
     resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
 
+  /grad-school@0.0.5:
+    resolution: {integrity: sha512-rXunEHF9M9EkMydTBux7+IryYXEZinRk6g8OBOGDBzo/qWJjhTxy86i5q7lQYpCLHN8Sqv1XX3OIOc7ka2gtvQ==}
+    engines: {node: '>=8.0.0'}
+    dev: false
+
   /gradient-string@2.0.2:
     resolution: {integrity: sha512-rEDCuqUQ4tbD78TpzsMtt5OIf0cBCSDWSJtUDaF6JsAh+k0v9r++NzxNEG87oDZx9ZwGhD8DaezR2L/yrw0Jdw==}
     engines: {node: '>=10'}
@@ -10222,7 +10238,7 @@ packages:
       - supports-color
     dev: true
 
-  /jest-cli@29.7.0(@types/node@20.10.5):
+  /jest-cli@29.7.0(@types/node@18.19.2):
     resolution: {integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
     hasBin: true
@@ -10236,10 +10252,10 @@ packages:
       '@jest/test-result': 29.7.0
       '@jest/types': 29.6.3
       chalk: 4.1.2
-      create-jest: 29.7.0(@types/node@20.10.5)
+      create-jest: 29.7.0(@types/node@18.19.2)
       exit: 0.1.2
       import-local: 3.1.0
-      jest-config: 29.7.0(@types/node@20.10.5)
+      jest-config: 29.7.0(@types/node@18.19.2)
       jest-util: 29.7.0
       jest-validate: 29.7.0
       yargs: 17.7.2
@@ -10250,7 +10266,7 @@ packages:
       - ts-node
     dev: true
 
-  /jest-config@29.7.0(@types/node@20.10.3):
+  /jest-config@29.7.0(@types/node@18.19.2):
     resolution: {integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
     peerDependencies:
@@ -10265,7 +10281,7 @@ packages:
       '@babel/core': 7.23.3
       '@jest/test-sequencer': 29.7.0
       '@jest/types': 29.6.3
-      '@types/node': 20.10.3
+      '@types/node': 18.19.2
       babel-jest: 29.7.0(@babel/core@7.23.3)
       chalk: 4.1.2
       ci-info: 3.9.0
@@ -10290,7 +10306,7 @@ packages:
       - supports-color
     dev: true
 
-  /jest-config@29.7.0(@types/node@20.10.5):
+  /jest-config@29.7.0(@types/node@20.10.3):
     resolution: {integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
     peerDependencies:
@@ -10305,7 +10321,7 @@ packages:
       '@babel/core': 7.23.3
       '@jest/test-sequencer': 29.7.0
       '@jest/types': 29.6.3
-      '@types/node': 20.10.5
+      '@types/node': 20.10.3
       babel-jest: 29.7.0(@babel/core@7.23.3)
       chalk: 4.1.2
       ci-info: 3.9.0
@@ -10619,7 +10635,7 @@ packages:
       merge-stream: 2.0.0
       supports-color: 8.1.1
 
-  /jest@29.7.0(@types/node@20.10.5):
+  /jest@29.7.0(@types/node@18.19.2):
     resolution: {integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
     hasBin: true
@@ -10632,7 +10648,7 @@ packages:
       '@jest/core': 29.7.0
       '@jest/types': 29.6.3
       import-local: 3.1.0
-      jest-cli: 29.7.0(@types/node@20.10.5)
+      jest-cli: 29.7.0(@types/node@18.19.2)
     transitivePeerDependencies:
       - '@types/node'
       - babel-plugin-macros
@@ -14854,6 +14870,10 @@ packages:
       ts-interface-checker: 0.1.13
     dev: true
 
+  /suffix-thumb@5.0.2:
+    resolution: {integrity: sha512-I5PWXAFKx3FYnI9a+dQMWNqTxoRt6vdBdb0O+BJ1sxXCWtSoQCusc13E58f+9p4MYx/qCnEMkD5jac6K2j3dgA==}
+    dev: false
+
   /supports-color@5.5.0:
     resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==}
     engines: {node: '>=4'}
@@ -15253,7 +15273,7 @@ packages:
       '@babel/core': 7.23.3
       bs-logger: 0.2.6
       fast-json-stable-stringify: 2.1.0
-      jest: 29.7.0(@types/node@20.10.5)
+      jest: 29.7.0(@types/node@18.19.2)
       jest-util: 29.7.0
       json5: 2.2.3
       lodash.memoize: 4.1.2
@@ -15294,7 +15314,7 @@ packages:
       yn: 3.1.1
     dev: true
 
-  /ts-node@10.9.1(@types/node@20.10.5)(typescript@5.3.3):
+  /ts-node@10.9.1(@types/node@18.19.2)(typescript@5.3.3):
     resolution: {integrity: sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==}
     hasBin: true
     peerDependencies:
@@ -15313,7 +15333,7 @@ packages:
       '@tsconfig/node12': 1.0.11
       '@tsconfig/node14': 1.0.3
       '@tsconfig/node16': 1.0.4
-      '@types/node': 20.10.5
+      '@types/node': 18.19.2
       acorn: 8.11.2
       acorn-walk: 8.3.0
       arg: 4.1.3