diff --git a/.changeset/quick-seas-hug.md b/.changeset/quick-seas-hug.md
new file mode 100644
index 0000000000000000000000000000000000000000..683ebc437484037c85d1ec12cab560ce27caee84
--- /dev/null
+++ b/.changeset/quick-seas-hug.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/core": patch
+---
+
+chore: bump `natural` to 8.0.1
diff --git a/packages/core/package.json b/packages/core/package.json
index 3a79ef3f272e46869bdc8f9d88850e0ff44a5e29..38e4df3cc0e605b52e0a0c8d5e79ff5986c59ec8 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -132,7 +132,7 @@
   "devDependencies": {
     "ajv": "^8.16.0",
     "bunchee": "5.3.1",
-    "natural": "^7.1.0"
+    "natural": "^8.0.1"
   },
   "dependencies": {
     "@llamaindex/env": "workspace:*",
diff --git a/packages/core/src/node-parser/sentence-tokenizer-parser.js b/packages/core/src/node-parser/sentence-tokenizer-parser.js
deleted file mode 100644
index ea052634362b11cd49ff532cc45ebc2b76890e90..0000000000000000000000000000000000000000
--- a/packages/core/src/node-parser/sentence-tokenizer-parser.js
+++ /dev/null
@@ -1,1571 +0,0 @@
-var __getOwnPropNames = Object.getOwnPropertyNames;
-var cjs = (cb, mod) =>
-  function _r() {
-    return (
-      mod ||
-        (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod),
-      mod.exports
-    );
-  };
-
-// lib/natural/util/abbreviations_en.js
-var require_abbreviations_en = cjs({
-  "lib/natural/util/abbreviations_en.js"(exports) {
-    "use strict";
-    var knownAbbreviations = [
-      "approx.",
-      "appt.",
-      "apt.",
-      "A.S.A.P.",
-      "B.Y.O.B.",
-      "c/o",
-      "dept.",
-      "D.I.Y.",
-      "est.",
-      "E.T.A.",
-      "Inc.",
-      "min.",
-      "misc.",
-      "Mr.",
-      "Mrs.",
-      "no.",
-      "R.S.V.P.",
-      "tel.",
-      "temp.",
-      "vet.",
-      "vs.",
-    ];
-    exports.knownAbbreviations = knownAbbreviations;
-  },
-});
-
-// lib/natural/tokenizers/parser_sentence_tokenizer.js
-var require_parser_sentence_tokenizer = cjs({
-  "lib/natural/tokenizers/parser_sentence_tokenizer.js"(exports, module) {
-    "use strict";
-    function peg$subclass(child, parent) {
-      function ctor() {
-        this.constructor = child;
-      }
-      ctor.prototype = parent.prototype;
-      child.prototype = new ctor();
-    }
-    function peg$SyntaxError(message, expected, found, location) {
-      this.message = message;
-      this.expected = expected;
-      this.found = found;
-      this.location = location;
-      this.name = "SyntaxError";
-      if (typeof Error.captureStackTrace === "function") {
-        Error.captureStackTrace(this, peg$SyntaxError);
-      }
-    }
-    peg$subclass(peg$SyntaxError, Error);
-    peg$SyntaxError.buildMessage = function (expected, found) {
-      var DESCRIBE_EXPECTATION_FNS = {
-        literal: function (expectation) {
-          return '"' + literalEscape(expectation.text) + '"';
-        },
-        class: function (expectation) {
-          var escapedParts = "",
-            i;
-          for (i = 0; i < expectation.parts.length; i++) {
-            escapedParts +=
-              expectation.parts[i] instanceof Array
-                ? classEscape(expectation.parts[i][0]) +
-                  "-" +
-                  classEscape(expectation.parts[i][1])
-                : classEscape(expectation.parts[i]);
-          }
-          return "[" + (expectation.inverted ? "^" : "") + escapedParts + "]";
-        },
-        any: function (expectation) {
-          return "any character";
-        },
-        end: function (expectation) {
-          return "end of input";
-        },
-        other: function (expectation) {
-          return expectation.description;
-        },
-      };
-      function hex(ch) {
-        return ch.charCodeAt(0).toString(16).toUpperCase();
-      }
-      function literalEscape(s) {
-        return s
-          .replace(/\\/g, "\\\\")
-          .replace(/"/g, '\\"')
-          .replace(/\0/g, "\\0")
-          .replace(/\t/g, "\\t")
-          .replace(/\n/g, "\\n")
-          .replace(/\r/g, "\\r")
-          .replace(/[\x00-\x0F]/g, function (ch) {
-            return "\\x0" + hex(ch);
-          })
-          .replace(/[\x10-\x1F\x7F-\x9F]/g, function (ch) {
-            return "\\x" + hex(ch);
-          });
-      }
-      function classEscape(s) {
-        return s
-          .replace(/\\/g, "\\\\")
-          .replace(/\]/g, "\\]")
-          .replace(/\^/g, "\\^")
-          .replace(/-/g, "\\-")
-          .replace(/\0/g, "\\0")
-          .replace(/\t/g, "\\t")
-          .replace(/\n/g, "\\n")
-          .replace(/\r/g, "\\r")
-          .replace(/[\x00-\x0F]/g, function (ch) {
-            return "\\x0" + hex(ch);
-          })
-          .replace(/[\x10-\x1F\x7F-\x9F]/g, function (ch) {
-            return "\\x" + hex(ch);
-          });
-      }
-      function describeExpectation(expectation) {
-        return DESCRIBE_EXPECTATION_FNS[expectation.type](expectation);
-      }
-      function describeExpected(expected2) {
-        var descriptions = new Array(expected2.length),
-          i,
-          j;
-        for (i = 0; i < expected2.length; i++) {
-          descriptions[i] = describeExpectation(expected2[i]);
-        }
-        descriptions.sort();
-        if (descriptions.length > 0) {
-          for (i = 1, j = 1; i < descriptions.length; i++) {
-            if (descriptions[i - 1] !== descriptions[i]) {
-              descriptions[j] = descriptions[i];
-              j++;
-            }
-          }
-          descriptions.length = j;
-        }
-        switch (descriptions.length) {
-          case 1:
-            return descriptions[0];
-          case 2:
-            return descriptions[0] + " or " + descriptions[1];
-          default:
-            return (
-              descriptions.slice(0, -1).join(", ") +
-              ", or " +
-              descriptions[descriptions.length - 1]
-            );
-        }
-      }
-      function describeFound(found2) {
-        return found2 ? '"' + literalEscape(found2) + '"' : "end of input";
-      }
-      return (
-        "Expected " +
-        describeExpected(expected) +
-        " but " +
-        describeFound(found) +
-        " found."
-      );
-    };
-    function peg$parse(input, options) {
-      options = options !== void 0 ? options : {};
-      var peg$FAILED = {},
-        peg$startRuleFunctions = { s: peg$parses },
-        peg$startRuleFunction = peg$parses,
-        peg$c0 = function (sentences) {
-          const result = [];
-          sentences.forEach((sent0) => {
-            sent0[0].forEach((sent1) => {
-              result.push(sent1);
-            });
-          });
-          return result;
-        },
-        peg$c1 = function (sentences) {
-          return sentences.map((sent) => {
-            sent[0].push(sent[1]);
-            return sent[0].reduce((accu, str) => accu + str).trim();
-          });
-        },
-        peg$c2 = function (open, sentences, close) {
-          const result = sentences.map((sent) => {
-            sent[0].push(sent[1]);
-            return sent[0].reduce((accu, str) => accu + str).trim();
-          });
-          result.unshift(open);
-          if (close) {
-            result.push(close);
-          }
-          return result;
-        },
-        peg$c3 = function (seqs, end) {
-          const res = seqs.reduce((accu, seq) => accu.concat(seq));
-          res.push(end);
-          return res;
-        },
-        peg$c4 = function (tokens) {
-          const result = tokens.map((pair) => pair[0] + pair[1]);
-          return result;
-        },
-        peg$c5 = function (open, tokens, end, close) {
-          const result = tokens.map((pair) => pair[0] + pair[1]);
-          result.unshift(open);
-          result.push(end);
-          result.push(close);
-          return result;
-        },
-        peg$c6 = /^[ \t\n\r.?!]/,
-        peg$c7 = peg$classExpectation(
-          [" ", "	", "\n", "\r", ".", "?", "!"],
-          false,
-          false,
-        ),
-        peg$c8 = function () {
-          return text();
-        },
-        peg$c9 = /^[ \t\n\r]/,
-        peg$c10 = peg$classExpectation([" ", "	", "\n", "\r"], false, false),
-        peg$c11 = function (t) {
-          return t;
-        },
-        peg$c12 = /^[^ \t\n\r!?([}"`)\]}"`0-9@]/,
-        peg$c13 = peg$classExpectation(
-          [
-            " ",
-            "	",
-            "\n",
-            "\r",
-            "!",
-            "?",
-            "(",
-            "[",
-            "}",
-            '"',
-            "`",
-            ")",
-            "]",
-            "}",
-            '"',
-            "`",
-            ["0", "9"],
-            "@",
-          ],
-          true,
-          false,
-        ),
-        peg$c14 = function (word) {
-          const tmp = word.reduce((accu, elt) => accu + elt);
-          return knownAbbreviations.indexOf(tmp) > -1;
-        },
-        peg$c15 = function (word) {
-          return text();
-        },
-        peg$c16 = /^[^ \t\n\r!?.([})\]}`"0-9@]/,
-        peg$c17 = peg$classExpectation(
-          [
-            " ",
-            "	",
-            "\n",
-            "\r",
-            "!",
-            "?",
-            ".",
-            "(",
-            "[",
-            "}",
-            ")",
-            "]",
-            "}",
-            "`",
-            '"',
-            ["0", "9"],
-            "@",
-          ],
-          true,
-          false,
-        ),
-        peg$c18 = function () {
-          return text();
-        },
-        peg$c19 = /^[0-9]/,
-        peg$c20 = peg$classExpectation([["0", "9"]], false, false),
-        peg$c21 = peg$anyExpectation(),
-        peg$c22 = /^[a-z]/,
-        peg$c23 = peg$classExpectation([["a", "z"]], false, false),
-        peg$c24 = /^[@]/,
-        peg$c25 = peg$classExpectation(["@"], false, false),
-        peg$c26 = /^[.]/,
-        peg$c27 = peg$classExpectation(["."], false, false),
-        peg$c28 = "http://",
-        peg$c29 = peg$literalExpectation("http://", false),
-        peg$c30 = "https://",
-        peg$c31 = peg$literalExpectation("https://", false),
-        peg$c32 = /^[a-z0-9]/,
-        peg$c33 = peg$classExpectation(
-          [
-            ["a", "z"],
-            ["0", "9"],
-          ],
-          false,
-          false,
-        ),
-        peg$c34 = /^[\/]/,
-        peg$c35 = peg$classExpectation(["/"], false, false),
-        peg$c36 = function () {
-          return text();
-        },
-        peg$c37 = /^[([{"'`\u2018]/,
-        peg$c38 = peg$classExpectation(
-          ["(", "[", "{", '"', "'", "`", "\u2018"],
-          false,
-          false,
-        ),
-        peg$c39 = /^[)\]}"'`\u2019]/,
-        peg$c40 = peg$classExpectation(
-          [")", "]", "}", '"', "'", "`", "\u2019"],
-          false,
-          false,
-        ),
-        peg$currPos = 0,
-        peg$savedPos = 0,
-        peg$posDetailsCache = [{ line: 1, column: 1 }],
-        peg$maxFailPos = 0,
-        peg$maxFailExpected = [],
-        peg$silentFails = 0,
-        peg$result;
-      if ("startRule" in options) {
-        if (!(options.startRule in peg$startRuleFunctions)) {
-          throw new Error(
-            `Can't start parsing from rule "` + options.startRule + '".',
-          );
-        }
-        peg$startRuleFunction = peg$startRuleFunctions[options.startRule];
-      }
-      function text() {
-        return input.substring(peg$savedPos, peg$currPos);
-      }
-      function location() {
-        return peg$computeLocation(peg$savedPos, peg$currPos);
-      }
-      function expected(description, location2) {
-        location2 =
-          location2 !== void 0
-            ? location2
-            : peg$computeLocation(peg$savedPos, peg$currPos);
-        throw peg$buildStructuredError(
-          [peg$otherExpectation(description)],
-          input.substring(peg$savedPos, peg$currPos),
-          location2,
-        );
-      }
-      function error(message, location2) {
-        location2 =
-          location2 !== void 0
-            ? location2
-            : peg$computeLocation(peg$savedPos, peg$currPos);
-        throw peg$buildSimpleError(message, location2);
-      }
-      function peg$literalExpectation(text2, ignoreCase) {
-        return { type: "literal", text: text2, ignoreCase };
-      }
-      function peg$classExpectation(parts, inverted, ignoreCase) {
-        return { type: "class", parts, inverted, ignoreCase };
-      }
-      function peg$anyExpectation() {
-        return { type: "any" };
-      }
-      function peg$endExpectation() {
-        return { type: "end" };
-      }
-      function peg$otherExpectation(description) {
-        return { type: "other", description };
-      }
-      function peg$computePosDetails(pos) {
-        var details = peg$posDetailsCache[pos],
-          p;
-        if (details) {
-          return details;
-        } else {
-          p = pos - 1;
-          while (!peg$posDetailsCache[p]) {
-            p--;
-          }
-          details = peg$posDetailsCache[p];
-          details = {
-            line: details.line,
-            column: details.column,
-          };
-          while (p < pos) {
-            if (input.charCodeAt(p) === 10) {
-              details.line++;
-              details.column = 1;
-            } else {
-              details.column++;
-            }
-            p++;
-          }
-          peg$posDetailsCache[pos] = details;
-          return details;
-        }
-      }
-      function peg$computeLocation(startPos, endPos) {
-        var startPosDetails = peg$computePosDetails(startPos),
-          endPosDetails = peg$computePosDetails(endPos);
-        return {
-          start: {
-            offset: startPos,
-            line: startPosDetails.line,
-            column: startPosDetails.column,
-          },
-          end: {
-            offset: endPos,
-            line: endPosDetails.line,
-            column: endPosDetails.column,
-          },
-        };
-      }
-      function peg$fail(expected2) {
-        if (peg$currPos < peg$maxFailPos) {
-          return;
-        }
-        if (peg$currPos > peg$maxFailPos) {
-          peg$maxFailPos = peg$currPos;
-          peg$maxFailExpected = [];
-        }
-        peg$maxFailExpected.push(expected2);
-      }
-      function peg$buildSimpleError(message, location2) {
-        return new peg$SyntaxError(message, null, null, location2);
-      }
-      function peg$buildStructuredError(expected2, found, location2) {
-        return new peg$SyntaxError(
-          peg$SyntaxError.buildMessage(expected2, found),
-          expected2,
-          found,
-          location2,
-        );
-      }
-      function peg$parses() {
-        var s0, s1, s2, s3, s4;
-        s0 = peg$currPos;
-        s1 = [];
-        s2 = peg$currPos;
-        s3 = peg$parseSentences();
-        if (s3 !== peg$FAILED) {
-          s4 = peg$parseWhitespace();
-          if (s4 !== peg$FAILED) {
-            s3 = [s3, s4];
-            s2 = s3;
-          } else {
-            peg$currPos = s2;
-            s2 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s2;
-          s2 = peg$FAILED;
-        }
-        if (s2 === peg$FAILED) {
-          s2 = peg$currPos;
-          s3 = peg$parseQuotedSentences();
-          if (s3 !== peg$FAILED) {
-            s4 = peg$parseWhitespace();
-            if (s4 !== peg$FAILED) {
-              s3 = [s3, s4];
-              s2 = s3;
-            } else {
-              peg$currPos = s2;
-              s2 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s2;
-            s2 = peg$FAILED;
-          }
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            s2 = peg$currPos;
-            s3 = peg$parseSentences();
-            if (s3 !== peg$FAILED) {
-              s4 = peg$parseWhitespace();
-              if (s4 !== peg$FAILED) {
-                s3 = [s3, s4];
-                s2 = s3;
-              } else {
-                peg$currPos = s2;
-                s2 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s2;
-              s2 = peg$FAILED;
-            }
-            if (s2 === peg$FAILED) {
-              s2 = peg$currPos;
-              s3 = peg$parseQuotedSentences();
-              if (s3 !== peg$FAILED) {
-                s4 = peg$parseWhitespace();
-                if (s4 !== peg$FAILED) {
-                  s3 = [s3, s4];
-                  s2 = s3;
-                } else {
-                  peg$currPos = s2;
-                  s2 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s2;
-                s2 = peg$FAILED;
-              }
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c0(s1);
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseSentences() {
-        var s0, s1, s2, s3, s4;
-        s0 = peg$currPos;
-        s1 = [];
-        s2 = peg$currPos;
-        s3 = peg$parseSentence();
-        if (s3 !== peg$FAILED) {
-          s4 = peg$parseWhitespace();
-          if (s4 !== peg$FAILED) {
-            s3 = [s3, s4];
-            s2 = s3;
-          } else {
-            peg$currPos = s2;
-            s2 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s2;
-          s2 = peg$FAILED;
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            s2 = peg$currPos;
-            s3 = peg$parseSentence();
-            if (s3 !== peg$FAILED) {
-              s4 = peg$parseWhitespace();
-              if (s4 !== peg$FAILED) {
-                s3 = [s3, s4];
-                s2 = s3;
-              } else {
-                peg$currPos = s2;
-                s2 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s2;
-              s2 = peg$FAILED;
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c1(s1);
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseQuotedSentences() {
-        var s0, s1, s2, s3, s4, s5;
-        s0 = peg$currPos;
-        s1 = peg$parseOpenSymbol();
-        if (s1 !== peg$FAILED) {
-          s2 = [];
-          s3 = peg$currPos;
-          s4 = peg$parseSentence();
-          if (s4 !== peg$FAILED) {
-            s5 = peg$parseWhitespace();
-            if (s5 !== peg$FAILED) {
-              s4 = [s4, s5];
-              s3 = s4;
-            } else {
-              peg$currPos = s3;
-              s3 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s3;
-            s3 = peg$FAILED;
-          }
-          if (s3 !== peg$FAILED) {
-            while (s3 !== peg$FAILED) {
-              s2.push(s3);
-              s3 = peg$currPos;
-              s4 = peg$parseSentence();
-              if (s4 !== peg$FAILED) {
-                s5 = peg$parseWhitespace();
-                if (s5 !== peg$FAILED) {
-                  s4 = [s4, s5];
-                  s3 = s4;
-                } else {
-                  peg$currPos = s3;
-                  s3 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s3;
-                s3 = peg$FAILED;
-              }
-            }
-          } else {
-            s2 = peg$FAILED;
-          }
-          if (s2 !== peg$FAILED) {
-            s3 = peg$parseCloseSymbol();
-            if (s3 === peg$FAILED) {
-              s3 = null;
-            }
-            if (s3 !== peg$FAILED) {
-              peg$savedPos = s0;
-              s1 = peg$c2(s1, s2, s3);
-              s0 = s1;
-            } else {
-              peg$currPos = s0;
-              s0 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseSentence() {
-        var s0, s1, s2;
-        s0 = peg$currPos;
-        s1 = [];
-        s2 = peg$parseTokenSeq();
-        if (s2 === peg$FAILED) {
-          s2 = peg$parseQuotedTokenSeq();
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            s2 = peg$parseTokenSeq();
-            if (s2 === peg$FAILED) {
-              s2 = peg$parseQuotedTokenSeq();
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          s2 = peg$parseEndOfSentence();
-          if (s2 !== peg$FAILED) {
-            peg$savedPos = s0;
-            s1 = peg$c3(s1, s2);
-            s0 = s1;
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseTokenSeq() {
-        var s0, s1, s2, s3, s4;
-        s0 = peg$currPos;
-        s1 = [];
-        s2 = peg$currPos;
-        s3 = peg$parseToken();
-        if (s3 !== peg$FAILED) {
-          s4 = peg$parseWhitespace();
-          if (s4 !== peg$FAILED) {
-            s3 = [s3, s4];
-            s2 = s3;
-          } else {
-            peg$currPos = s2;
-            s2 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s2;
-          s2 = peg$FAILED;
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            s2 = peg$currPos;
-            s3 = peg$parseToken();
-            if (s3 !== peg$FAILED) {
-              s4 = peg$parseWhitespace();
-              if (s4 !== peg$FAILED) {
-                s3 = [s3, s4];
-                s2 = s3;
-              } else {
-                peg$currPos = s2;
-                s2 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s2;
-              s2 = peg$FAILED;
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c4(s1);
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseQuotedTokenSeq() {
-        var s0, s1, s2, s3, s4, s5;
-        s0 = peg$currPos;
-        s1 = peg$parseOpenSymbol();
-        if (s1 !== peg$FAILED) {
-          s2 = [];
-          s3 = peg$currPos;
-          s4 = peg$parseToken();
-          if (s4 !== peg$FAILED) {
-            s5 = peg$parseWhitespace();
-            if (s5 !== peg$FAILED) {
-              s4 = [s4, s5];
-              s3 = s4;
-            } else {
-              peg$currPos = s3;
-              s3 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s3;
-            s3 = peg$FAILED;
-          }
-          if (s3 !== peg$FAILED) {
-            while (s3 !== peg$FAILED) {
-              s2.push(s3);
-              s3 = peg$currPos;
-              s4 = peg$parseToken();
-              if (s4 !== peg$FAILED) {
-                s5 = peg$parseWhitespace();
-                if (s5 !== peg$FAILED) {
-                  s4 = [s4, s5];
-                  s3 = s4;
-                } else {
-                  peg$currPos = s3;
-                  s3 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s3;
-                s3 = peg$FAILED;
-              }
-            }
-          } else {
-            s2 = peg$FAILED;
-          }
-          if (s2 !== peg$FAILED) {
-            s3 = peg$parseEndOfSentence();
-            if (s3 !== peg$FAILED) {
-              s4 = peg$parseCloseSymbol();
-              if (s4 !== peg$FAILED) {
-                peg$savedPos = s0;
-                s1 = peg$c5(s1, s2, s3, s4);
-                s0 = s1;
-              } else {
-                peg$currPos = s0;
-                s0 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s0;
-              s0 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseEndOfSentence() {
-        var s0, s1, s2;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c6.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c7);
-          }
-        }
-        while (s2 !== peg$FAILED) {
-          s1.push(s2);
-          if (peg$c6.test(input.charAt(peg$currPos))) {
-            s2 = input.charAt(peg$currPos);
-            peg$currPos++;
-          } else {
-            s2 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c7);
-            }
-          }
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c8();
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseWhitespace() {
-        var s0, s1, s2;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c9.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c10);
-          }
-        }
-        while (s2 !== peg$FAILED) {
-          s1.push(s2);
-          if (peg$c9.test(input.charAt(peg$currPos))) {
-            s2 = input.charAt(peg$currPos);
-            peg$currPos++;
-          } else {
-            s2 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c10);
-            }
-          }
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c8();
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseToken() {
-        var s0, s1;
-        s0 = peg$currPos;
-        s1 = peg$parseURI();
-        if (s1 === peg$FAILED) {
-          s1 = peg$parseEmail();
-          if (s1 === peg$FAILED) {
-            s1 = peg$parseNumber();
-            if (s1 === peg$FAILED) {
-              s1 = peg$parseAbbreviation();
-              if (s1 === peg$FAILED) {
-                s1 = peg$parseWord();
-              }
-            }
-          }
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c11(s1);
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseAbbreviation() {
-        var s0, s1, s2;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c12.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c13);
-          }
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            if (peg$c12.test(input.charAt(peg$currPos))) {
-              s2 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s2 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c13);
-              }
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = peg$currPos;
-          s2 = peg$c14(s1);
-          if (s2) {
-            s2 = void 0;
-          } else {
-            s2 = peg$FAILED;
-          }
-          if (s2 !== peg$FAILED) {
-            peg$savedPos = s0;
-            s1 = peg$c15(s1);
-            s0 = s1;
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseWord() {
-        var s0, s1, s2;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c16.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c17);
-          }
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            if (peg$c16.test(input.charAt(peg$currPos))) {
-              s2 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s2 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c17);
-              }
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c18();
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseNumber() {
-        var s0, s1, s2, s3, s4, s5;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c19.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c20);
-          }
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            if (peg$c19.test(input.charAt(peg$currPos))) {
-              s2 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s2 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c20);
-              }
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          s2 = peg$currPos;
-          if (input.length > peg$currPos) {
-            s3 = input.charAt(peg$currPos);
-            peg$currPos++;
-          } else {
-            s3 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c21);
-            }
-          }
-          if (s3 !== peg$FAILED) {
-            s4 = [];
-            if (peg$c19.test(input.charAt(peg$currPos))) {
-              s5 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s5 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c20);
-              }
-            }
-            if (s5 !== peg$FAILED) {
-              while (s5 !== peg$FAILED) {
-                s4.push(s5);
-                if (peg$c19.test(input.charAt(peg$currPos))) {
-                  s5 = input.charAt(peg$currPos);
-                  peg$currPos++;
-                } else {
-                  s5 = peg$FAILED;
-                  if (peg$silentFails === 0) {
-                    peg$fail(peg$c20);
-                  }
-                }
-              }
-            } else {
-              s4 = peg$FAILED;
-            }
-            if (s4 !== peg$FAILED) {
-              s3 = [s3, s4];
-              s2 = s3;
-            } else {
-              peg$currPos = s2;
-              s2 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s2;
-            s2 = peg$FAILED;
-          }
-          if (s2 === peg$FAILED) {
-            s2 = null;
-          }
-          if (s2 !== peg$FAILED) {
-            s3 = peg$parseCloseSymbol();
-            if (s3 === peg$FAILED) {
-              s3 = null;
-            }
-            if (s3 !== peg$FAILED) {
-              peg$savedPos = s0;
-              s1 = peg$c8();
-              s0 = s1;
-            } else {
-              peg$currPos = s0;
-              s0 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseEmail() {
-        var s0, s1, s2, s3, s4, s5, s6;
-        s0 = peg$currPos;
-        s1 = [];
-        if (peg$c22.test(input.charAt(peg$currPos))) {
-          s2 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s2 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c23);
-          }
-        }
-        if (s2 !== peg$FAILED) {
-          while (s2 !== peg$FAILED) {
-            s1.push(s2);
-            if (peg$c22.test(input.charAt(peg$currPos))) {
-              s2 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s2 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c23);
-              }
-            }
-          }
-        } else {
-          s1 = peg$FAILED;
-        }
-        if (s1 !== peg$FAILED) {
-          if (peg$c24.test(input.charAt(peg$currPos))) {
-            s2 = input.charAt(peg$currPos);
-            peg$currPos++;
-          } else {
-            s2 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c25);
-            }
-          }
-          if (s2 !== peg$FAILED) {
-            s3 = [];
-            if (peg$c22.test(input.charAt(peg$currPos))) {
-              s4 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s4 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c23);
-              }
-            }
-            if (s4 !== peg$FAILED) {
-              while (s4 !== peg$FAILED) {
-                s3.push(s4);
-                if (peg$c22.test(input.charAt(peg$currPos))) {
-                  s4 = input.charAt(peg$currPos);
-                  peg$currPos++;
-                } else {
-                  s4 = peg$FAILED;
-                  if (peg$silentFails === 0) {
-                    peg$fail(peg$c23);
-                  }
-                }
-              }
-            } else {
-              s3 = peg$FAILED;
-            }
-            if (s3 !== peg$FAILED) {
-              if (peg$c26.test(input.charAt(peg$currPos))) {
-                s4 = input.charAt(peg$currPos);
-                peg$currPos++;
-              } else {
-                s4 = peg$FAILED;
-                if (peg$silentFails === 0) {
-                  peg$fail(peg$c27);
-                }
-              }
-              if (s4 !== peg$FAILED) {
-                s5 = [];
-                if (peg$c22.test(input.charAt(peg$currPos))) {
-                  s6 = input.charAt(peg$currPos);
-                  peg$currPos++;
-                } else {
-                  s6 = peg$FAILED;
-                  if (peg$silentFails === 0) {
-                    peg$fail(peg$c23);
-                  }
-                }
-                if (s6 !== peg$FAILED) {
-                  while (s6 !== peg$FAILED) {
-                    s5.push(s6);
-                    if (peg$c22.test(input.charAt(peg$currPos))) {
-                      s6 = input.charAt(peg$currPos);
-                      peg$currPos++;
-                    } else {
-                      s6 = peg$FAILED;
-                      if (peg$silentFails === 0) {
-                        peg$fail(peg$c23);
-                      }
-                    }
-                  }
-                } else {
-                  s5 = peg$FAILED;
-                }
-                if (s5 !== peg$FAILED) {
-                  peg$savedPos = s0;
-                  s1 = peg$c8();
-                  s0 = s1;
-                } else {
-                  peg$currPos = s0;
-                  s0 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s0;
-                s0 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s0;
-              s0 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseURI() {
-        var s0, s1, s2, s3, s4, s5, s6, s7, s8, s9;
-        s0 = peg$currPos;
-        if (input.substr(peg$currPos, 7) === peg$c28) {
-          s1 = peg$c28;
-          peg$currPos += 7;
-        } else {
-          s1 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c29);
-          }
-        }
-        if (s1 === peg$FAILED) {
-          if (input.substr(peg$currPos, 8) === peg$c30) {
-            s1 = peg$c30;
-            peg$currPos += 8;
-          } else {
-            s1 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c31);
-            }
-          }
-        }
-        if (s1 === peg$FAILED) {
-          s1 = null;
-        }
-        if (s1 !== peg$FAILED) {
-          s2 = [];
-          if (peg$c32.test(input.charAt(peg$currPos))) {
-            s3 = input.charAt(peg$currPos);
-            peg$currPos++;
-          } else {
-            s3 = peg$FAILED;
-            if (peg$silentFails === 0) {
-              peg$fail(peg$c33);
-            }
-          }
-          if (s3 !== peg$FAILED) {
-            while (s3 !== peg$FAILED) {
-              s2.push(s3);
-              if (peg$c32.test(input.charAt(peg$currPos))) {
-                s3 = input.charAt(peg$currPos);
-                peg$currPos++;
-              } else {
-                s3 = peg$FAILED;
-                if (peg$silentFails === 0) {
-                  peg$fail(peg$c33);
-                }
-              }
-            }
-          } else {
-            s2 = peg$FAILED;
-          }
-          if (s2 !== peg$FAILED) {
-            if (peg$c26.test(input.charAt(peg$currPos))) {
-              s3 = input.charAt(peg$currPos);
-              peg$currPos++;
-            } else {
-              s3 = peg$FAILED;
-              if (peg$silentFails === 0) {
-                peg$fail(peg$c27);
-              }
-            }
-            if (s3 !== peg$FAILED) {
-              s4 = peg$currPos;
-              s5 = [];
-              if (peg$c32.test(input.charAt(peg$currPos))) {
-                s6 = input.charAt(peg$currPos);
-                peg$currPos++;
-              } else {
-                s6 = peg$FAILED;
-                if (peg$silentFails === 0) {
-                  peg$fail(peg$c33);
-                }
-              }
-              if (s6 !== peg$FAILED) {
-                while (s6 !== peg$FAILED) {
-                  s5.push(s6);
-                  if (peg$c32.test(input.charAt(peg$currPos))) {
-                    s6 = input.charAt(peg$currPos);
-                    peg$currPos++;
-                  } else {
-                    s6 = peg$FAILED;
-                    if (peg$silentFails === 0) {
-                      peg$fail(peg$c33);
-                    }
-                  }
-                }
-              } else {
-                s5 = peg$FAILED;
-              }
-              if (s5 !== peg$FAILED) {
-                if (peg$c26.test(input.charAt(peg$currPos))) {
-                  s6 = input.charAt(peg$currPos);
-                  peg$currPos++;
-                } else {
-                  s6 = peg$FAILED;
-                  if (peg$silentFails === 0) {
-                    peg$fail(peg$c27);
-                  }
-                }
-                if (s6 !== peg$FAILED) {
-                  s5 = [s5, s6];
-                  s4 = s5;
-                } else {
-                  peg$currPos = s4;
-                  s4 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s4;
-                s4 = peg$FAILED;
-              }
-              if (s4 === peg$FAILED) {
-                s4 = null;
-              }
-              if (s4 !== peg$FAILED) {
-                s5 = [];
-                if (peg$c32.test(input.charAt(peg$currPos))) {
-                  s6 = input.charAt(peg$currPos);
-                  peg$currPos++;
-                } else {
-                  s6 = peg$FAILED;
-                  if (peg$silentFails === 0) {
-                    peg$fail(peg$c33);
-                  }
-                }
-                if (s6 !== peg$FAILED) {
-                  while (s6 !== peg$FAILED) {
-                    s5.push(s6);
-                    if (peg$c32.test(input.charAt(peg$currPos))) {
-                      s6 = input.charAt(peg$currPos);
-                      peg$currPos++;
-                    } else {
-                      s6 = peg$FAILED;
-                      if (peg$silentFails === 0) {
-                        peg$fail(peg$c33);
-                      }
-                    }
-                  }
-                } else {
-                  s5 = peg$FAILED;
-                }
-                if (s5 !== peg$FAILED) {
-                  s6 = [];
-                  s7 = peg$currPos;
-                  s8 = [];
-                  if (peg$c32.test(input.charAt(peg$currPos))) {
-                    s9 = input.charAt(peg$currPos);
-                    peg$currPos++;
-                  } else {
-                    s9 = peg$FAILED;
-                    if (peg$silentFails === 0) {
-                      peg$fail(peg$c33);
-                    }
-                  }
-                  if (s9 !== peg$FAILED) {
-                    while (s9 !== peg$FAILED) {
-                      s8.push(s9);
-                      if (peg$c32.test(input.charAt(peg$currPos))) {
-                        s9 = input.charAt(peg$currPos);
-                        peg$currPos++;
-                      } else {
-                        s9 = peg$FAILED;
-                        if (peg$silentFails === 0) {
-                          peg$fail(peg$c33);
-                        }
-                      }
-                    }
-                  } else {
-                    s8 = peg$FAILED;
-                  }
-                  if (s8 !== peg$FAILED) {
-                    if (peg$c34.test(input.charAt(peg$currPos))) {
-                      s9 = input.charAt(peg$currPos);
-                      peg$currPos++;
-                    } else {
-                      s9 = peg$FAILED;
-                      if (peg$silentFails === 0) {
-                        peg$fail(peg$c35);
-                      }
-                    }
-                    if (s9 !== peg$FAILED) {
-                      s8 = [s8, s9];
-                      s7 = s8;
-                    } else {
-                      peg$currPos = s7;
-                      s7 = peg$FAILED;
-                    }
-                  } else {
-                    peg$currPos = s7;
-                    s7 = peg$FAILED;
-                  }
-                  while (s7 !== peg$FAILED) {
-                    s6.push(s7);
-                    s7 = peg$currPos;
-                    s8 = [];
-                    if (peg$c32.test(input.charAt(peg$currPos))) {
-                      s9 = input.charAt(peg$currPos);
-                      peg$currPos++;
-                    } else {
-                      s9 = peg$FAILED;
-                      if (peg$silentFails === 0) {
-                        peg$fail(peg$c33);
-                      }
-                    }
-                    if (s9 !== peg$FAILED) {
-                      while (s9 !== peg$FAILED) {
-                        s8.push(s9);
-                        if (peg$c32.test(input.charAt(peg$currPos))) {
-                          s9 = input.charAt(peg$currPos);
-                          peg$currPos++;
-                        } else {
-                          s9 = peg$FAILED;
-                          if (peg$silentFails === 0) {
-                            peg$fail(peg$c33);
-                          }
-                        }
-                      }
-                    } else {
-                      s8 = peg$FAILED;
-                    }
-                    if (s8 !== peg$FAILED) {
-                      if (peg$c34.test(input.charAt(peg$currPos))) {
-                        s9 = input.charAt(peg$currPos);
-                        peg$currPos++;
-                      } else {
-                        s9 = peg$FAILED;
-                        if (peg$silentFails === 0) {
-                          peg$fail(peg$c35);
-                        }
-                      }
-                      if (s9 !== peg$FAILED) {
-                        s8 = [s8, s9];
-                        s7 = s8;
-                      } else {
-                        peg$currPos = s7;
-                        s7 = peg$FAILED;
-                      }
-                    } else {
-                      peg$currPos = s7;
-                      s7 = peg$FAILED;
-                    }
-                  }
-                  if (s6 !== peg$FAILED) {
-                    peg$savedPos = s0;
-                    s1 = peg$c36();
-                    s0 = s1;
-                  } else {
-                    peg$currPos = s0;
-                    s0 = peg$FAILED;
-                  }
-                } else {
-                  peg$currPos = s0;
-                  s0 = peg$FAILED;
-                }
-              } else {
-                peg$currPos = s0;
-                s0 = peg$FAILED;
-              }
-            } else {
-              peg$currPos = s0;
-              s0 = peg$FAILED;
-            }
-          } else {
-            peg$currPos = s0;
-            s0 = peg$FAILED;
-          }
-        } else {
-          peg$currPos = s0;
-          s0 = peg$FAILED;
-        }
-        return s0;
-      }
-      function peg$parseOpenSymbol() {
-        var s0, s1;
-        s0 = peg$currPos;
-        if (peg$c37.test(input.charAt(peg$currPos))) {
-          s1 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s1 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c38);
-          }
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c8();
-        }
-        s0 = s1;
-        return s0;
-      }
-      function peg$parseCloseSymbol() {
-        var s0, s1;
-        s0 = peg$currPos;
-        if (peg$c39.test(input.charAt(peg$currPos))) {
-          s1 = input.charAt(peg$currPos);
-          peg$currPos++;
-        } else {
-          s1 = peg$FAILED;
-          if (peg$silentFails === 0) {
-            peg$fail(peg$c40);
-          }
-        }
-        if (s1 !== peg$FAILED) {
-          peg$savedPos = s0;
-          s1 = peg$c8();
-        }
-        s0 = s1;
-        return s0;
-      }
-      const knownAbbreviations = require_abbreviations_en().knownAbbreviations;
-      peg$result = peg$startRuleFunction();
-      if (peg$result !== peg$FAILED && peg$currPos === input.length) {
-        return peg$result;
-      } else {
-        if (peg$result !== peg$FAILED && peg$currPos < input.length) {
-          peg$fail(peg$endExpectation());
-        }
-        throw peg$buildStructuredError(
-          peg$maxFailExpected,
-          peg$maxFailPos < input.length ? input.charAt(peg$maxFailPos) : null,
-          peg$maxFailPos < input.length
-            ? peg$computeLocation(peg$maxFailPos, peg$maxFailPos + 1)
-            : peg$computeLocation(peg$maxFailPos, peg$maxFailPos),
-        );
-      }
-    }
-    module.exports = {
-      SyntaxError: peg$SyntaxError,
-      parse: peg$parse,
-    };
-  },
-});
-
-// lib/natural/tokenizers/tokenizer.js
-var require_tokenizer = cjs({
-  "lib/natural/tokenizers/tokenizer.js"(exports, module) {
-    "use strict";
-    var Tokenizer = class {
-      trim(array) {
-        while (array[array.length - 1] === "") {
-          array.pop();
-        }
-        while (array[0] === "") {
-          array.shift();
-        }
-        return array;
-      }
-    };
-    module.exports = Tokenizer;
-  },
-});
-
-// lib/natural/tokenizers/sentence_tokenizer_parser.js
-var require_sentence_tokenizer_parser = cjs({
-  "lib/natural/tokenizers/sentence_tokenizer_parser.js"(exports, module) {
-    var parser = require_parser_sentence_tokenizer();
-    var Tokenizer = require_tokenizer();
-    var SentenceTokenizer = class extends Tokenizer {
-      tokenize(text) {
-        return parser.parse(text);
-      }
-    };
-    module.exports = SentenceTokenizer;
-  },
-});
-export default require_sentence_tokenizer_parser();
diff --git a/packages/core/src/node-parser/sentence-tokenizer-parser.d.ts b/packages/core/src/node-parser/sentence_tokenizer.d.ts
similarity index 73%
rename from packages/core/src/node-parser/sentence-tokenizer-parser.d.ts
rename to packages/core/src/node-parser/sentence_tokenizer.d.ts
index 87074151446a7a938065e97b3eb2c0320f363ac9..c0c2d416fde9b5b26a3d409d6c8bcafa200eedc2 100644
--- a/packages/core/src/node-parser/sentence-tokenizer-parser.d.ts
+++ b/packages/core/src/node-parser/sentence_tokenizer.d.ts
@@ -1,4 +1,5 @@
 declare class SentenceTokenizer {
+  constructor(abbreviations?: string[]);
   tokenize(text: string): string[];
 }
 
diff --git a/packages/core/src/node-parser/sentence_tokenizer.js b/packages/core/src/node-parser/sentence_tokenizer.js
new file mode 100644
index 0000000000000000000000000000000000000000..08cabd31bc2420bf180d2c5117d2a51df2360f07
--- /dev/null
+++ b/packages/core/src/node-parser/sentence_tokenizer.js
@@ -0,0 +1,222 @@
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __commonJS = (cb, mod) =>
+  function __require() {
+    return (
+      mod ||
+        (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod),
+      mod.exports
+    );
+  };
+
+// lib/natural/tokenizers/tokenizer.js
+var require_tokenizer = __commonJS({
+  "lib/natural/tokenizers/tokenizer.js"(exports, module) {
+    "use strict";
+    var Tokenizer = class {
+      trim(array) {
+        while (array[array.length - 1] === "") {
+          array.pop();
+        }
+        while (array[0] === "") {
+          array.shift();
+        }
+        return array;
+      }
+    };
+    module.exports = Tokenizer;
+  },
+});
+
+// lib/natural/tokenizers/sentence_tokenizer.js
+var require_sentence_tokenizer = __commonJS({
+  "lib/natural/tokenizers/sentence_tokenizer.js"(exports, module) {
+    var Tokenizer = require_tokenizer();
+    var NUM = "NUMBER";
+    var DELIM = "DELIM";
+    var URI = "URI";
+    var ABBREV = "ABBREV";
+    var DEBUG = false;
+    function generateUniqueCode(base, index) {
+      return `{{${base}_${index}}}`;
+    }
+    function escapeRegExp(string) {
+      return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+    }
+    var SentenceTokenizer = class extends Tokenizer {
+      constructor(abbreviations) {
+        super();
+        if (abbreviations) {
+          this.abbreviations = abbreviations;
+        } else {
+          this.abbreviations = [];
+        }
+        this.replacementMap = null;
+        this.replacementCounter = 0;
+      }
+      replaceUrisWithPlaceholders(text) {
+        const urlPattern =
+          /(https?:\/\/\S+|www\.\S+|ftp:\/\/\S+|(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}|file:\/\/\S+)/gi;
+        const modifiedText = text.replace(urlPattern, (match) => {
+          const placeholder = generateUniqueCode(
+            URI,
+            this.replacementCounter++,
+          );
+          this.replacementMap.set(placeholder, match);
+          return placeholder;
+        });
+        return modifiedText;
+      }
+      replaceAbbreviations(text) {
+        if (this.abbreviations.length === 0) {
+          return text;
+        }
+        const pattern = new RegExp(
+          `(${this.abbreviations.map((abbrev) => escapeRegExp(abbrev)).join("|")})`,
+          "gi",
+        );
+        const replacedText = text.replace(pattern, (match) => {
+          const code = generateUniqueCode(ABBREV, this.replacementCounter++);
+          this.replacementMap.set(code, match);
+          return code;
+        });
+        return replacedText;
+      }
+      replaceDelimitersWithPlaceholders(text) {
+        const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g;
+        const modifiedText = text.replace(
+          delimiterPattern,
+          (match, p1, p2, p3) => {
+            const placeholder = generateUniqueCode(
+              DELIM,
+              this.replacementCounter++,
+            );
+            this.delimiterMap.set(placeholder, p1 + p2 + p3);
+            return placeholder;
+          },
+        );
+        return modifiedText;
+      }
+      splitOnPlaceholders(text, placeholders) {
+        if (this.delimiterMap.size === 0) {
+          return [text];
+        }
+        const keys = Array.from(this.delimiterMap.keys());
+        const pattern = new RegExp(`(${keys.map(escapeRegExp).join("|")})`);
+        const parts = text.split(pattern);
+        const sentences = [];
+        for (let i = 0; i < parts.length; i += 2) {
+          const sentence = parts[i];
+          const placeholder = parts[i + 1] || "";
+          sentences.push(sentence + placeholder);
+        }
+        return sentences;
+      }
+      replaceNumbersWithCode(text) {
+        const numberPattern = /\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b/g;
+        const replacedText = text.replace(numberPattern, (match) => {
+          const code = generateUniqueCode(NUM, this.replacementCounter++);
+          this.replacementMap.set(code, match);
+          return code;
+        });
+        return replacedText;
+      }
+      revertReplacements(text) {
+        let originalText = text;
+        for (const [
+          placeholder,
+          replacement,
+        ] of this.replacementMap.entries()) {
+          const pattern = new RegExp(escapeRegExp(placeholder), "g");
+          originalText = originalText.replace(pattern, replacement);
+        }
+        return originalText;
+      }
+      revertDelimiters(text) {
+        let originalText = text;
+        for (const [placeholder, replacement] of this.delimiterMap.entries()) {
+          const pattern = new RegExp(escapeRegExp(placeholder), "g");
+          originalText = originalText.replace(pattern, replacement);
+        }
+        return originalText;
+      }
+      tokenize(text) {
+        this.replacementCounter = 0;
+        this.replacementMap = /* @__PURE__ */ new Map();
+        this.delimiterMap = /* @__PURE__ */ new Map();
+        DEBUG &&
+          console.log(
+            "---Start of sentence tokenization-----------------------",
+          );
+        DEBUG && console.log("Original input: >>>" + text + "<<<");
+        const result1 = this.replaceAbbreviations(text);
+        DEBUG &&
+          console.log(
+            "Phase 1: replacing abbreviations: " + JSON.stringify(result1),
+          );
+        const result2 = this.replaceUrisWithPlaceholders(result1);
+        DEBUG &&
+          console.log("Phase 2: replacing URIs: " + JSON.stringify(result2));
+        const result3 = this.replaceNumbersWithCode(result2);
+        DEBUG &&
+          console.log(
+            "Phase 3: replacing numbers with placeholders: " +
+              JSON.stringify(result3),
+          );
+        const result4 = this.replaceDelimitersWithPlaceholders(result3);
+        DEBUG &&
+          console.log(
+            "Phase 4: replacing delimiters with placeholders: " +
+              JSON.stringify(result4),
+          );
+        const sentences = this.splitOnPlaceholders(result4);
+        DEBUG &&
+          console.log(
+            "Phase 5: splitting into sentences on placeholders: " +
+              JSON.stringify(sentences),
+          );
+        const newSentences = sentences.map((s) => {
+          const s1 = this.revertReplacements(s);
+          return this.revertDelimiters(s1);
+        });
+        DEBUG &&
+          console.log(
+            "Phase 6: replacing back abbreviations, URIs, numbers and delimiters: " +
+              JSON.stringify(newSentences),
+          );
+        const trimmedSentences = this.trim(newSentences);
+        DEBUG &&
+          console.log(
+            "Phase 7: trimming array of empty sentences: " +
+              JSON.stringify(trimmedSentences),
+          );
+        const trimmedSentences2 = trimmedSentences.map((sent) => sent.trim());
+        DEBUG &&
+          console.log(
+            "Phase 8: trimming sentences from surrounding whitespace: " +
+              JSON.stringify(trimmedSentences2),
+          );
+        DEBUG &&
+          console.log(
+            "---End of sentence tokenization--------------------------",
+          );
+        DEBUG &&
+          console.log(
+            "---Replacement map---------------------------------------",
+          );
+        DEBUG && console.log([...this.replacementMap.entries()]);
+        DEBUG &&
+          console.log(
+            "---Delimiter map-----------------------------------------",
+          );
+        DEBUG && console.log([...this.delimiterMap.entries()]);
+        DEBUG &&
+          console.log(
+            "---------------------------------------------------------",
+          );
+        return trimmedSentences2;
+      }
+    };
+    module.exports = SentenceTokenizer;
+  },
+});
+export default require_sentence_tokenizer();
diff --git a/packages/core/src/node-parser/utils.ts b/packages/core/src/node-parser/utils.ts
index 74351b6e7c1ed095a5d46dce74a8bd2b1842f40a..5f31b23162c04a22ddf0b7ba1fe304081103f76b 100644
--- a/packages/core/src/node-parser/utils.ts
+++ b/packages/core/src/node-parser/utils.ts
@@ -1,5 +1,5 @@
 import type { TextSplitter } from "./base";
-import SentenceTokenizerNew from "./sentence-tokenizer-parser.js";
+import SentenceTokenizer from "./sentence_tokenizer";
 
 export type TextSplitterFn = (text: string) => string[];
 
@@ -31,11 +31,17 @@ export const splitByChar = (): TextSplitterFn => {
   return (text: string) => text.split("");
 };
 
-let sentenceTokenizer: SentenceTokenizerNew | null = null;
+let sentenceTokenizer: SentenceTokenizer | null = null;
 
 export const splitBySentenceTokenizer = (): TextSplitterFn => {
   if (!sentenceTokenizer) {
-    sentenceTokenizer = new SentenceTokenizerNew();
+    sentenceTokenizer = new SentenceTokenizer([
+      "i.e.",
+      "etc.",
+      "vs.",
+      "Inc.",
+      "A.S.A.P.",
+    ]);
   }
   const tokenizer = sentenceTokenizer;
   return (text: string) => {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index d89192a65b98aace9f332e19d190323fd937f50c..7ae92bdcce925f9596ac5d5761bda583daed719f 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -163,7 +163,7 @@ importers:
         version: link:../packages/llamaindex
       mongodb:
         specifier: ^6.7.0
-        version: 6.8.0(@aws-sdk/credential-providers@3.613.0)
+        version: 6.8.0(@aws-sdk/credential-providers@3.613.0(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0)))
       pathe:
         specifier: ^1.1.2
         version: 1.1.2
@@ -382,8 +382,8 @@ importers:
         specifier: 5.3.1
         version: 5.3.1(typescript@5.5.3)
       natural:
-        specifier: ^7.1.0
-        version: 7.1.0(@aws-sdk/credential-providers@3.613.0)
+        specifier: ^8.0.1
+        version: 8.0.1(@aws-sdk/credential-providers@3.613.0)
 
   packages/core/tests:
     devDependencies:
@@ -568,7 +568,7 @@ importers:
         version: 2.0.0
       mongodb:
         specifier: ^6.7.0
-        version: 6.8.0(@aws-sdk/credential-providers@3.613.0)
+        version: 6.8.0(@aws-sdk/credential-providers@3.613.0(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0)))
       notion-md-crawler:
         specifier: ^1.0.0
         version: 1.0.0(encoding@0.1.13)
@@ -8099,8 +8099,8 @@ packages:
   natural-compare@1.4.0:
     resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==}
 
-  natural@7.1.0:
-    resolution: {integrity: sha512-GBhiRgF0VUX+zPWahBVir1ajARQDZF1Fe6UpQORNzyQT57JQ2KLKYvubecvjIYh/uDaociusmySeRh+WL5OdxQ==}
+  natural@8.0.1:
+    resolution: {integrity: sha512-VVw8O5KrfvwqAFeNZEgBbdgA+AQaBlHcXEootWU7TWDaFWFI0VLfzyKMsRjnfdS3cVCpWmI04xLJonCvEv11VQ==}
     engines: {node: '>=0.4.10'}
 
   negotiator@0.6.3:
@@ -12003,7 +12003,7 @@ snapshots:
       '@babel/core': 7.24.7
       '@babel/helper-compilation-targets': 7.24.7
       '@babel/helper-plugin-utils': 7.24.7
-      debug: 4.3.5
+      debug: 4.3.6
       lodash.debounce: 4.0.8
       resolve: 1.22.8
     transitivePeerDependencies:
@@ -15604,7 +15604,7 @@ snapshots:
     dependencies:
       '@typescript-eslint/types': 5.62.0
       '@typescript-eslint/visitor-keys': 5.62.0
-      debug: 4.3.5
+      debug: 4.3.6
       globby: 11.1.0
       is-glob: 4.0.3
       semver: 7.6.2
@@ -15942,7 +15942,7 @@ snapshots:
 
   agent-base@6.0.2:
     dependencies:
-      debug: 4.3.5
+      debug: 4.3.6
     transitivePeerDependencies:
       - supports-color
     optional: true
@@ -17742,6 +17742,16 @@ snapshots:
       - eslint-import-resolver-webpack
       - supports-color
 
+  eslint-module-utils@2.8.1(@typescript-eslint/parser@7.16.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint@8.57.0):
+    dependencies:
+      debug: 3.2.7
+    optionalDependencies:
+      '@typescript-eslint/parser': 7.16.0(eslint@8.57.0)(typescript@5.5.3)
+      eslint: 8.57.0
+      eslint-import-resolver-node: 0.3.9
+    transitivePeerDependencies:
+      - supports-color
+
   eslint-module-utils@2.8.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1)(eslint@8.57.0))(eslint@8.57.0):
     dependencies:
       debug: 3.2.7
@@ -17763,7 +17773,7 @@ snapshots:
       doctrine: 2.1.0
       eslint: 8.57.0
       eslint-import-resolver-node: 0.3.9
-      eslint-module-utils: 2.8.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.1(@typescript-eslint/parser@7.2.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.29.1)(eslint@8.57.0))(eslint@8.57.0)
+      eslint-module-utils: 2.8.1(@typescript-eslint/parser@7.16.0(eslint@8.57.0)(typescript@5.5.3))(eslint-import-resolver-node@0.3.9)(eslint@8.57.0)
       hasown: 2.0.2
       is-core-module: 2.14.0
       is-glob: 4.0.3
@@ -20463,7 +20473,7 @@ snapshots:
     optionalDependencies:
       '@aws-sdk/credential-providers': 3.613.0(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))
 
-  mongodb@6.8.0(@aws-sdk/credential-providers@3.613.0):
+  mongodb@6.8.0(@aws-sdk/credential-providers@3.613.0(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))):
     dependencies:
       '@mongodb-js/saslprep': 1.1.7
       bson: 6.8.0
@@ -20494,7 +20504,7 @@ snapshots:
 
   mquery@5.0.0:
     dependencies:
-      debug: 4.3.5
+      debug: 4.3.6
     transitivePeerDependencies:
       - supports-color
 
@@ -20532,7 +20542,7 @@ snapshots:
 
   natural-compare@1.4.0: {}
 
-  natural@7.1.0(@aws-sdk/credential-providers@3.613.0):
+  natural@8.0.1(@aws-sdk/credential-providers@3.613.0):
     dependencies:
       afinn-165: 1.0.4
       afinn-165-financialmarketnews: 3.0.0
@@ -22645,7 +22655,7 @@ snapshots:
 
   spdy-transport@3.0.0:
     dependencies:
-      debug: 4.3.5
+      debug: 4.3.6
       detect-node: 2.1.0
       hpack.js: 2.1.6
       obuf: 1.1.2