diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index dbd61623db17eaa92e60dcf6bb79426b6b8f5342..0a5ed65fc85b5345a7429dbd025523a9f09c5d6b 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -35,6 +35,9 @@ const GeneralTranscriptionPreference = lazy(
 const GeneralEmbeddingPreference = lazy(
   () => import("@/pages/GeneralSettings/EmbeddingPreference")
 );
+const EmbeddingTextSplitterPreference = lazy(
+  () => import("@/pages/GeneralSettings/EmbeddingTextSplitterPreference")
+);
 const GeneralVectorDatabase = lazy(
   () => import("@/pages/GeneralSettings/VectorDatabase")
 );
@@ -86,6 +89,12 @@ export default function App() {
                 path="/settings/embedding-preference"
                 element={<AdminRoute Component={GeneralEmbeddingPreference} />}
               />
+              <Route
+                path="/settings/text-splitter-preference"
+                element={
+                  <AdminRoute Component={EmbeddingTextSplitterPreference} />
+                }
+              />
               <Route
                 path="/settings/vector-database"
                 element={<AdminRoute Component={GeneralVectorDatabase} />}
diff --git a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx
index c94a99d7871f9e61ee4f8efa686df4b6140f6217..9a1c59bc73f1c5ff4880a1de1a9779d4cdc89095 100644
--- a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx
@@ -21,7 +21,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
             </p>
           </div>
           <a
-            href={paths.settings.embeddingPreference()}
+            href={paths.settings.embedder.modelPreference()}
             className="text-sm md:text-base my-2 underline"
           >
             Manage embedding &rarr;
diff --git a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx
index 36b2f2588ecc41a861bee1f0c58163007445b6d4..1304c9e1ba573ec8f7cd4d4a21435010d404f9fd 100644
--- a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx
@@ -21,7 +21,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {
             </p>
           </div>
           <a
-            href={paths.settings.embeddingPreference()}
+            href={paths.settings.embedder.modelPreference()}
             className="text-sm md:text-base my-2 underline"
           >
             Manage embedding &rarr;
diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx
index 40450d4e19a47b195f184211ab7b9a82563d6c37..67797d266190570339b0a6e64f14213828085eb5 100644
--- a/frontend/src/components/SettingsSidebar/index.jsx
+++ b/frontend/src/components/SettingsSidebar/index.jsx
@@ -20,6 +20,7 @@ import {
   Barcode,
   ClosedCaptioning,
   EyeSlash,
+  SplitVertical,
 } from "@phosphor-icons/react";
 import useUser from "@/hooks/useUser";
 import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@@ -288,12 +289,25 @@ const SidebarOptions = ({ user = null }) => (
       allowedRole={["admin"]}
     />
     <Option
-      href={paths.settings.embeddingPreference()}
-      btnText="Embedding Model"
+      href={paths.settings.embedder.modelPreference()}
+      childLinks={[paths.settings.embedder.chunkingPreference()]}
+      btnText="Embedder Preferences"
       icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
       user={user}
       flex={true}
       allowedRole={["admin"]}
+      subOptions={
+        <>
+          <Option
+            href={paths.settings.embedder.chunkingPreference()}
+            btnText="Text Splitter & Chunking"
+            icon={<SplitVertical className="h-5 w-5 flex-shrink-0" />}
+            user={user}
+            flex={true}
+            allowedRole={["admin"]}
+          />
+        </>
+      }
     />
     <Option
       href={paths.settings.vectorDatabase()}
diff --git a/frontend/src/pages/GeneralSettings/EmbeddingTextSplitterPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingTextSplitterPreference/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..5ee1197f117ce3cc70ca2ec3a2083c19a56f9247
--- /dev/null
+++ b/frontend/src/pages/GeneralSettings/EmbeddingTextSplitterPreference/index.jsx
@@ -0,0 +1,180 @@
+import React, { useEffect, useState } from "react";
+import Sidebar from "@/components/SettingsSidebar";
+import { isMobile } from "react-device-detect";
+import PreLoader from "@/components/Preloader";
+import CTAButton from "@/components/lib/CTAButton";
+import Admin from "@/models/admin";
+import showToast from "@/utils/toast";
+import { numberWithCommas } from "@/utils/numbers";
+
+function isNullOrNaN(value) {
+  if (value === null) return true;
+  return isNaN(value);
+}
+
+export default function EmbeddingTextSplitterPreference() {
+  const [settings, setSettings] = useState({});
+  const [loading, setLoading] = useState(true);
+  const [saving, setSaving] = useState(false);
+  const [hasChanges, setHasChanges] = useState(false);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    const form = new FormData(e.target);
+
+    if (
+      Number(form.get("text_splitter_chunk_overlap")) >=
+      Number(form.get("text_splitter_chunk_size"))
+    ) {
+      showToast(
+        "Chunk overlap cannot be larger or equal to chunk size.",
+        "error"
+      );
+      return;
+    }
+
+    setSaving(true);
+    await Admin.updateSystemPreferences({
+      text_splitter_chunk_size: isNullOrNaN(
+        form.get("text_splitter_chunk_size")
+      )
+        ? 1000
+        : Number(form.get("text_splitter_chunk_size")),
+      text_splitter_chunk_overlap: isNullOrNaN(
+        form.get("text_splitter_chunk_overlap")
+      )
+        ? 20
+        : Number(form.get("text_splitter_chunk_overlap")),
+    });
+    setSaving(false);
+    setHasChanges(false);
+    showToast("Text chunking strategy settings saved.", "success");
+  };
+
+  useEffect(() => {
+    async function fetchSettings() {
+      const _settings = (await Admin.systemPreferences())?.settings;
+      setSettings(_settings ?? {});
+      setLoading(false);
+    }
+    fetchSettings();
+  }, []);
+
+  return (
+    <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
+      <Sidebar />
+      {loading ? (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <div className="w-full h-full flex justify-center items-center">
+            <PreLoader />
+          </div>
+        </div>
+      ) : (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <form
+            onSubmit={handleSubmit}
+            onChange={() => setHasChanges(true)}
+            className="flex w-full"
+          >
+            <div className="flex flex-col w-full px-1 md:pl-6 md:pr-[50px] md:py-6 py-16">
+              <div className="w-full flex flex-col gap-y-1 pb-4 border-white border-b-2 border-opacity-10">
+                <div className="flex gap-x-4 items-center">
+                  <p className="text-lg leading-6 font-bold text-white">
+                    Text Splitting & Chunking Preferences
+                  </p>
+                </div>
+                <p className="text-xs leading-[18px] font-base text-white text-opacity-60">
+                  Sometimes, you may want to change the default way that new
+                  documents are split and chunked before being inserted into
+                  your vector database. <br />
+                  You should only modify this setting if you understand how text
+                  splitting works and its side effects.
+                </p>
+                <p className="text-xs leading-[18px] font-semibold text-white/80">
+                  Changes here will only apply to{" "}
+                  <i>newly embedded documents</i>, not existing documents.
+                </p>
+              </div>
+              <div className="w-full justify-end flex">
+                {hasChanges && (
+                  <CTAButton className="mt-3 mr-0 -mb-14 z-10">
+                    {saving ? "Saving..." : "Save changes"}
+                  </CTAButton>
+                )}
+              </div>
+
+              <div className="flex flex-col gap-y-4 mt-8">
+                <div className="flex flex-col max-w-[300px]">
+                  <div className="flex flex-col gap-y-2 mb-4">
+                    <label className="text-white text-sm font-semibold block">
+                      Text Chunk Size
+                    </label>
+                    <p className="text-xs text-white/60">
+                      This is the maximum number of characters that can be
+                      present in a single vector.
+                    </p>
+                  </div>
+                  <input
+                    type="number"
+                    name="text_splitter_chunk_size"
+                    min={1}
+                    max={settings?.max_embed_chunk_size || 1000}
+                    onWheel={(e) => e?.currentTarget?.blur()}
+                    className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                    placeholder="maximum length of vectorized text"
+                    defaultValue={
+                      isNullOrNaN(settings?.text_splitter_chunk_size)
+                        ? 1000
+                        : Number(settings?.text_splitter_chunk_size)
+                    }
+                    required={true}
+                    autoComplete="off"
+                  />
+                  <p className="text-xs text-white/40">
+                    Embed model maximum length is{" "}
+                    {numberWithCommas(settings?.max_embed_chunk_size || 1000)}.
+                  </p>
+                </div>
+              </div>
+
+              <div className="flex flex-col gap-y-4 mt-8">
+                <div className="flex flex-col max-w-[300px]">
+                  <div className="flex flex-col gap-y-2 mb-4">
+                    <label className="text-white text-sm font-semibold block">
+                      Text Chunk Overlap
+                    </label>
+                    <p className="text-xs text-white/60">
+                      This is the maximum overlap of characters that occurs
+                      during chunking between two adjacent text chunks.
+                    </p>
+                  </div>
+                  <input
+                    type="number"
+                    name="text_splitter_chunk_overlap"
+                    min={0}
+                    onWheel={(e) => e?.currentTarget?.blur()}
+                    className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                    placeholder="overlap of characters between adjacent chunks"
+                    defaultValue={
+                      isNullOrNaN(settings?.text_splitter_chunk_overlap)
+                        ? 20
+                        : Number(settings?.text_splitter_chunk_overlap)
+                    }
+                    required={true}
+                    autoComplete="off"
+                  />
+                </div>
+              </div>
+            </div>
+          </form>
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/utils/paths.js b/frontend/src/utils/paths.js
index af0331a9200c07298b9591e1a4794b262a072fe0..ffbf04c0c73bfec4b4d151bfe565aa8fa138ff39 100644
--- a/frontend/src/utils/paths.js
+++ b/frontend/src/utils/paths.js
@@ -98,6 +98,10 @@ export default {
     transcriptionPreference: () => {
       return "/settings/transcription-preference";
     },
+    embedder: {
+      modelPreference: () => "/settings/embedding-preference",
+      chunkingPreference: () => "/settings/text-splitter-preference",
+    },
     embeddingPreference: () => {
       return "/settings/embedding-preference";
     },
diff --git a/server/endpoints/admin.js b/server/endpoints/admin.js
index 34bd66c3fecfa51232f85c991b91ce33e5a81888..4bf816a04731d30d8e711f883e9290e258593669 100644
--- a/server/endpoints/admin.js
+++ b/server/endpoints/admin.js
@@ -8,7 +8,10 @@ const { User } = require("../models/user");
 const { DocumentVectors } = require("../models/vectors");
 const { Workspace } = require("../models/workspace");
 const { WorkspaceChats } = require("../models/workspaceChats");
-const { getVectorDbClass } = require("../utils/helpers");
+const {
+  getVectorDbClass,
+  getEmbeddingEngineSelection,
+} = require("../utils/helpers");
 const {
   validRoleSelection,
   canModifyAdmin,
@@ -311,6 +314,7 @@ function adminEndpoints(app) {
     }
   );
 
+  // TODO: Allow specification of which props to get instead of returning all of them all the time.
   app.get(
     "/admin/system-preferences",
     [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
@@ -333,6 +337,16 @@ function adminEndpoints(app) {
           support_email:
             (await SystemSettings.get({ label: "support_email" }))?.value ||
             null,
+          text_splitter_chunk_size:
+            (await SystemSettings.get({ label: "text_splitter_chunk_size" }))
+              ?.value ||
+            getEmbeddingEngineSelection()?.embeddingMaxChunkLength ||
+            null,
+          text_splitter_chunk_overlap:
+            (await SystemSettings.get({ label: "text_splitter_chunk_overlap" }))
+              ?.value || null,
+          max_embed_chunk_size:
+            getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1000,
         };
         response.status(200).json({ settings });
       } catch (e) {
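Note: with these additions, the settings object returned by GET /admin/system-preferences gains three keys alongside the existing ones. A rough sketch of the relevant part of the response (values are illustrative only; actual values depend on what has been saved and which embedder is selected):

  {
    "settings": {
      // ...existing preference keys...
      "support_email": null,
      "text_splitter_chunk_size": 8191,   // stored value, else the embedder's max chunk length, else null
      "text_splitter_chunk_overlap": 20,  // stored value, else null
      "max_embed_chunk_size": 8191        // the embedder's max chunk length, else 1000
    }
  }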
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 080a01f08727982edc698badfff06f01d0650a23..604e43073ffbce1d17d4964b6da5bf17ad620011 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -5,6 +5,11 @@ process.env.NODE_ENV === "development"
 const { isValidUrl } = require("../utils/http");
 const prisma = require("../utils/prisma");
 
+function isNullOrNaN(value) {
+  if (value === null) return true;
+  return isNaN(value);
+}
+
 const SystemSettings = {
   protectedFields: ["multi_user_mode"],
   supportedFields: [
@@ -15,6 +20,8 @@ const SystemSettings = {
     "telemetry_id",
     "footer_data",
     "support_email",
+    "text_splitter_chunk_size",
+    "text_splitter_chunk_overlap",
   ],
   validations: {
     footer_data: (updates) => {
@@ -28,6 +35,32 @@ const SystemSettings = {
         return JSON.stringify([]);
       }
     },
+    text_splitter_chunk_size: (update) => {
+      try {
+        if (isNullOrNaN(update)) throw new Error("Value is not a number.");
+        if (Number(update) <= 0) throw new Error("Value must be positive.");
+        return Number(update);
+      } catch (e) {
+        console.error(
+          `Failed to run validation function on text_splitter_chunk_size`,
+          e.message
+        );
+        return 1000;
+      }
+    },
+    text_splitter_chunk_overlap: (update) => {
+      try {
+        if (isNullOrNaN(update)) throw new Error("Value is not a number.");
+        if (Number(update) < 0) throw new Error("Value cannot be less than 0.");
+        return Number(update);
+      } catch (e) {
+        console.error(
+          `Failed to run validation function on text_splitter_chunk_overlap`,
+          e.message
+        );
+        return 20;
+      }
+    },
   },
   currentSettings: async function () {
     const llmProvider = process.env.LLM_PROVIDER;
@@ -84,6 +117,15 @@ const SystemSettings = {
     }
   },
 
+  getValueOrFallback: async function (clause = {}, fallback = null) {
+    try {
+      return (await this.get(clause))?.value ?? fallback;
+    } catch (error) {
+      console.error(error.message);
+      return fallback;
+    }
+  },
+
   where: async function (clause = {}, limit) {
     try {
       const settings = await prisma.system_settings.findMany({
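The new getValueOrFallback helper is what the vector database providers below use to resolve the chunking preferences without null-checking the row themselves. A minimal usage sketch mirroring those call sites:

  // Resolves the stored overlap value, or 20 when the setting is missing or unreadable.
  const chunkOverlap = await SystemSettings.getValueOrFallback(
    { label: "text_splitter_chunk_overlap" },
    20
  );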
diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js
index 4193e860d1e05ee601c6361c827162a2cfc4503f..1f9362c95d97222c231b107fd8e186af7994b018 100644
--- a/server/utils/EmbeddingEngines/azureOpenAi/index.js
+++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -17,7 +17,9 @@ class AzureOpenAiEmbedder {
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
     // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
     this.maxConcurrentChunks = 16;
-    this.embeddingMaxChunkLength = 1_000;
+
+    // https://learn.microsoft.com/en-us/answers/questions/1188074/text-embedding-ada-002-token-context-length
+    this.embeddingMaxChunkLength = 2048;
   }
 
   async embedTextInput(textInput) {
diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js
index b52e78c6f433654bab8857fe056c6ea4deb75210..49841343a61058e7747062ebda55290fa1e74561 100644
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@@ -13,7 +13,9 @@ class OpenAiEmbedder {
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
     this.maxConcurrentChunks = 500;
-    this.embeddingMaxChunkLength = 1_000;
+
+    // https://platform.openai.com/docs/guides/embeddings/embedding-models
+    this.embeddingMaxChunkLength = 8_191;
   }
 
   async embedTextInput(textInput) {
diff --git a/server/utils/TextSplitter/index.js b/server/utils/TextSplitter/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..d7829827ca0ec1c2936e55ce35be95a8eceb22cc
--- /dev/null
+++ b/server/utils/TextSplitter/index.js
@@ -0,0 +1,84 @@
+function isNullOrNaN(value) {
+  if (value === null) return true;
+  return isNaN(value);
+}
+
+class TextSplitter {
+  #splitter;
+  constructor(config = {}) {
+    /*
+      config can be a ton of things depending on what is required or optional by the specific splitter.
+      Non-splitter related keys
+      {
+        splitByFilename: string, // TODO
+      }
+      ------
+      Default: "RecursiveCharacterTextSplitter"
+      Config: {
+        chunkSize: number,
+        chunkOverlap: number,
+      }
+      ------
+    */
+    this.config = config;
+    this.#splitter = this.#setSplitter(config);
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[35m[TextSplitter]\x1b[0m ${text}`, ...args);
+  }
+
+  // Does a quick check to determine the text chunk length limit.
+  // Embedder models have hard-set limits that cannot be exceeded, just like an LLM context window,
+  // so here we want to allow override of the default 1000, but only up to the model's maximum, which is
+  // sometimes user defined.
+  static determineMaxChunkSize(preferred = null, embedderLimit = 1000) {
+    const prefValue = isNullOrNaN(preferred)
+      ? Number(embedderLimit)
+      : Number(preferred);
+    const limit = Number(embedderLimit);
+    if (prefValue > limit)
+      console.log(
+        `\x1b[43m[WARN]\x1b[0m Text splitter chunk length of ${prefValue} exceeds embedder model max of ${embedderLimit}. Will use ${embedderLimit}.`
+      );
+    return prefValue > limit ? limit : prefValue;
+  }
+
+  #setSplitter(config = {}) {
+    // if (!config?.splitByFilename) {// TODO do something when specific extension is present? }
+    return new RecursiveSplitter({
+      chunkSize: isNaN(config?.chunkSize) ? 1_000 : Number(config?.chunkSize),
+      chunkOverlap: isNaN(config?.chunkOverlap)
+        ? 20
+        : Number(config?.chunkOverlap),
+    });
+  }
+
+  async splitText(documentText) {
+    return this.#splitter._splitText(documentText);
+  }
+}
+
+// Wrapper for Langchain default RecursiveCharacterTextSplitter class.
+class RecursiveSplitter {
+  constructor({ chunkSize, chunkOverlap }) {
+    const {
+      RecursiveCharacterTextSplitter,
+    } = require("langchain/text_splitter");
+    this.log(`Will split with`, { chunkSize, chunkOverlap });
+    this.engine = new RecursiveCharacterTextSplitter({
+      chunkSize,
+      chunkOverlap,
+    });
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[35m[RecursiveSplitter]\x1b[0m ${text}`, ...args);
+  }
+
+  async _splitText(documentText) {
+    return this.engine.splitText(documentText);
+  }
+}
+
+module.exports.TextSplitter = TextSplitter;
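For context, the call pattern the vector database providers below all share when consuming this class is roughly the following sketch (the 1_200 preference and pageContent are placeholder values for illustration; the calls run inside an async function):

  const { TextSplitter } = require("../../TextSplitter");

  // Clamp the admin-configured chunk size to the embedder's hard limit, then split.
  const textSplitter = new TextSplitter({
    chunkSize: TextSplitter.determineMaxChunkSize(1_200, 8_191),
    chunkOverlap: 20,
  });
  const textChunks = await textSplitter.splitText(pageContent);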
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js
index df983d4f488a393375fc3f8c88cb07af17f5e224..b6f8981bb19b674e5fad746538643467e5f42ef5 100644
--- a/server/utils/vectorDbProviders/astra/index.js
+++ b/server/utils/vectorDbProviders/astra/index.js
@@ -1,5 +1,6 @@
 const { AstraDB: AstraClient } = require("@datastax/astra-db-ts");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const {
@@ -147,10 +147,17 @@ const AstraDB = {
         return { vectorized: true, error: null };
       }
 
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index 9e3caa7adca018dde5b498c94925aef2f299be5c..1b9cbb53a4b3b150041e936e0040d58ca35e3634 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -1,5 +1,6 @@
 const { ChromaClient } = require("chromadb");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const {
@@ -180,10 +180,17 @@ const Chroma = {
       // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments`
       // because we then cannot atomically control our namespace to granularly find/remove documents
       // from vectordb.
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index ecf10007f5100763fa9b45dbafb37546023679fd..f2fc8eee10fececee9fca475cfb4bdc1f464bd64 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -5,9 +5,10 @@ const {
   getEmbeddingEngineSelection,
 } = require("../../helpers");
 const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
+const { SystemSettings } = require("../../../models/systemSettings");
 
 const LanceDb = {
   uri: `${
@@ -180,10 +181,17 @@ const LanceDb = {
       // We have to do this manually as opposed to using LangChains `xyz.fromDocuments`
       // because we then cannot atomically control our namespace to granularly find/remove documents
       // from vectordb.
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index a304e8714e56eb65295f37a138b28e0b075e1e14..3bd5be6d6498414090e3223c4110e07e98b54368 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -4,7 +4,8 @@
   IndexType,
   MilvusClient,
 } = require("@zilliz/milvus2-sdk-node");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const {
@@ -182,10 +182,17 @@ const Milvus = {
         return { vectorized: true, error: null };
       }
 
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index b8f288c06198569d2b82349fb5755ac8d974e1ef..efcecddc9d4a726e9e58d270970aa26ebb3d3e66 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -1,5 +1,6 @@
 const { Pinecone } = require("@pinecone-database/pinecone");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const {
@@ -125,10 +125,17 @@ const PineconeDB = {
       // because we then cannot atomically control our namespace to granularly find/remove documents
       // from vectordb.
       // https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L167
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index e7e00fe64d3e3e8207d91fcc2df5b8840b376161..aaca51118f506c603cbd2691353708d7754dd863 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -1,5 +1,6 @@
 const { QdrantClient } = require("@qdrant/js-client-rest");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const {
@@ -198,10 +198,17 @@ const QDrant = {
       // We have to do this manually as opposed to using LangChains `Qdrant.fromDocuments`
       // because we then cannot atomically control our namespace to granularly find/remove documents
       // from vectordb.
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index 13668303f9a520b613ec3dcbf23c596c01816fb8..35112327b4f10a3a8c041a1596c72419ece3ffef 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -1,5 +1,6 @@
 const { default: weaviate } = require("weaviate-ts-client");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const {
@@ -241,10 +241,17 @@ const Weaviate = {
       // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments`
       // because we then cannot atomically control our namespace to granularly find/remove documents
       // from vectordb.
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);
 
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index be0e9e7d40d5d5174e1713c19bc1ee0564cb9369..72e6829d9a539947055205ad21e365786894eeba 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -4,7 +4,8 @@
   IndexType,
   MilvusClient,
 } = require("@zilliz/milvus2-sdk-node");
-const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { TextSplitter } = require("../../TextSplitter");
+const { SystemSettings } = require("../../../models/systemSettings");
 const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const {
@@ -183,10 +183,17 @@ const Zilliz = {
         return { vectorized: true, error: null };
       }
 
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize:
-          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
-        chunkOverlap: 20,
+      const textSplitter = new TextSplitter({
+        chunkSize: TextSplitter.determineMaxChunkSize(
+          await SystemSettings.getValueOrFallback({
+            label: "text_splitter_chunk_size",
+          }),
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength
+        ),
+        chunkOverlap: await SystemSettings.getValueOrFallback(
+          { label: "text_splitter_chunk_overlap" },
+          20
+        ),
       });
       const textChunks = await textSplitter.splitText(pageContent);