diff --git a/cloud-deployments/aws/cloudformation/aws_https_instructions.md b/cloud-deployments/aws/cloudformation/aws_https_instructions.md
index 5eb3cc753263fac6cee5d9eca9c3e72a4623211e..39591820bc7d5f3efa138e3a94258b6762365dac 100644
--- a/cloud-deployments/aws/cloudformation/aws_https_instructions.md
+++ b/cloud-deployments/aws/cloudformation/aws_https_instructions.md
@@ -64,8 +64,14 @@ server {
    listen 80;
    server_name [insert FQDN here];
    location / {
+      # Prevent timeouts on long-running requests; values are in seconds.
+      proxy_connect_timeout       605;
+      proxy_send_timeout          605;
+      proxy_read_timeout          605;
+      send_timeout                605;
+      keepalive_timeout           605;
       proxy_pass  http://0.0.0.0:3001;
-      }
+    }
 }
 3. Enter ':wq' to save the changes to the anything config file
 
diff --git a/server/.gitignore b/server/.gitignore
index be4af591de699562d8ea3b21aba2ef9b2c55591f..0913f9663605e7380fa7f205b83fd18681af64eb 100644
--- a/server/.gitignore
+++ b/server/.gitignore
@@ -3,6 +3,7 @@
 storage/assets/*
 !storage/assets/anything-llm.png
 storage/documents/*
+storage/tmp/*
 storage/vector-cache/*.json
 storage/exports
 storage/imports
diff --git a/server/package.json b/server/package.json
index 69cb790c38d062cd26521d37556fab0091949c0b..9761125a4aa0b95412fa044138e1547f5ecd0372 100644
--- a/server/package.json
+++ b/server/package.json
@@ -27,7 +27,7 @@
     "@pinecone-database/pinecone": "^0.1.6",
     "@prisma/client": "5.3.0",
     "@qdrant/js-client-rest": "^1.4.0",
-    "@xenova/transformers": "^2.10.0",
+    "@xenova/transformers": "^2.14.0",
     "@zilliz/milvus2-sdk-node": "^2.3.5",
     "archiver": "^5.3.1",
     "bcrypt": "^5.1.0",
@@ -78,4 +78,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^3.0.3"
   }
-}
+}
\ No newline at end of file
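For context on the `@xenova/transformers` bump above: it is the package behind the on-instance embedder changed in the next file. Below is a minimal sketch (not taken from this diff) of loading and invoking its feature-extraction pipeline from CommonJS; the model name `Xenova/all-MiniLM-L6-v2` is an assumption consistent with the 384-dimension setting in `NativeEmbedder`, and the call options mirror the `pooling`/`normalize` arguments used in `embedChunks()` below.

// Minimal sketch: load and run a @xenova/transformers feature-extraction
// pipeline the way embedderClient() is expected to. The model name is an
// assumption and is not part of this diff.
async function loadEmbedder() {
  // The package is ESM-only, so load it via dynamic import from CommonJS.
  const { pipeline } = await import("@xenova/transformers");
  return await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
}

(async () => {
  const embed = await loadEmbedder();
  // Same invocation options as embedChunks() uses below.
  const output = await embed(["hello world", "another sentence"], {
    pooling: "mean",
    normalize: true,
  });
  console.log(output.tolist().length); // 2 vectors, 384 floats each
})();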
diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js
index d2acde32aeab5df1b13515c962dbb58623321946..789e51fe9e8280997ed855a679d9bfac1fd9faf8 100644
--- a/server/utils/EmbeddingEngines/native/index.js
+++ b/server/utils/EmbeddingEngines/native/index.js
@@ -1,6 +1,7 @@
 const path = require("path");
 const fs = require("fs");
 const { toChunks } = require("../../helpers");
+const { v4 } = require("uuid");
 
 class NativeEmbedder {
   constructor() {
@@ -15,13 +16,30 @@ class NativeEmbedder {
     this.dimensions = 384;
 
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
-    this.maxConcurrentChunks = 50;
+    this.maxConcurrentChunks = 25;
     this.embeddingMaxChunkLength = 1_000;
 
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
   }
 
+  #tempfilePath() {
+    const filename = `${v4()}.tmp`;
+    const tmpPath = process.env.STORAGE_DIR
+      ? path.resolve(process.env.STORAGE_DIR, "tmp")
+      : path.resolve(__dirname, `../../../storage/tmp`);
+    if (!fs.existsSync(tmpPath)) fs.mkdirSync(tmpPath, { recursive: true });
+    return path.resolve(tmpPath, filename);
+  }
+
+  async #writeToTempfile(filePath, data) {
+    try {
+      await fs.promises.appendFile(filePath, data, { encoding: "utf8" });
+    } catch (e) {
+      console.error(`Error writing to tempfile: ${e}`);
+    }
+  }
+
   async embedderClient() {
     if (!fs.existsSync(this.modelPath)) {
       console.log(
@@ -62,18 +80,51 @@ class NativeEmbedder {
     return result?.[0] || [];
   }
 
+  // If you are thinking you want to edit this function - you probably don't.
+  // This process was benchmarked heavily on a t3.small (2GB RAM, 2 vCPU),
+  // and without careful memory management for the V8 garbage collector
+  // this function will likely result in an OOM on any resource-constrained deployment.
+  // To help manage very large documents we append each batch's embeddings to an on-disk
+  // write-log on every iteration so the full result stays out of memory. `maxConcurrentChunks`
+  // is set to 25, as 50 seems to overflow no matter what. With these settings, memory use
+  // hovers around ~30% while embedding a very large document (>100K words) but can spike
+  // up to ~70% before garbage collection; this is repeatable across document sizes.
+  // While this does take a while, it requires zero setup and is 100% free and on-instance.
   async embedChunks(textChunks = []) {
-    const Embedder = await this.embedderClient();
-    const embeddingResults = [];
-    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
-      const output = await Embedder(chunk, {
+    const tmpFilePath = this.#tempfilePath();
+    const chunks = toChunks(textChunks, this.maxConcurrentChunks);
+    const chunkLen = chunks.length;
+
+    for (let [idx, chunk] of chunks.entries()) {
+      if (idx === 0) await this.#writeToTempfile(tmpFilePath, "[");
+      let data;
+      let pipeline = await this.embedderClient();
+      let output = await pipeline(chunk, {
         pooling: "mean",
         normalize: true,
       });
-      if (output.length === 0) continue;
-      embeddingResults.push(output.tolist());
+
+      if (output.length === 0) {
+        pipeline = null;
+        output = null;
+        data = null;
+        continue;
+      }
+
+      data = JSON.stringify(output.tolist());
+      await this.#writeToTempfile(tmpFilePath, data);
+      console.log(`\x1b[34m[Embedded Chunk ${idx + 1} of ${chunkLen}]\x1b[0m`);
+      if (chunkLen - 1 !== idx) await this.#writeToTempfile(tmpFilePath, ",");
+      if (chunkLen - 1 === idx) await this.#writeToTempfile(tmpFilePath, "]");
+      pipeline = null;
+      output = null;
+      data = null;
     }
 
+    const embeddingResults = JSON.parse(
+      fs.readFileSync(tmpFilePath, { encoding: "utf-8" })
+    );
+    fs.rmSync(tmpFilePath, { force: true });
     return embeddingResults.length > 0 ? embeddingResults.flat() : null;
   }
 }
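Taken together, the reworked `embedChunks()` streams each batch's JSON to a temp file under `storage/tmp/` (hence the `.gitignore` entry above) and only materializes the full result once, right before returning. A usage sketch follows; the require path and the `NativeEmbedder` export are assumptions, since the module's export statement sits outside the hunks shown here.

// Usage sketch for the reworked embedChunks(). Assumes the module exports
// NativeEmbedder and that STORAGE_DIR points at the server's storage
// directory, so temp write-logs land in the ignored storage/tmp/ path.
const { NativeEmbedder } = require("./server/utils/EmbeddingEngines/native");

(async () => {
  const embedder = new NativeEmbedder();
  // Text is embedded in batches of maxConcurrentChunks (25); each batch is
  // appended to an on-disk JSON write-log, then parsed and flattened once.
  const vectors = await embedder.embedChunks([
    "The first paragraph of a large document...",
    "The second paragraph of a large document...",
  ]);
  // One 384-dimension vector per input chunk, or null if nothing was embedded.
  console.log(vectors?.length, vectors?.[0]?.length); // 2 384
})();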
diff --git a/server/yarn.lock b/server/yarn.lock
index 175a67947d124ff52bca8950e61bb8ba24877f03..cc129dfe9daa3d6670128737bd916400f459e472 100644
--- a/server/yarn.lock
+++ b/server/yarn.lock
@@ -269,6 +269,11 @@
   dependencies:
     "@hapi/hoek" "^9.0.0"
 
+"@huggingface/jinja@^0.1.0":
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/@huggingface/jinja/-/jinja-0.1.2.tgz#073fa0a68ef481a1806b0186bbafd8013e586fbe"
+  integrity sha512-x5mpbfJt1nKmVep5WNP5VjNsjWApWNj8pPYI+uYMkBWH9bWUJmQmHt2lbf0VCoQd54Oq3XuFEh/UyoVh7rPxmg==
+
 "@humanwhocodes/config-array@^0.11.13":
   version "0.11.13"
   resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.13.tgz#075dc9684f40a531d9b26b0822153c1e832ee297"
@@ -851,11 +856,12 @@
   resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
   integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
 
-"@xenova/transformers@^2.10.0":
-  version "2.10.0"
-  resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.10.0.tgz#ae97d724a3addf78de7314336a9f7b28ed96a140"
-  integrity sha512-Al9WKiOsimAC3mU9Ef434GkHF0izmeAM7mMMx5npdWsWLAYL8fmJXCrULj6uCfjomMQ7jyN9rDtKpp570hffiw==
+"@xenova/transformers@^2.14.0":
+  version "2.14.0"
+  resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.14.0.tgz#6fe128957e64377ca4fca910e77f6092f3f3512a"
+  integrity sha512-rQ3O7SW5EM64b6XFZGx3XQ2cfiroefxUwU9ShfSpEZyhd082GvwNJJKndxgaukse1hZP1JUDoT0DfjDiq4IZiw==
   dependencies:
+    "@huggingface/jinja" "^0.1.0"
     onnxruntime-web "1.14.0"
     sharp "^0.32.0"
   optionalDependencies: