diff --git a/cloud-deployments/aws/cloudformation/aws_https_instructions.md b/cloud-deployments/aws/cloudformation/aws_https_instructions.md index 5eb3cc753263fac6cee5d9eca9c3e72a4623211e..39591820bc7d5f3efa138e3a94258b6762365dac 100644 --- a/cloud-deployments/aws/cloudformation/aws_https_instructions.md +++ b/cloud-deployments/aws/cloudformation/aws_https_instructions.md @@ -64,8 +64,14 @@ server { listen 80; server_name [insert FQDN here]; location / { + # Prevent timeouts on long-running requests. + proxy_connect_timeout 605; + proxy_send_timeout 605; + proxy_read_timeout 605; + send_timeout 605; + keepalive_timeout 605; proxy_pass http://0.0.0.0:3001; - } + } } 3. Enter ':wq' to save the changes to the anything config file diff --git a/server/.gitignore b/server/.gitignore index be4af591de699562d8ea3b21aba2ef9b2c55591f..0913f9663605e7380fa7f205b83fd18681af64eb 100644 --- a/server/.gitignore +++ b/server/.gitignore @@ -3,6 +3,7 @@ storage/assets/* !storage/assets/anything-llm.png storage/documents/* +storage/tmp/* storage/vector-cache/*.json storage/exports storage/imports diff --git a/server/package.json b/server/package.json index 69cb790c38d062cd26521d37556fab0091949c0b..9761125a4aa0b95412fa044138e1547f5ecd0372 100644 --- a/server/package.json +++ b/server/package.json @@ -27,7 +27,7 @@ "@pinecone-database/pinecone": "^0.1.6", "@prisma/client": "5.3.0", "@qdrant/js-client-rest": "^1.4.0", - "@xenova/transformers": "^2.10.0", + "@xenova/transformers": "^2.14.0", "@zilliz/milvus2-sdk-node": "^2.3.5", "archiver": "^5.3.1", "bcrypt": "^5.1.0", @@ -78,4 +78,4 @@ "nodemon": "^2.0.22", "prettier": "^3.0.3" } -} +} \ No newline at end of file diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js index d2acde32aeab5df1b13515c962dbb58623321946..789e51fe9e8280997ed855a679d9bfac1fd9faf8 100644 --- a/server/utils/EmbeddingEngines/native/index.js +++ b/server/utils/EmbeddingEngines/native/index.js @@ -1,6 +1,7 @@ const path = require("path"); const fs = require("fs"); const { toChunks } = require("../../helpers"); +const { v4 } = require("uuid"); class NativeEmbedder { constructor() { @@ -15,13 +16,30 @@ class NativeEmbedder { this.dimensions = 384; // Limit of how many strings we can process in a single pass to stay with resource or network limits - this.maxConcurrentChunks = 50; + this.maxConcurrentChunks = 25; this.embeddingMaxChunkLength = 1_000; // Make directory when it does not exist in existing installations if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir); } + #tempfilePath() { + const filename = `${v4()}.tmp`; + const tmpPath = process.env.STORAGE_DIR + ? path.resolve(process.env.STORAGE_DIR, "tmp") + : path.resolve(__dirname, `../../../storage/tmp`); + if (!fs.existsSync(tmpPath)) fs.mkdirSync(tmpPath, { recursive: true }); + return path.resolve(tmpPath, filename); + } + + async #writeToTempfile(filePath, data) { + try { + await fs.promises.appendFile(filePath, data, { encoding: "utf8" }); + } catch (e) { + console.error(`Error writing to tempfile: ${e}`); + } + } + async embedderClient() { if (!fs.existsSync(this.modelPath)) { console.log( @@ -62,18 +80,51 @@ class NativeEmbedder { return result?.[0] || []; } + // If you are thinking you want to edit this function - you probably don't. + // This process was benchmarked heavily on a t3.small (2GB RAM 1vCPU) + // and without careful memory management for the V8 garbage collector + // this function will likely result in an OOM on any resource-constrained deployment. + // To help manage very large documents we run a concurrent write-log each iteration + // to keep the embedding result out of memory. The `maxConcurrentChunk` is set to 25, + // as 50 seems to overflow no matter what. Given the above, memory use hovers around ~30% + // during a very large document (>100K words) but can spike up to 70% before gc. + // This seems repeatable for all document sizes. + // While this does take a while, it is zero set up and is 100% free and on-instance. async embedChunks(textChunks = []) { - const Embedder = await this.embedderClient(); - const embeddingResults = []; - for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) { - const output = await Embedder(chunk, { + const tmpFilePath = this.#tempfilePath(); + const chunks = toChunks(textChunks, this.maxConcurrentChunks); + const chunkLen = chunks.length; + + for (let [idx, chunk] of chunks.entries()) { + if (idx === 0) await this.#writeToTempfile(tmpFilePath, "["); + let data; + let pipeline = await this.embedderClient(); + let output = await pipeline(chunk, { pooling: "mean", normalize: true, }); - if (output.length === 0) continue; - embeddingResults.push(output.tolist()); + + if (output.length === 0) { + pipeline = null; + output = null; + data = null; + continue; + } + + data = JSON.stringify(output.tolist()); + await this.#writeToTempfile(tmpFilePath, data); + console.log(`\x1b[34m[Embedded Chunk ${idx + 1} of ${chunkLen}]\x1b[0m`); + if (chunkLen - 1 !== idx) await this.#writeToTempfile(tmpFilePath, ","); + if (chunkLen - 1 === idx) await this.#writeToTempfile(tmpFilePath, "]"); + pipeline = null; + output = null; + data = null; } + const embeddingResults = JSON.parse( + fs.readFileSync(tmpFilePath, { encoding: "utf-8" }) + ); + fs.rmSync(tmpFilePath, { force: true }); return embeddingResults.length > 0 ? embeddingResults.flat() : null; } } diff --git a/server/yarn.lock b/server/yarn.lock index 175a67947d124ff52bca8950e61bb8ba24877f03..cc129dfe9daa3d6670128737bd916400f459e472 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -269,6 +269,11 @@ dependencies: "@hapi/hoek" "^9.0.0" +"@huggingface/jinja@^0.1.0": + version "0.1.2" + resolved "https://registry.yarnpkg.com/@huggingface/jinja/-/jinja-0.1.2.tgz#073fa0a68ef481a1806b0186bbafd8013e586fbe" + integrity sha512-x5mpbfJt1nKmVep5WNP5VjNsjWApWNj8pPYI+uYMkBWH9bWUJmQmHt2lbf0VCoQd54Oq3XuFEh/UyoVh7rPxmg== + "@humanwhocodes/config-array@^0.11.13": version "0.11.13" resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.13.tgz#075dc9684f40a531d9b26b0822153c1e832ee297" @@ -851,11 +856,12 @@ resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== -"@xenova/transformers@^2.10.0": - version "2.10.0" - resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.10.0.tgz#ae97d724a3addf78de7314336a9f7b28ed96a140" - integrity sha512-Al9WKiOsimAC3mU9Ef434GkHF0izmeAM7mMMx5npdWsWLAYL8fmJXCrULj6uCfjomMQ7jyN9rDtKpp570hffiw== +"@xenova/transformers@^2.14.0": + version "2.14.0" + resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.14.0.tgz#6fe128957e64377ca4fca910e77f6092f3f3512a" + integrity sha512-rQ3O7SW5EM64b6XFZGx3XQ2cfiroefxUwU9ShfSpEZyhd082GvwNJJKndxgaukse1hZP1JUDoT0DfjDiq4IZiw== dependencies: + "@huggingface/jinja" "^0.1.0" onnxruntime-web "1.14.0" sharp "^0.32.0" optionalDependencies: