From 049bfa14cbbd44acf95d735f5fdf14cfc52e53fe Mon Sep 17 00:00:00 2001
From: timothycarambat <rambat1010@gmail.com>
Date: Wed, 20 Dec 2023 11:20:40 -0800
Subject: [PATCH] fix: fully separate chunk concurrency from chunk length

---
 server/utils/EmbeddingEngines/azureOpenAi/index.js | 7 ++++---
 server/utils/EmbeddingEngines/localAi/index.js     | 3 ++-
 server/utils/EmbeddingEngines/native/index.js      | 5 +++--
 server/utils/EmbeddingEngines/openAi/index.js      | 5 +++--
 4 files changed, 12 insertions(+), 8 deletions(-)
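
Previously embeddingMaxChunkLength did double duty: it served both as the
number of chunks batched into a single embedding request and as the maximum
text length of an individual chunk. This patch splits those roles, so
maxConcurrentChunks now controls request batching while embeddingMaxChunkLength
only bounds chunk text length. As a minimal sketch of the batching behavior,
assuming toChunks (imported from the project's helpers) is a simple slice-based
partitioner -- the implementation below is illustrative, not the repo's source:

    // Presumed behavior of toChunks: partition an array into sub-arrays
    // of at most `size` elements each.
    function toChunks(arr, size) {
      return Array.from({ length: Math.ceil(arr.length / size) }, (_v, i) =>
        arr.slice(i * size, i * size + size)
      );
    }

    // With Azure's 16-inputs-per-request cap, 40 text chunks become three
    // concurrent embedding requests of 16, 16, and 8 strings.
    const batches = toChunks(new Array(40).fill("some chunk text"), 16);
    console.log(batches.map((b) => b.length)); // [ 16, 16, 8 ]

Each batch becomes one Promise in embeddingRequests, so maxConcurrentChunks
bounds how many strings share a single API call, independent of how long each
string is allowed to be.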

diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js
index 3f36b576b..e80b4b734 100644
--- a/server/utils/EmbeddingEngines/azureOpenAi/index.js
+++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -16,7 +16,8 @@ class AzureOpenAiEmbedder {
 
     // Limit of how many strings we can process in a single pass to stay within resource or network limits
     // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
-    this.embeddingMaxChunkLength = 16;
+    this.maxConcurrentChunks = 16;
+    this.embeddingMaxChunkLength = 1_000;
   }
 
   async embedTextInput(textInput) {
@@ -34,9 +35,9 @@ class AzureOpenAiEmbedder {
 
     // Because there is a limit on how many chunks can be sent at once to Azure OpenAI
     // we concurrently execute requests in batches of the maximum allowed size.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js
index 68fe66544..1480755d7 100644
--- a/server/utils/EmbeddingEngines/localAi/index.js
+++ b/server/utils/EmbeddingEngines/localAi/index.js
@@ -18,6 +18,7 @@ class LocalAiEmbedder {
     this.openai = new OpenAIApi(config);
 
     // Limit of how many strings we can process in a single pass to stay within resource or network limits
+    this.maxConcurrentChunks = 50;
     this.embeddingMaxChunkLength = maximumChunkLength();
   }
 
@@ -28,7 +29,7 @@ class LocalAiEmbedder {
 
   async embedChunks(textChunks = []) {
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js
index 81189d4e2..69e13a9e3 100644
--- a/server/utils/EmbeddingEngines/native/index.js
+++ b/server/utils/EmbeddingEngines/native/index.js
@@ -14,7 +14,8 @@ class NativeEmbedder {
     this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2");
 
     // Limit of how many strings we can process in a single pass to stay within resource or network limits
-    this.embeddingMaxChunkLength = 50;
+    this.maxConcurrentChunks = 50;
+    this.embeddingMaxChunkLength = 1_000;
 
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
@@ -63,7 +64,7 @@ class NativeEmbedder {
   async embedChunks(textChunks = []) {
     const Embedder = await this.embedderClient();
     const embeddingResults = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       const output = await Embedder(chunk, {
         pooling: "mean",
         normalize: true,
diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js
index 6ba38c898..105be9d73 100644
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@@ -11,6 +11,7 @@ class OpenAiEmbedder {
     this.openai = openai;
 
     // Limit of how many strings we can process in a single pass to stay within resource or network limits
+    this.maxConcurrentChunks = 500;
     this.embeddingMaxChunkLength = 1_000;
   }
 
@@ -22,9 +23,9 @@ class OpenAiEmbedder {
   async embedChunks(textChunks = []) {
     // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8MB)
     // we concurrently execute requests in batches of the maximum allowed size.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
-- 
GitLab
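
With the two settings separated, they can be tuned independently:
embeddingMaxChunkLength bounds the character length of each text chunk (for
LocalAI it still comes from maximumChunkLength()), while maxConcurrentChunks
bounds how many of those chunks ride in one request. A hypothetical
illustration of the two limits working together (the splitting loop below is
an assumption for this note, not code from the repo):

    // Illustrative only: embeddingMaxChunkLength caps characters per chunk,
    // maxConcurrentChunks caps chunks per embedding request.
    const embeddingMaxChunkLength = 1_000;
    const maxConcurrentChunks = 16;

    const text = "x".repeat(25_000); // a 25,000-character document
    const chunks = [];
    for (let i = 0; i < text.length; i += embeddingMaxChunkLength) {
      chunks.push(text.slice(i, i + embeddingMaxChunkLength));
    }
    console.log(chunks.length); // 25 chunks of at most 1,000 characters
    // Batching with toChunks(chunks, maxConcurrentChunks) then yields two
    // requests of 16 and 9 chunks respectively.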