Commit 049bfa14 authored by timothycarambat

fix: fully separate chunk concurrency from chunk length

parent 7bee849c
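
Context for the change: before this commit the embedders reused embeddingMaxChunkLength both as the batch size handed to toChunks() and as the per-chunk text length, so tuning one knob silently moved the other. The commit introduces a dedicated maxConcurrentChunks for request batching and leaves embeddingMaxChunkLength to describe text size only. A minimal sketch of a toChunks-style batching helper (the real helper lives in the repo's utils; this version is illustrative):

// Illustrative stand-in for the repo's toChunks helper: split an array
// into batches of at most `size` items.
function toChunks(items, size) {
  return Array.from({ length: Math.ceil(items.length / size) }, (_, i) =>
    items.slice(i * size, (i + 1) * size)
  );
}
// toChunks(["a", "b", "c", "d", "e"], 2) -> [["a","b"], ["c","d"], ["e"]]
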
@@ -16,7 +16,8 @@ class AzureOpenAiEmbedder {
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
     // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
-    this.embeddingMaxChunkLength = 16;
+    this.maxConcurrentChunks = 16;
+    this.embeddingMaxChunkLength = 1_000;
   }
   async embedTextInput(textInput) {
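
Note the linked Azure FAQ: a single Azure OpenAI embeddings request accepts at most 16 inputs, which is why maxConcurrentChunks keeps the old value of 16 here while the now-unrelated per-chunk text budget gets its own 1_000 setting.
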
@@ -34,9 +35,9 @@ class AzureOpenAiEmbedder {
     // Because there is a limit on how many chunks can be sent at once to Azure OpenAI
     // we concurrently execute each max batch of text chunks possible.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
...
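
For context, a hedged sketch of the fan-out pattern this hunk edits: every batch of at most maxConcurrentChunks strings becomes one embeddings request, and the batches run concurrently. The client call and response shape below are assumptions modeled on the openai v3 SDK, not a verbatim copy of the repo's code:

// Sketch only; toChunks as above, client and model names are placeholders.
async function embedBatches(openai, textChunks, maxConcurrentChunks) {
  const requests = toChunks(textChunks, maxConcurrentChunks).map((batch) =>
    openai
      .createEmbedding({ model: "text-embedding-ada-002", input: batch })
      .then((res) => res.data.data) // [{ index, embedding }, ...] per batch
  );
  // All batches resolve together; flatten back to one vector per input chunk.
  const results = await Promise.all(requests);
  return results.flat().map((item) => item.embedding);
}
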
@@ -18,6 +18,7 @@ class LocalAiEmbedder {
     this.openai = new OpenAIApi(config);
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
+    this.maxConcurrentChunks = 50;
     this.embeddingMaxChunkLength = maximumChunkLength();
   }
@@ -28,7 +29,7 @@ class LocalAiEmbedder {
   async embedChunks(textChunks = []) {
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
...
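
A quick worked example of the separation in this embedder: maximumChunkLength() now only bounds how long each text chunk may be, while maxConcurrentChunks = 50 alone decides batching. Embedding 120 chunks therefore issues Math.ceil(120 / 50) = 3 concurrent requests of 50, 50, and 20 chunks, regardless of the configured chunk length.
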
@@ -14,7 +14,8 @@ class NativeEmbedder {
     this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2");
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
-    this.embeddingMaxChunkLength = 50;
+    this.maxConcurrentChunks = 50;
+    this.embeddingMaxChunkLength = 1_000;
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
@@ -63,7 +64,7 @@ class NativeEmbedder {
   async embedChunks(textChunks = []) {
     const Embedder = await this.embedderClient();
     const embeddingResults = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       const output = await Embedder(chunk, {
         pooling: "mean",
         normalize: true,
...
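
The Embedder invoked above is a local @xenova/transformers feature-extraction pipeline. A hedged, self-contained sketch of embedding one batch this way (the real class caches the pipeline via embedderClient(); construction is inlined here for clarity):

import { pipeline } from "@xenova/transformers";

// Build the same kind of local pipeline the class wraps (illustrative).
const embed = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");

// Mean-pool and normalize each input string into one fixed-length vector.
const output = await embed(["hello world", "goodbye world"], {
  pooling: "mean",
  normalize: true,
});
console.log(output.tolist().length); // 2 vectors, one per input string
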
@@ -11,6 +11,7 @@ class OpenAiEmbedder {
     this.openai = openai;
     // Limit of how many strings we can process in a single pass to stay with resource or network limits
+    this.maxConcurrentChunks = 500;
     this.embeddingMaxChunkLength = 1_000;
   }
@@ -22,9 +23,9 @@ class OpenAiEmbedder {
   async embedChunks(textChunks = []) {
     // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
     // we concurrently execute each max batch of text chunks possible.
-    // Refer to constructor embeddingMaxChunkLength for more info.
+    // Refer to constructor maxConcurrentChunks for more info.
     const embeddingRequests = [];
-    for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
       embeddingRequests.push(
         new Promise((resolve) => {
           this.openai
...
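
To round out the separation, a hedged sketch of the other half of the contract: a splitter that respects embeddingMaxChunkLength when cutting raw text, independent of how many chunks later travel per request (the real splitting happens elsewhere in the pipeline; the function name is illustrative):

// Illustrative only: cut raw text into pieces of at most maxLength characters.
function splitByLength(text, maxLength = 1_000) {
  const pieces = [];
  for (let i = 0; i < text.length; i += maxLength) {
    pieces.push(text.slice(i, i + maxLength));
  }
  return pieces;
}
// splitByLength() produces the textChunks array that embedChunks() later
// batches by maxConcurrentChunks; after this commit the two knobs are independent.
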