From 658e7fa3909eb49d3ce1d106eeeed9f8f158b970 Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Thu, 18 Jan 2024 11:40:48 -0800
Subject: [PATCH] chore: Better VectorDb and Embedder error messages (#620)

* chore: propagate embedder and vectordb errors during document mutations

* add default value for errors on addDocuments
---
 frontend/src/index.css                           |  4 ++++
 server/endpoints/workspaces.js                   |  8 +++++---
 server/models/documents.js                       | 15 +++++++++++----
 .../utils/EmbeddingEngines/azureOpenAi/index.js  | 16 ++++++++++++----
 server/utils/EmbeddingEngines/localAi/index.js   | 16 ++++++++++++----
 server/utils/EmbeddingEngines/openAi/index.js    | 16 ++++++++++++----
 server/utils/vectorDbProviders/chroma/index.js   |  7 +++----
 server/utils/vectorDbProviders/lance/index.js    |  7 +++----
 server/utils/vectorDbProviders/milvus/index.js   |  7 +++----
 server/utils/vectorDbProviders/pinecone/index.js |  7 +++----
 server/utils/vectorDbProviders/qdrant/index.js   |  7 +++----
 server/utils/vectorDbProviders/weaviate/index.js |  7 +++----
 server/utils/vectorDbProviders/zilliz/index.js   |  7 +++----
 13 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/frontend/src/index.css b/frontend/src/index.css
index e8d7e2d8c..729cccb5f 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -403,3 +403,7 @@ dialog::backdrop {
 .tooltip {
   @apply !bg-black !text-white !py-2 !px-3 !rounded-md;
 }
+
+.Toastify__toast-body {
+  white-space: pre-line;
+}
diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js
index 7873ef76a..7119297f6 100644
--- a/server/endpoints/workspaces.js
+++ b/server/endpoints/workspaces.js
@@ -159,7 +159,7 @@ function workspaceEndpoints(app) {
         }
 
         await Document.removeDocuments(currWorkspace, deletes);
-        const { failed = [] } = await Document.addDocuments(
+        const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
           currWorkspace,
           adds
         );
@@ -167,8 +167,10 @@ function workspaceEndpoints(app) {
         response.status(200).json({
           workspace: updatedWorkspace,
           message:
-            failed.length > 0
-              ? `${failed.length} documents could not be embedded.`
+            failedToEmbed.length > 0
+              ? `${failedToEmbed.length} documents failed to add.\n\n${errors
+                  .map((msg) => `${msg}`)
+                  .join("\n\n")}`
               : null,
         });
       } catch (e) {
diff --git a/server/models/documents.js b/server/models/documents.js
index 4505089d5..8f3b88fbd 100644
--- a/server/models/documents.js
+++ b/server/models/documents.js
@@ -39,6 +39,7 @@ const Document = {
     if (additions.length === 0) return { failed: [], embedded: [] };
     const embedded = [];
     const failedToEmbed = [];
+    const errors = new Set();
 
     for (const path of additions) {
       const data = await fileData(path);
@@ -53,14 +54,20 @@ const Document = {
         workspaceId: workspace.id,
         metadata: JSON.stringify(metadata),
       };
-      const vectorized = await VectorDb.addDocumentToNamespace(
+
+      const { vectorized, error } = await VectorDb.addDocumentToNamespace(
         workspace.slug,
         { ...data, docId },
         path
       );
+
       if (!vectorized) {
-        console.error("Failed to vectorize", path);
-        failedToEmbed.push(path);
+        console.error(
+          "Failed to vectorize",
+          metadata?.title || newDoc.filename
+        );
+        failedToEmbed.push(metadata?.title || newDoc.filename);
+        errors.add(error);
         continue;
       }
 
@@ -77,7 +84,7 @@ const Document = {
       Embedder: process.env.EMBEDDING_ENGINE || "inherit",
       VectorDbSelection: process.env.VECTOR_DB || "pinecone",
     });
-    return { failed: failedToEmbed, embedded };
+    return { failedToEmbed, errors: Array.from(errors), embedded };
   },
 
   removeDocuments: async function (workspace, removals = []) {
diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js
index e80b4b734..4193e860d 100644
--- a/server/utils/EmbeddingEngines/azureOpenAi/index.js
+++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -46,7 +46,12 @@ class AzureOpenAiEmbedder {
               resolve({ data: res.data, error: null });
             })
             .catch((e) => {
-              resolve({ data: [], error: e?.error });
+              e.type =
+                e?.response?.data?.error?.code ||
+                e?.response?.status ||
+                "failed_to_embed";
+              e.message = e?.response?.data?.error?.message || e.message;
+              resolve({ data: [], error: e });
             });
         })
       );
@@ -62,11 +67,14 @@ class AzureOpenAiEmbedder {
         .map((res) => res.error)
         .flat();
       if (errors.length > 0) {
+        let uniqueErrors = new Set();
+        errors.map((error) =>
+          uniqueErrors.add(`[${error.type}]: ${error.message}`)
+        );
+
         return {
           data: [],
-          error: `(${errors.length}) Embedding Errors! ${errors
-            .map((error) => `[${error.type}]: ${error.message}`)
-            .join(", ")}`,
+          error: Array.from(uniqueErrors).join(", "),
         };
       }
       return {
diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js
index 1480755d7..2c9db2c73 100644
--- a/server/utils/EmbeddingEngines/localAi/index.js
+++ b/server/utils/EmbeddingEngines/localAi/index.js
@@ -41,7 +41,12 @@ class LocalAiEmbedder {
               resolve({ data: res.data?.data, error: null });
             })
             .catch((e) => {
-              resolve({ data: [], error: e?.error });
+              e.type =
+                e?.response?.data?.error?.code ||
+                e?.response?.status ||
+                "failed_to_embed";
+              e.message = e?.response?.data?.error?.message || e.message;
+              resolve({ data: [], error: e });
             });
         })
       );
@@ -57,11 +62,14 @@ class LocalAiEmbedder {
         .map((res) => res.error)
         .flat();
       if (errors.length > 0) {
+        let uniqueErrors = new Set();
+        errors.map((error) =>
+          uniqueErrors.add(`[${error.type}]: ${error.message}`)
+        );
+
         return {
           data: [],
-          error: `(${errors.length}) Embedding Errors! ${errors
-            .map((error) => `[${error.type}]: ${error.message}`)
-            .join(", ")}`,
+          error: Array.from(uniqueErrors).join(", "),
         };
       }
       return {
diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js
index 105be9d73..1f9ba432a 100644
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@@ -37,7 +37,12 @@ class OpenAiEmbedder {
               resolve({ data: res.data?.data, error: null });
             })
             .catch((e) => {
-              resolve({ data: [], error: e?.error });
+              e.type =
+                e?.response?.data?.error?.code ||
+                e?.response?.status ||
+                "failed_to_embed";
+              e.message = e?.response?.data?.error?.message || e.message;
+              resolve({ data: [], error: e });
             });
         })
       );
@@ -53,11 +58,14 @@ class OpenAiEmbedder {
         .map((res) => res.error)
         .flat();
       if (errors.length > 0) {
+        let uniqueErrors = new Set();
+        errors.map((error) =>
+          uniqueErrors.add(`[${error.type}]: ${error.message}`)
+        );
+
         return {
           data: [],
-          error: `(${errors.length}) Embedding Errors! ${errors
-            .map((error) => `[${error.type}]: ${error.message}`)
-            .join(", ")}`,
+          error: Array.from(uniqueErrors).join(", "),
         };
       }
       return {
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index 878cf05f8..28af39e66 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -171,7 +171,7 @@ const Chroma = {
         }
 
         await DocumentVectors.bulkInsert(documentVectors);
-        return true;
+        return { vectorized: true, error: null };
       }
 
       // If we are here then we are going to embed and store a novel document.
@@ -242,11 +242,10 @@ const Chroma = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index 5e58ef1c8..8f243cf9b 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -172,7 +172,7 @@ const LanceDb = {
 
         await this.updateOrCreateCollection(client, submissions, namespace);
         await DocumentVectors.bulkInsert(documentVectors);
-        return true;
+        return { vectorized: true, error: null };
       }
 
       // If we are here then we are going to embed and store a novel document.
@@ -229,11 +229,10 @@ const LanceDb = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   performSimilaritySearch: async function ({
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index cc934a9a2..79a132413 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -167,7 +167,7 @@ const Milvus = {
         }
         await DocumentVectors.bulkInsert(documentVectors);
         await client.flushSync({ collection_names: [namespace] });
-        return true;
+        return { vectorized: true, error: null };
       }
 
       const textSplitter = new RecursiveCharacterTextSplitter({
@@ -231,11 +231,10 @@ const Milvus = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index 7a7f862c2..594a9aaf3 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -125,7 +125,7 @@ const Pinecone = {
         }
 
         await DocumentVectors.bulkInsert(documentVectors);
-        return true;
+        return { vectorized: true, error: null };
       }
 
       // If we are here then we are going to embed and store a novel document.
@@ -183,11 +183,10 @@ const Pinecone = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 2783cde93..70c069e84 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -190,7 +190,7 @@ const QDrant = {
         }
 
         await DocumentVectors.bulkInsert(documentVectors);
-        return true;
+        return { vectorized: true, error: null };
       }
 
       // If we are here then we are going to embed and store a novel document.
@@ -272,11 +272,10 @@ const QDrant = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index 91faff64e..ac89315af 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -233,7 +233,7 @@ const Weaviate = {
         }
 
         await DocumentVectors.bulkInsert(documentVectors);
-        return true;
+        return { vectorized: true, error: null };
       }
 
       // If we are here then we are going to embed and store a novel document.
@@ -316,11 +316,10 @@ const Weaviate = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index b8493e1c2..31afab35a 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -168,7 +168,7 @@ const Zilliz = {
         }
         await DocumentVectors.bulkInsert(documentVectors);
         await client.flushSync({ collection_names: [namespace] });
-        return true;
+        return { vectorized: true, error: null };
       }
 
       const textSplitter = new RecursiveCharacterTextSplitter({
@@ -232,11 +232,10 @@ const Zilliz = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
-- 
GitLab