From 5614e2ed3000489661c85f2e8af43b4271735209 Mon Sep 17 00:00:00 2001
From: Hakeem Abbas <hakeemsyd@gmail.com>
Date: Fri, 26 Jan 2024 13:07:53 -0800
Subject: [PATCH] feature: Integrate Astra as vectorDBProvider (#648)

* feature: Integrate Astra as vectorDBProvider

feature: Integrate Astra as vectorDBProvider

* Update .env.example

* Add env.example to docker example file
Update spellcheck for Astra
Update Astra key for vector selection
Update order of AstraDB options
Resize Astra logo image to 330x330
Update methods of Astra to take in latest vectorDB params like TopN and more
Update Astra interface to support default methods and avoid crash errors from 404 collections
Update Astra interface to comply to max chunk insertion limitations
Update Astra interface to dynamically set dimensionality from chunk 0 size on creation

* reset workspaces

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
---
 .vscode/settings.json                         |   1 +
 docker/.env.example                           |   5 +
 .../Modals/MangeWorkspace/Settings/index.jsx  |  24 +-
 .../AstraDBOptions/index.jsx                  |  41 ++
 frontend/src/media/vectordbs/astraDB.png      | Bin 0 -> 1521 bytes
 .../GeneralSettings/VectorDatabase/index.jsx  |   9 +
 .../Steps/DataHandling/index.jsx              |   8 +
 .../Steps/VectorDatabaseConnection/index.jsx  |   9 +
 server/.env.example                           |   5 +
 server/models/systemSettings.js               |   6 +
 server/package.json                           |   1 +
 server/utils/helpers/index.js                 |   3 +
 server/utils/helpers/updateENV.js             |  12 +
 .../vectorDbProviders/astra/ASTRA_SETUP.md    |  22 +
 server/utils/vectorDbProviders/astra/index.js | 380 ++++++++++++++++++
 server/yarn.lock                              |  16 +-
 16 files changed, 536 insertions(+), 6 deletions(-)
 create mode 100644 frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx
 create mode 100644 frontend/src/media/vectordbs/astraDB.png
 create mode 100644 server/utils/vectorDbProviders/astra/ASTRA_SETUP.md
 create mode 100644 server/utils/vectorDbProviders/astra/index.js

diff --git a/.vscode/settings.json b/.vscode/settings.json
index ab66c194b..14c396fbe 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
   "cSpell.words": [
+    "Astra",
     "Dockerized",
     "Langchain",
     "Milvus",
diff --git a/docker/.env.example b/docker/.env.example
index 0adabbdf7..858e4098b 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -103,6 +103,11 @@ GID='1000'
 # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
 # ZILLIZ_API_TOKEN=api-token-here
 
+# Enable all below if you are using vector database: Astra DB.
+# VECTOR_DB="astra"
+# ASTRA_DB_APPLICATION_TOKEN=
+# ASTRA_DB_ENDPOINT=
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 
diff --git a/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx
index b288dc6c1..a9471388f 100644
--- a/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx
+++ b/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx
@@ -44,6 +44,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
   const formEl = useRef(null);
   const [saving, setSaving] = useState(false);
   const [hasChanges, setHasChanges] = useState(false);
+  const [deleting, setDeleting] = useState(false);
   const defaults = recommendedSettings(settings?.LLMProvider);
 
   const handleUpdate = async (e) => {
@@ -72,7 +73,15 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
       )
     )
       return false;
-    await Workspace.delete(workspace.slug);
+
+    setDeleting(true);
+    const success = await Workspace.delete(workspace.slug);
+    if (!success) {
+      showToast("Workspace could not be deleted!", "error", { clear: true });
+      setDeleting(false);
+      return;
+    }
+
     workspace.slug === slug
       ? (window.location = paths.home())
       : window.location.reload();
@@ -310,7 +319,11 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
         </div>
       </div>
       <div className="flex items-center justify-between p-2 md:p-6 space-x-2 border-t rounded-b border-gray-600">
-        <DeleteWorkspace workspace={workspace} onClick={deleteWorkspace} />
+        <DeleteWorkspace
+          deleting={deleting}
+          workspace={workspace}
+          onClick={deleteWorkspace}
+        />
         {hasChanges && (
           <button
             type="submit"
@@ -324,7 +337,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) {
   );
 }
 
-function DeleteWorkspace({ workspace, onClick }) {
+function DeleteWorkspace({ deleting, workspace, onClick }) {
   const [canDelete, setCanDelete] = useState(false);
   useEffect(() => {
     async function fetchKeys() {
@@ -337,11 +350,12 @@ function DeleteWorkspace({ workspace, onClick }) {
   if (!canDelete) return null;
   return (
     <button
+      disabled={deleting}
       onClick={onClick}
       type="button"
-      className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600"
+      className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600 disabled:bg-red-600 disabled:text-red-200 disabled:animate-pulse"
     >
-      Delete Workspace
+      {deleting ? "Deleting Workspace..." : "Delete Workspace"}
     </button>
   );
 }
diff --git a/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx
new file mode 100644
index 000000000..11990dc44
--- /dev/null
+++ b/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx
@@ -0,0 +1,41 @@
+export default function AstraDBOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Astra DB Endpoint
+          </label>
+          <input
+            type="url"
+            name="AstraDBEndpoint"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="Astra DB API endpoint"
+            defaultValue={settings?.AstraDBEndpoint}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Astra DB Application Token
+          </label>
+          <input
+            type="password"
+            name="AstraDBApplicationToken"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="AstraCS:..."
+            defaultValue={
+              settings?.AstraDBApplicationToken ? "*".repeat(20) : ""
+            }
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/media/vectordbs/astraDB.png b/frontend/src/media/vectordbs/astraDB.png
new file mode 100644
index 0000000000000000000000000000000000000000..3403c72f2c35ffa793b4736013a5d24bcd2b71c1
GIT binary patch
literal 1521
zcma)5X)qfI6i({Ct47_`)hr2#I!CRm+d4Z%H*vPa)u^*>Qc8#^tEHquU8%Z~Qpbim
zx+%I?DpsnBXi3Vdpc7X#5<;y0+@0B(zL{^{yzhPAkN4)iylZYwa?<M3002PF#n~PX
z006IigQUnYbCt)kI8ML>xYH%zhfyfzxR8o<_Dui)WR$)E2&kx1Jr=yVcGbh-xOw*M
z*^3u1+Su5fK7Cq3Lc-9{P*qhmGc)t(=;-k9aAIP@#l-~(1S%^l<8Zk3_4VN3;O*^g
zPfyQNr%sW{<h{MU+}vCR1qEAM+u7OK(a}*b7<}TyiKL_?fk41uFf=qY+S=MU98O9~
zij<U;xw-k^;2@Dmw6L&{m6grT&R$$x)YsR?U@&WIYbX?|r>Cd0vr|MwBs4U1Yio-{
zB7s047K<em3g5kZH#9Uvr_-5CCL9je)zzg?D0X&sqN1W+US9R}^)wo7czD>=)m2MN
z%i7u+fk3phv@9(x>FDTaYipaDn##z?C@LyeR8;W!{HduaD=Vve_wMcN?99x}h>3~$
z`1nw%R839Ifq{X^$w?Iz6*)P%XV0E-x!k$AIU^$@2M34U-QA|9CUtdnJRW~=a4<eT
z?&#=v^XARO#KiXYb^`+gNlD4(=H`Wk1vDDHv9aOr?~g<xVK7)+TwHm1d01E&fj}4=
z8|&@uH8wV8v)OujdX0^ZJRVO?O|7!BvY?<KC@5%uf4`%nBQP-V{rmSJAt7B|T}nzy
zD=RCOmX>B_W;Hc6k&%(p)6<28g|V@*;^N}ime?2oKxDwh{$~$#)%yHif>M&^$f6jK
z5$_S!l8MK1u}#W&=`&9y&!B_F98M~nBtOM=qZjgYm@ECyO_omP=(_F-MlHex*8QQn
zn+!T=j+RJq03P4Rcf~|_0!x<?g?wJOqf*cIXFik%wt|bfP`?*F4dXxddK1~OXvBRF
zAY?4wH^89y(3=Zt8DGUOzn&heMEa3U;-W%H(6LA_^KOftRB%!Rkp!iGHBb1%FD-h1
zGT+@-J|N}LBP82mzAU-iI)+7prro|y=V!mn_{{p1;9JD`B*TIg5XEEU_|ByrAe3e5
zTp*(%NdxN`>O^gypS9bgt*1oYuPB2-(uOyx*w~kMxvbyv){5fC{8PLW-Nz~O6z*h6
zG%s4pz!C<RXY}{hN<_FPjWifoMItk{YU~wO+NSUi;I?qCf3hpCeRu9KjJlX?3K?`(
zbEF!<b8sg4_@H{(b12pSQpI}9A?D{}(66w1aGG(kHKf`Pq5E;$gX$ZK(?gL3YuiJm
zZ`tANw7~<D2IxM|wsMy9vM+tw-89z^@*w@~rhAm7b^KFKII9STBBPlyd9YmhJ|8Y1
z;mzZAFfw(oWT-4xp5BimF2xg}_q`D@D62_I-xuR9e<uD<ECE$Zzb?e&`<alS5BqG3
zO5^{Y-8MXI0jE%d1saV398^s5%46#^YOPNqn7sU&$vV<LS0`V8EG|h6m=h{_7QQI>
z3D<tXG9_Frh931Q;nkO#E_Wxm*Us(n;k3=}a8TC&O#bg>T#csYDcClz)_ug2bhY;R
z#U9njvei9yBFoLSXEjTq(zY#(KaiGRm~Ol_@fcOr=9W&dM3~lA6Mo4+tek(I1<9pN
z-I;N@B}UQ|b}_H{QO6D9#UqeW)B<jGq+$t&LcZNgfSD}wKZqB~$X>Zq-+e>7NxeM&
ztAHVw6AXVM2+HU(YmvSSUwD#p`3Hzf>-yx4Hlb1}hrKv_83-A{96;Rh5m@-c!m6rZ
zu+8$@8x(%&CmsFe)mBJ9dKBU<d{pOzEy9mv5K_U$%R?~<zw=G*FgGzEEB-Ch{^}~1
zj~95}JmF{Ir$<VoU5AXnCbH@aJQ+fq=y&}P0Fb%1q`mebbO7ax>HKDKad5M5y%dQ3
E2W{BMXaE2J

literal 0
HcmV?d00001

diff --git a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
index f5f697a72..dd43fda57 100644
--- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
+++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
@@ -10,6 +10,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png";
 import QDrantLogo from "@/media/vectordbs/qdrant.png";
 import MilvusLogo from "@/media/vectordbs/milvus.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import PreLoader from "@/components/Preloader";
 import ChangeWarningModal from "@/components/ChangeWarning";
 import { MagnifyingGlass } from "@phosphor-icons/react";
@@ -23,6 +24,7 @@ import MilvusDBOptions from "@/components/VectorDBSelection/MilvusDBOptions";
 import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions";
 import { useModal } from "@/hooks/useModal";
 import ModalWrapper from "@/components/ModalWrapper";
+import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions";
 
 export default function GeneralVectorDatabase() {
   const [saving, setSaving] = useState(false);
@@ -100,6 +102,13 @@ export default function GeneralVectorDatabase() {
       options: <MilvusDBOptions settings={settings} />,
       description: "Open-source, highly scalable, and blazing fast.",
     },
+    {
+      name: "AstraDB",
+      value: "astra",
+      logo: AstraDBLogo,
+      options: <AstraDBOptions settings={settings} />,
+      description: "Vector Search for Real-world GenAI.",
+    },
   ];
 
   const updateVectorChoice = (selection) => {
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index ae5730276..60a3b6da4 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -11,6 +11,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import ChromaLogo from "@/media/vectordbs/chroma.png";
 import PineconeLogo from "@/media/vectordbs/pinecone.png";
 import LanceDbLogo from "@/media/vectordbs/lancedb.png";
@@ -147,6 +148,13 @@ const VECTOR_DB_PRIVACY = {
     ],
     logo: ZillizLogo,
   },
+  astra: {
+    name: "AstraDB",
+    description: [
+      "Your vectors and document text are stored on your cloud AstraDB database.",
+    ],
+    logo: AstraDBLogo,
+  },
   lancedb: {
     name: "LanceDB",
     description: [
diff --git a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx
index af0b5662d..98034528d 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx
@@ -7,6 +7,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png";
 import QDrantLogo from "@/media/vectordbs/qdrant.png";
 import MilvusLogo from "@/media/vectordbs/milvus.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
+import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import System from "@/models/system";
 import paths from "@/utils/paths";
 import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions";
@@ -16,6 +17,7 @@ import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"
 import LanceDBOptions from "@/components/VectorDBSelection/LanceDBOptions";
 import MilvusOptions from "@/components/VectorDBSelection/MilvusDBOptions";
 import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions";
+import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions";
 import showToast from "@/utils/toast";
 import { useNavigate } from "react-router-dom";
 import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem";
@@ -100,6 +102,13 @@ export default function VectorDatabaseConnection({
       options: <MilvusOptions settings={settings} />,
       description: "Open-source, highly scalable, and blazing fast.",
     },
+    {
+      name: "AstraDB",
+      value: "astra",
+      logo: AstraDBLogo,
+      options: <AstraDBOptions settings={settings} />,
+      description: "Vector Search for Real-world GenAI.",
+    },
   ];
 
   function handleForward() {
diff --git a/server/.env.example b/server/.env.example
index e44748b41..f497fea91 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -76,6 +76,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # PINECONE_API_KEY=
 # PINECONE_INDEX=
 
+# Enable all below if you are using vector database: Astra DB.
+# VECTOR_DB="astra"
+# ASTRA_DB_APPLICATION_TOKEN=
+# ASTRA_DB_ENDPOINT=
+
 # Enable all below if you are using vector database: LanceDB.
 VECTOR_DB="lancedb"
 
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index b4e93bde6..b8c46524c 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -68,6 +68,12 @@ const SystemSettings = {
             ZillizApiToken: process.env.ZILLIZ_API_TOKEN,
           }
         : {}),
+      ...(vectorDB === "astra"
+        ? {
+            AstraDBApplicationToken: process?.env?.ASTRA_DB_APPLICATION_TOKEN,
+            AstraDBEndpoint: process?.env?.ASTRA_DB_ENDPOINT,
+          }
+        : {}),
       LLMProvider: llmProvider,
       ...(llmProvider === "openai"
         ? {
diff --git a/server/package.json b/server/package.json
index c8a41b795..bf1b85c06 100644
--- a/server/package.json
+++ b/server/package.json
@@ -22,6 +22,7 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.8.1",
     "@azure/openai": "1.0.0-beta.10",
+    "@datastax/astra-db-ts": "^0.1.3",
     "@google/generative-ai": "^0.1.3",
     "@googleapis/youtube": "^9.0.0",
     "@pinecone-database/pinecone": "^2.0.1",
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index b72bb7977..53a76faeb 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -22,6 +22,9 @@ function getVectorDbClass() {
     case "zilliz":
       const { Zilliz } = require("../vectorDbProviders/zilliz");
       return Zilliz;
+    case "astra":
+      const { AstraDB } = require("../vectorDbProviders/astra");
+      return AstraDB;
     default:
       throw new Error("ENV: No VECTOR_DB value found in environment!");
   }
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index b061061e8..50b423474 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -204,6 +204,17 @@ const KEY_MAPPING = {
     checks: [isNotEmpty],
   },
 
+  // Astra DB Options
+
+  AstraDBApplicationToken: {
+    envKey: "ASTRA_DB_APPLICATION_TOKEN",
+    checks: [isNotEmpty],
+  },
+  AstraDBEndpoint: {
+    envKey: "ASTRA_DB_ENDPOINT",
+    checks: [isNotEmpty],
+  },
+
   // Together Ai Options
   TogetherAiApiKey: {
     envKey: "TOGETHER_AI_API_KEY",
@@ -322,6 +333,7 @@ function supportedVectorDB(input = "") {
     "qdrant",
     "milvus",
     "zilliz",
+    "astra",
   ];
   return supported.includes(input)
     ? null
diff --git a/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md b/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md
new file mode 100644
index 000000000..e3749f077
--- /dev/null
+++ b/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md
@@ -0,0 +1,22 @@
+# How to set up Astra Vector Database for AnythingLLM
+
+[Official Astra DB Docs](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html) for reference.
+
+### How to get started
+
+**Requirements**
+
+- Astra Vector Database with active status.
+
+**Instructions**
+
+- [Create an Astra account or sign in to an existing Astra account](https://astra.datastax.com)
+- Create an Astra Serverless (Vector) Database.
+- Make sure the database is in an active state.
+- Get the `API ENDPOINT` and `Application Token` from the Overview screen.
+
+```
+VECTOR_DB="astra"
+ASTRA_DB_ENDPOINT=Astra DB API endpoint
+ASTRA_DB_APPLICATION_TOKEN=AstraCS:..
+```
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js
new file mode 100644
index 000000000..df983d4f4
--- /dev/null
+++ b/server/utils/vectorDbProviders/astra/index.js
@@ -0,0 +1,380 @@
+const { AstraDB: AstraClient } = require("@datastax/astra-db-ts");
+const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
+const { storeVectorResult, cachedVectorInformation } = require("../../files");
+const { v4: uuidv4 } = require("uuid");
+const {
+  toChunks,
+  getLLMProvider,
+  getEmbeddingEngineSelection,
+} = require("../../helpers");
+
+const AstraDB = {
+  name: "AstraDB",
+  connect: async function () {
+    if (process.env.VECTOR_DB !== "astra")
+      throw new Error("AstraDB::Invalid ENV settings");
+
+    const client = new AstraClient(
+      process?.env?.ASTRA_DB_APPLICATION_TOKEN,
+      process?.env?.ASTRA_DB_ENDPOINT
+    );
+    return { client };
+  },
+  heartbeat: async function () {
+    return { heartbeat: Number(new Date()) };
+  },
+  // Astra interface will return a valid collection object even if the collection
+  // does not actually exist. So we run a simple check which will always throw
+  // when the table truly does not exist. Faster than iterating all collections.
+  isRealCollection: async function (astraCollection = null) {
+    if (!astraCollection) return false;
+    return await astraCollection
+      .countDocuments()
+      .then(() => true)
+      .catch(() => false);
+  },
+  // Sums document counts across all collections; unreadable ones count as 0.
+  totalVectors: async function () {
+    const { client } = await this.connect();
+    let totalVectors = 0;
+    for (const name of (await this.allNamespaces(client)) ?? []) {
+      const collection = await client.collection(name).catch(() => null);
+      const count = await collection?.countDocuments().catch(() => 0);
+      totalVectors += typeof count === "number" ? count : 0;
+    }
+    return totalVectors;
+  },
+  namespaceCount: async function (_namespace = null) {
+    const { client } = await this.connect();
+    const namespace = await this.namespace(client, _namespace);
+    return namespace?.vectorCount || 0;
+  },
+  // Returns the collection with its `vectorCount`, or null when it is missing.
+  namespace: async function (client, namespace = null) {
+    if (!namespace) throw new Error("No namespace value provided.");
+    const collection = await client.collection(namespace).catch(() => null);
+    if (!(await this.isRealCollection(collection))) return null;
+
+    const count = await collection.countDocuments().catch((e) => {
+      console.error("Astra::namespace", e.message);
+      return null;
+    });
+    return {
+      name: namespace,
+      ...collection,
+      vectorCount: typeof count === "number" ? count : 0,
+    };
+  },
+  hasNamespace: async function (namespace = null) {
+    if (!namespace) return false;
+    const { client } = await this.connect();
+    return await this.namespaceExists(client, namespace);
+  },
+  namespaceExists: async function (client, namespace = null) {
+    if (!namespace) throw new Error("No namespace value provided.");
+    const collection = await client.collection(namespace);
+    return await this.isRealCollection(collection);
+  },
+  deleteVectorsInNamespace: async function (client, namespace = null) {
+    await client.dropCollection(namespace);
+    return true;
+  },
+  // AstraDB requires a dimension aspect for collection creation
+  // we pass this in from the first chunk to infer the dimensions like other
+  // providers do.
+  getOrCreateCollection: async function (client, namespace, dimensions = null) {
+    const isExists = await this.namespaceExists(client, namespace);
+    if (!isExists) {
+      if (!dimensions)
+        throw new Error(
+          `AstraDB:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
+        );
+
+      await client.createCollection(namespace, {
+        vector: {
+          dimension: dimensions,
+          metric: "cosine",
+        },
+      });
+    }
+    return await client.collection(namespace);
+  },
+  // Embeds a document (or replays its cached vectors) into the Astra collection
+  // for `namespace` and records the vector ids in DocumentVectors. Resolves to
+  // { vectorized, error }, or to false when the document has no page content.
+  addDocumentToNamespace: async function (
+    namespace,
+    documentData = {},
+    fullFilePath = null
+  ) {
+    const { DocumentVectors } = require("../../../models/vectors");
+    try {
+      let vectorDimension = null;
+      const { pageContent, docId, ...metadata } = documentData;
+      if (!pageContent || pageContent.length === 0) return false;
+
+      console.log("Adding new vectorized document into namespace", namespace);
+      const cacheResult = await cachedVectorInformation(fullFilePath);
+      if (cacheResult.exists) {
+        const { client } = await this.connect();
+        const { chunks } = cacheResult;
+        const documentVectors = [];
+        vectorDimension = chunks[0][0].values.length || null;
+        const collection = await this.getOrCreateCollection(
+          client,
+          namespace,
+          vectorDimension
+        );
+        if (!(await this.isRealCollection(collection)))
+          throw new Error(
+            `Failed to create new AstraDB collection! (${namespace})`
+          );
+
+        for (const chunk of chunks) {
+          // Before sending to Astra and saving the records to our db
+          // we need to assign the id of each chunk that is stored in the cached file.
+          const newChunks = chunk.map((cachedChunk) => {
+            const _id = uuidv4();
+            documentVectors.push({ docId, vectorId: _id });
+            return {
+              _id: _id,
+              $vector: cachedChunk.values,
+              metadata: cachedChunk.metadata || {},
+            };
+          });
+
+          await collection.insertMany(newChunks);
+        }
+        await DocumentVectors.bulkInsert(documentVectors);
+        return { vectorized: true, error: null };
+      }
+
+      const textSplitter = new RecursiveCharacterTextSplitter({
+        chunkSize:
+          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
+        chunkOverlap: 20,
+      });
+      const textChunks = await textSplitter.splitText(pageContent);
+      console.log("Chunks created from document:", textChunks.length);
+      const LLMConnector = getLLMProvider();
+      const documentVectors = [];
+      const vectors = [];
+      const vectorValues = await LLMConnector.embedChunks(textChunks);
+      if (!!vectorValues && vectorValues.length > 0) {
+        for (const [i, vector] of vectorValues.entries()) {
+          if (!vectorDimension) vectorDimension = vector.length;
+          const vectorRecord = {
+            _id: uuidv4(),
+            $vector: vector,
+            metadata: { ...metadata, text: textChunks[i] },
+          };
+
+          vectors.push(vectorRecord);
+          documentVectors.push({ docId, vectorId: vectorRecord._id });
+        }
+      } else {
+        throw new Error(
+          "Could not embed document chunks! This document will not be recorded."
+        );
+      }
+      const { client } = await this.connect();
+      const collection = await this.getOrCreateCollection(
+        client,
+        namespace,
+        vectorDimension
+      );
+      if (!(await this.isRealCollection(collection)))
+        throw new Error(
+          `Failed to create new AstraDB collection! (${namespace})`
+        );
+
+      if (vectors.length > 0) {
+        const chunks = [];
+
+        console.log("Inserting vectorized chunks into Astra DB.");
+
+        // AstraDB has maximum upsert size of 20 records per-request so we have to use a lower chunk size here
+        // in order to do the queries - this takes a lot more time than other providers but there
+        // is no way around it. This will save the vector-cache with the same layout, so we don't
+        // have to chunk again for cached files.
+        for (const chunk of toChunks(vectors, 20)) {
+          chunks.push(
+            chunk.map((c) => {
+              return { id: c._id, values: c.$vector, metadata: c.metadata };
+            })
+          );
+          await collection.insertMany(chunk);
+        }
+        await storeVectorResult(chunks, fullFilePath);
+      }
+
+      await DocumentVectors.bulkInsert(documentVectors);
+      return { vectorized: true, error: null };
+    } catch (e) {
+      console.error("addDocumentToNamespace", e.message);
+      return { vectorized: false, error: e.message };
+    }
+  },
+  deleteDocumentFromNamespace: async function (namespace, docId) {
+    const { DocumentVectors } = require("../../../models/vectors");
+    const { client } = await this.connect();
+    if (!(await this.namespaceExists(client, namespace)))
+      throw new Error(
+        "Invalid namespace - has it been collected and populated yet?"
+      );
+    const collection = await client.collection(namespace);
+
+    const knownDocuments = await DocumentVectors.where({ docId });
+    if (knownDocuments.length === 0) return;
+
+    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
+    for (const id of vectorIds) {
+      await collection.deleteMany({
+        _id: id,
+      });
+    }
+
+    const indexes = knownDocuments.map((doc) => doc.id);
+    await DocumentVectors.deleteIds(indexes);
+    return true;
+  },
+  performSimilaritySearch: async function ({
+    namespace = null,
+    input = "",
+    LLMConnector = null,
+    similarityThreshold = 0.25,
+    topN = 4,
+  }) {
+    if (!namespace || !input || !LLMConnector)
+      throw new Error("Invalid request to performSimilaritySearch.");
+
+    const { client } = await this.connect();
+    if (!(await this.namespaceExists(client, namespace))) {
+      return {
+        contextTexts: [],
+        sources: [],
+        message:
+          "Invalid query - no namespace found for workspace in vector db!",
+      };
+    }
+
+    const queryVector = await LLMConnector.embedTextInput(input);
+    const { contextTexts, sourceDocuments } = await this.similarityResponse(
+      client,
+      namespace,
+      queryVector,
+      similarityThreshold,
+      topN
+    );
+
+    const sources = sourceDocuments.map((metadata, i) => {
+      return { ...metadata, text: contextTexts[i] };
+    });
+    return {
+      contextTexts,
+      sources: this.curateSources(sources),
+      message: false,
+    };
+  },
+  similarityResponse: async function (
+    client,
+    namespace,
+    queryVector,
+    similarityThreshold = 0.25,
+    topN = 4
+  ) {
+    const result = {
+      contextTexts: [],
+      sourceDocuments: [],
+      scores: [],
+    };
+
+    const collection = await client.collection(namespace);
+    const responses = await collection
+      .find(
+        {},
+        {
+          sort: { $vector: queryVector },
+          limit: topN,
+          includeSimilarity: true,
+        }
+      )
+      .toArray();
+
+    responses.forEach((response) => {
+      if (response.$similarity < similarityThreshold) return;
+      result.contextTexts.push(response.metadata.text);
+      result.sourceDocuments.push(response);
+      result.scores.push(response.$similarity);
+    });
+    return result;
+  },
+  // Lists collection names by POSTing `findCollections` to the client's base
+  // URL with its application token. Always resolves to an array: failures are
+  // logged and a body without `status.collections` yields [].
+  allNamespaces: async function (client) {
+    try {
+      const headers = new Headers();
+      headers.append("Token", client?.httpClient?.applicationToken);
+      headers.append("Content-Type", "application/json");
+
+      const call = await fetch(client?.httpClient?.baseUrl, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({ findCollections: {} }),
+        redirect: "follow",
+      });
+      const resp = await call?.text();
+
+      // `status.collections` can be absent from the parsed body; callers
+      // iterate the result, so never hand back undefined.
+      const collections = resp ? JSON.parse(resp)?.status?.collections : [];
+      return Array.isArray(collections) ? collections : [];
+    } catch (e) {
+      console.error("Astra::AllNamespace", e);
+      return [];
+    }
+  },
+  "namespace-stats": async function (reqBody = {}) {
+    const { namespace = null } = reqBody;
+    if (!namespace) throw new Error("namespace required");
+    const { client } = await this.connect();
+    if (!(await this.namespaceExists(client, namespace)))
+      throw new Error("Namespace by that name does not exist.");
+    const stats = await this.namespace(client, namespace);
+    return stats
+      ? stats
+      : { message: "No stats were able to be fetched from DB for namespace" };
+  },
+  "delete-namespace": async function (reqBody = {}) {
+    const { namespace = null } = reqBody;
+    const { client } = await this.connect();
+    if (!(await this.namespaceExists(client, namespace)))
+      throw new Error("Namespace by that name does not exist.");
+
+    const details = await this.namespace(client, namespace);
+    await this.deleteVectorsInNamespace(client, namespace);
+    return {
+      message: `Namespace ${namespace} was deleted along with ${
+        details?.vectorCount || "all"
+      } vectors.`,
+    };
+  },
+  curateSources: function (sources = []) {
+    const documents = [];
+    for (const source of sources) {
+      if (Object.keys(source).length > 0) {
+        const metadata = source.hasOwnProperty("metadata")
+          ? source.metadata
+          : source;
+        documents.push({
+          ...metadata,
+        });
+      }
+    }
+
+    return documents;
+  },
+};
+
+module.exports.AstraDB = AstraDB;
diff --git a/server/yarn.lock b/server/yarn.lock
index 67b4e11b6..ecc8f1a75 100644
--- a/server/yarn.lock
+++ b/server/yarn.lock
@@ -174,6 +174,15 @@
     enabled "2.0.x"
     kuler "^2.0.0"
 
+"@datastax/astra-db-ts@^0.1.3":
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/@datastax/astra-db-ts/-/astra-db-ts-0.1.3.tgz#fcc25cda8d146c06278860054f09d687ff031568"
+  integrity sha512-7lnpym0HhUtfJVd8+vu6vYdDQpFyYof7TVLFVD2fgoIjUwj3EksFXmqDqicLAlLferZDllqSVthX9pXQ5Rdapw==
+  dependencies:
+    axios "^1.4.0"
+    bson "^6.2.0"
+    winston "^3.7.2"
+
 "@eslint-community/eslint-utils@^4.2.0":
   version "4.4.0"
   resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59"
@@ -1353,6 +1362,11 @@ braces@~3.0.2:
   dependencies:
     fill-range "^7.0.1"
 
+bson@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/bson/-/bson-6.2.0.tgz#4b6acafc266ba18eeee111373c2699304a9ba0a3"
+  integrity sha512-ID1cI+7bazPDyL9wYy9GaQ8gEEohWvcUl/Yf0dIdutJxnmInEEyCsb4awy/OiBfall7zBA179Pahi3vCdFze3Q==
+
 btoa-lite@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/btoa-lite/-/btoa-lite-1.0.0.tgz#337766da15801210fdd956c22e9c6891ab9d0337"
@@ -5636,7 +5650,7 @@ winston-transport@^4.5.0:
     readable-stream "^3.6.0"
     triple-beam "^1.3.0"
 
-winston@^3.9.0:
+winston@^3.7.2, winston@^3.9.0:
   version "3.11.0"
   resolved "https://registry.yarnpkg.com/winston/-/winston-3.11.0.tgz#2d50b0a695a2758bb1c95279f0a88e858163ed91"
   integrity sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==
-- 
GitLab