diff --git a/.vscode/settings.json b/.vscode/settings.json index ab66c194b3df56c15b019bcc41adfaa20fb6621b..14c396fbe813f6463931ee5af3ff2476a115fdad 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "cSpell.words": [ + "Astra", "Dockerized", "Langchain", "Milvus", diff --git a/docker/.env.example b/docker/.env.example index 0adabbdf7d0e1fcf4c71d013bdedf9cb6d703944..858e4098be04792ab8b1e355e48e40cfca406639 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -103,6 +103,11 @@ GID='1000' # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com" # ZILLIZ_API_TOKEN=api-token-here +# Enable all below if you are using vector database: Astra DB. +# VECTOR_DB="astra" +# ASTRA_DB_APPLICATION_TOKEN= +# ASTRA_DB_ENDPOINT= + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. diff --git a/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx index b288dc6c14b0456b8063c5d4b20de1f89328b281..a9471388fe689e7fc3841df6184adc8cf9542e64 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Settings/index.jsx @@ -44,6 +44,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) { const formEl = useRef(null); const [saving, setSaving] = useState(false); const [hasChanges, setHasChanges] = useState(false); + const [deleting, setDeleting] = useState(false); const defaults = recommendedSettings(settings?.LLMProvider); const handleUpdate = async (e) => { @@ -72,7 +73,15 @@ export default function WorkspaceSettings({ active, workspace, settings }) { ) ) return false; - await Workspace.delete(workspace.slug); + + setDeleting(true); + const success = await Workspace.delete(workspace.slug); + if (!success) { + showToast("Workspace could not be deleted!", "error", { clear: true }); + setDeleting(false); + 
return; + } + workspace.slug === slug ? (window.location = paths.home()) : window.location.reload(); @@ -310,7 +319,11 @@ export default function WorkspaceSettings({ active, workspace, settings }) { </div> </div> <div className="flex items-center justify-between p-2 md:p-6 space-x-2 border-t rounded-b border-gray-600"> - <DeleteWorkspace workspace={workspace} onClick={deleteWorkspace} /> + <DeleteWorkspace + deleting={deleting} + workspace={workspace} + onClick={deleteWorkspace} + /> {hasChanges && ( <button type="submit" @@ -324,7 +337,7 @@ export default function WorkspaceSettings({ active, workspace, settings }) { ); } -function DeleteWorkspace({ workspace, onClick }) { +function DeleteWorkspace({ deleting, workspace, onClick }) { const [canDelete, setCanDelete] = useState(false); useEffect(() => { async function fetchKeys() { @@ -337,11 +350,12 @@ function DeleteWorkspace({ workspace, onClick }) { if (!canDelete) return null; return ( <button + disabled={deleting} onClick={onClick} type="button" - className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600" + className="transition-all duration-300 border border-transparent rounded-lg whitespace-nowrap text-sm px-5 py-2.5 focus:z-10 bg-transparent text-white hover:text-white hover:bg-red-600 disabled:bg-red-600 disabled:text-red-200 disabled:animate-pulse" > - Delete Workspace + {deleting ? "Deleting Workspace..." 
: "Delete Workspace"} </button> ); } diff --git a/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx new file mode 100644 index 0000000000000000000000000000000000000000..11990dc44c82e8dc96f62a0759df7f4a4602e012 --- /dev/null +++ b/frontend/src/components/VectorDBSelection/AstraDBOptions/index.jsx @@ -0,0 +1,41 @@ +export default function AstraDBOptions({ settings }) { + return ( + <div className="w-full flex flex-col gap-y-4"> + <div className="w-full flex items-center gap-4"> + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Astra DB Endpoint + </label> + <input + type="url" + name="AstraDBEndpoint" + className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="Astra DB API endpoint" + defaultValue={settings?.AstraDBEndpoint} + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Astra DB Application Token + </label> + <input + type="password" + name="AstraDBApplicationToken" + className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="AstraCS:..." + defaultValue={ + settings?.AstraDBApplicationToken ? 
"*".repeat(20) : "" + } + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/media/vectordbs/astraDB.png b/frontend/src/media/vectordbs/astraDB.png new file mode 100644 index 0000000000000000000000000000000000000000..3403c72f2c35ffa793b4736013a5d24bcd2b71c1 Binary files /dev/null and b/frontend/src/media/vectordbs/astraDB.png differ diff --git a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx index f5f697a72c12cf85e9784b50b6571ba0f7fd4c57..dd43fda5703a57845cee5fe2d0be7c7745cc9832 100644 --- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx +++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx @@ -10,6 +10,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; +import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import { MagnifyingGlass } from "@phosphor-icons/react"; @@ -23,6 +24,7 @@ import MilvusDBOptions from "@/components/VectorDBSelection/MilvusDBOptions"; import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions"; import { useModal } from "@/hooks/useModal"; import ModalWrapper from "@/components/ModalWrapper"; +import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions"; export default function GeneralVectorDatabase() { const [saving, setSaving] = useState(false); @@ -100,6 +102,13 @@ export default function GeneralVectorDatabase() { options: <MilvusDBOptions settings={settings} />, description: "Open-source, highly scalable, and blazing fast.", }, + { + name: "AstraDB", + value: "astra", + logo: AstraDBLogo, + options: <AstraDBOptions settings={settings} />, + 
description: "Vector Search for Real-world GenAI.", + }, ]; const updateVectorChoice = (selection) => { diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index ae573027639ea8318a8bdd1dfc45deb48c2d871c..60a3b6da4d5c1b2792899750900c4bff108042a8 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -11,6 +11,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; +import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; import PineconeLogo from "@/media/vectordbs/pinecone.png"; import LanceDbLogo from "@/media/vectordbs/lancedb.png"; @@ -147,6 +148,13 @@ const VECTOR_DB_PRIVACY = { ], logo: ZillizLogo, }, + astra: { + name: "AstraDB", + description: [ + "Your vectors and document text are stored on your cloud AstraDB database.", + ], + logo: AstraDBLogo, + }, lancedb: { name: "LanceDB", description: [ diff --git a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx index af0b5662d2a707b526d76a89f25a0c5c1234f990..98034528d516e012fb196c79d7459abc24a62cd3 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx @@ -7,6 +7,7 @@ import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; +import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import System from "@/models/system"; import 
paths from "@/utils/paths"; import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions"; @@ -16,6 +17,7 @@ import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions" import LanceDBOptions from "@/components/VectorDBSelection/LanceDBOptions"; import MilvusOptions from "@/components/VectorDBSelection/MilvusDBOptions"; import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions"; +import AstraDBOptions from "@/components/VectorDBSelection/AstraDBOptions"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem"; @@ -100,6 +102,13 @@ export default function VectorDatabaseConnection({ options: <MilvusOptions settings={settings} />, description: "Open-source, highly scalable, and blazing fast.", }, + { + name: "AstraDB", + value: "astra", + logo: AstraDBLogo, + options: <AstraDBOptions settings={settings} />, + description: "Vector Search for Real-world GenAI.", + }, ]; function handleForward() { diff --git a/server/.env.example b/server/.env.example index e44748b41f5eb9c01e795e3f9f891c1087385c0e..f497fea91de8a9f09688d541e894eafdd94389ba 100644 --- a/server/.env.example +++ b/server/.env.example @@ -76,6 +76,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # PINECONE_API_KEY= # PINECONE_INDEX= +# Enable all below if you are using vector database: Astra DB. +# VECTOR_DB="astra" +# ASTRA_DB_APPLICATION_TOKEN= +# ASTRA_DB_ENDPOINT= + # Enable all below if you are using vector database: LanceDB. 
VECTOR_DB="lancedb" diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index b4e93bde627823c6ede6951c972d8990a3a84ed2..b8c46524cfaff11fb8b34f5fe73bb10ae0b5105d 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -68,6 +68,12 @@ const SystemSettings = { ZillizApiToken: process.env.ZILLIZ_API_TOKEN, } : {}), + ...(vectorDB === "astra" + ? { + AstraDBApplicationToken: process?.env?.ASTRA_DB_APPLICATION_TOKEN, + AstraDBEndpoint: process?.env?.ASTRA_DB_ENDPOINT, + } + : {}), LLMProvider: llmProvider, ...(llmProvider === "openai" ? { diff --git a/server/package.json b/server/package.json index c8a41b795eb63243d821f09e3f244792a54032d3..bf1b85c06e866fd1425aa794d833029812ea8fad 100644 --- a/server/package.json +++ b/server/package.json @@ -22,6 +22,7 @@ "dependencies": { "@anthropic-ai/sdk": "^0.8.1", "@azure/openai": "1.0.0-beta.10", + "@datastax/astra-db-ts": "^0.1.3", "@google/generative-ai": "^0.1.3", "@googleapis/youtube": "^9.0.0", "@pinecone-database/pinecone": "^2.0.1", diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index b72bb7977b7857b788c7b977950d7c44560cf673..53a76faebf269ea7bbf4965261d932d1f83d7042 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -22,6 +22,9 @@ function getVectorDbClass() { case "zilliz": const { Zilliz } = require("../vectorDbProviders/zilliz"); return Zilliz; + case "astra": + const { AstraDB } = require("../vectorDbProviders/astra"); + return AstraDB; default: throw new Error("ENV: No VECTOR_DB value found in environment!"); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index b061061e82de0ac7f296e9453d38568716bf0ed6..50b423474a8c12327572520c884b259f835172f0 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -204,6 +204,17 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // Astra DB Options + + AstraDBApplicationToken: { + envKey: 
"ASTRA_DB_APPLICATION_TOKEN", + checks: [isNotEmpty], + }, + AstraDBEndpoint: { + envKey: "ASTRA_DB_ENDPOINT", + checks: [isNotEmpty], + }, + // Together Ai Options TogetherAiApiKey: { envKey: "TOGETHER_AI_API_KEY", @@ -322,6 +333,7 @@ function supportedVectorDB(input = "") { "qdrant", "milvus", "zilliz", + "astra", ]; return supported.includes(input) ? null diff --git a/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md b/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md new file mode 100644 index 0000000000000000000000000000000000000000..e3749f0779468a415735a7e64ca79df65f52d534 --- /dev/null +++ b/server/utils/vectorDbProviders/astra/ASTRA_SETUP.md @@ -0,0 +1,22 @@ +# How to set up Astra Vector Database for AnythingLLM + +[Official Astra DB Docs](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html) for reference. + +### How to get started + +**Requirements** + +- Astra Vector Database with active status. + +**Instructions** + +- [Create an Astra account or sign in to an existing Astra account](https://astra.datastax.com) +- Create an Astra Serverless (Vector) Database. +- Make sure the database is in the active state. +- Get the `API ENDPOINT` and `Application Token` from the Overview screen. + +``` +VECTOR_DB="astra" +ASTRA_DB_ENDPOINT=Astra DB API endpoint +ASTRA_DB_APPLICATION_TOKEN=AstraCS:.. 
const { AstraDB: AstraClient } = require("@datastax/astra-db-ts");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const {
  toChunks,
  getLLMProvider,
  getEmbeddingEngineSelection,
} = require("../../helpers");

// Number of records sent per insertMany request (see addDocumentToNamespace).
const ASTRA_INSERT_BATCH_SIZE = 20;

/**
 * Vector database provider backed by Astra DB.
 * Each namespace is stored as one Astra collection of the same name.
 */
const AstraDB = {
  name: "AstraDB",

  /**
   * Builds an Astra client from ASTRA_DB_APPLICATION_TOKEN and ASTRA_DB_ENDPOINT.
   * @returns {Promise<{client: object}>}
   * @throws {Error} when VECTOR_DB is not set to "astra".
   */
  connect: async function () {
    if (process.env.VECTOR_DB !== "astra")
      throw new Error("AstraDB::Invalid ENV settings");

    const client = new AstraClient(
      process.env.ASTRA_DB_APPLICATION_TOKEN,
      process.env.ASTRA_DB_ENDPOINT
    );
    return { client };
  },

  /**
   * Returns the current timestamp; no request is made to Astra.
   * @returns {Promise<{heartbeat: number}>}
   */
  heartbeat: async function () {
    return { heartbeat: Number(new Date()) };
  },

  // Astra interface will return a valid collection object even if the collection
  // does not actually exist. So we run a simple check which will always throw
  // when the table truly does not exist. Faster than iterating all collections.
  /**
   * @param {object|null} astraCollection - collection handle to probe.
   * @returns {Promise<boolean>} true when countDocuments() resolves.
   */
  isRealCollection: async function (astraCollection = null) {
    if (!astraCollection) return false;
    return await astraCollection
      .countDocuments()
      .then(() => true)
      .catch(() => false);
  },

  /**
   * Sums the document counts of every collection reported by allNamespaces.
   * Collections that cannot be opened or counted contribute 0.
   * @returns {Promise<number>}
   */
  totalVectors: async function () {
    const { client } = await this.connect();
    const collectionNames = await this.allNamespaces(client);
    let total = 0;
    for (const name of collectionNames) {
      const collection = await client.collection(name).catch(() => null);
      // A failed lookup yields null; skip it instead of dereferencing it.
      if (!collection) continue;
      const count = await collection.countDocuments().catch(() => 0);
      total += count || 0;
    }
    return total;
  },

  /**
   * @param {string|null} _namespace
   * @returns {Promise<number>} vector count of the namespace, 0 when it does not exist.
   * @throws {Error} when no namespace is provided.
   */
  namespaceCount: async function (_namespace = null) {
    const { client } = await this.connect();
    const namespace = await this.namespace(client, _namespace);
    return namespace?.vectorCount || 0;
  },

  /**
   * Describes a namespace.
   * @param {object} client - connected Astra client.
   * @param {string|null} namespace
   * @returns {Promise<object|null>} the collection's own properties plus `name`
   *   and `vectorCount`, or null when the collection does not exist.
   * @throws {Error} when no namespace is provided.
   */
  namespace: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collection = await client.collection(namespace).catch(() => null);
    if (!(await this.isRealCollection(collection))) return null;

    const count = await collection.countDocuments().catch((e) => {
      console.error("Astra::namespaceExists", e.message);
      return null;
    });

    // NOTE(review): spreading the collection copies its own enumerable
    // properties into the result that "namespace-stats" returns. If the client
    // library keeps its HTTP client (and application token) on the collection,
    // this would expose it to API callers — confirm and trim if so.
    return {
      name: namespace,
      ...collection,
      vectorCount: typeof count === "number" ? count : 0,
    };
  },

  /**
   * @param {string|null} namespace
   * @returns {Promise<boolean>} false when no namespace is given or it does not exist.
   */
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { client } = await this.connect();
    return await this.namespaceExists(client, namespace);
  },

  /**
   * @param {object} client - connected Astra client.
   * @param {string|null} namespace
   * @returns {Promise<boolean>}
   * @throws {Error} when no namespace is provided.
   */
  namespaceExists: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collection = await client.collection(namespace);
    return await this.isRealCollection(collection);
  },

  /**
   * Drops the whole collection backing the namespace.
   * @returns {Promise<true>}
   */
  deleteVectorsInNamespace: async function (client, namespace = null) {
    await client.dropCollection(namespace);
    return true;
  },

  // AstraDB requires a dimension aspect for collection creation
  // we pass this in from the first chunk to infer the dimensions like other
  // providers do.
  /**
   * @param {object} client - connected Astra client.
   * @param {string} namespace
   * @param {number|null} dimensions - vector length, required only when the
   *   collection has to be created.
   * @returns {Promise<object>} collection handle.
   * @throws {Error} when the collection is missing and no dimension is known.
   */
  getOrCreateCollection: async function (client, namespace, dimensions = null) {
    const isExists = await this.namespaceExists(client, namespace);
    if (!isExists) {
      if (!dimensions)
        throw new Error(
          `AstraDB:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
        );

      await client.createCollection(namespace, {
        vector: {
          dimension: dimensions,
          metric: "cosine",
        },
      });
    }
    return await client.collection(namespace);
  },

  /**
   * Embeds a document (or replays its cached vectors) into the namespace and
   * records the resulting vector ids in DocumentVectors.
   * @param {string} namespace
   * @param {object} documentData - must carry `pageContent` and `docId`; all
   *   remaining keys are stored as chunk metadata.
   * @param {string|null} fullFilePath - key used for the vector cache.
   * @returns {Promise<false|{vectorized: boolean, error: string|null}>}
   *   false when there is no page content; failures are reported in `error`
   *   rather than thrown.
   */
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../../models/vectors");
    try {
      let vectorDimension = null;
      const { pageContent, docId, ...metadata } = documentData;
      if (!pageContent || pageContent.length === 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { client } = await this.connect();
        const { chunks } = cacheResult;
        const documentVectors = [];
        // An empty cache leaves the dimension null so getOrCreateCollection can
        // raise its descriptive error instead of a TypeError here.
        vectorDimension = chunks?.[0]?.[0]?.values?.length || null;

        const collection = await this.getOrCreateCollection(
          client,
          namespace,
          vectorDimension
        );
        if (!(await this.isRealCollection(collection)))
          // Error() only honours `cause` in its options; carry the namespace there.
          throw new Error("Failed to create new AstraDB collection!", {
            cause: { namespace },
          });

        for (const cachedBatch of chunks) {
          // Before sending to Astra and saving the records to our db
          // we need to assign the id of each chunk that is stored in the cached file.
          const records = cachedBatch.map((cached) => {
            const _id = uuidv4();
            documentVectors.push({ docId, vectorId: _id });
            return {
              _id,
              $vector: cached.values,
              metadata: cached.metadata || {},
            };
          });

          await collection.insertMany(records);
        }
        await DocumentVectors.bulkInsert(documentVectors);
        return { vectorized: true, error: null };
      }

      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize:
          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const LLMConnector = getLLMProvider();
      const documentVectors = [];
      const vectors = [];
      const vectorValues = await LLMConnector.embedChunks(textChunks);

      if (!!vectorValues && vectorValues.length > 0) {
        for (const [i, vector] of vectorValues.entries()) {
          if (!vectorDimension) vectorDimension = vector.length;
          const vectorRecord = {
            _id: uuidv4(),
            $vector: vector,
            metadata: { ...metadata, text: textChunks[i] },
          };

          vectors.push(vectorRecord);
          documentVectors.push({ docId, vectorId: vectorRecord._id });
        }
      } else {
        throw new Error(
          "Could not embed document chunks! This document will not be recorded."
        );
      }
      const { client } = await this.connect();
      const collection = await this.getOrCreateCollection(
        client,
        namespace,
        vectorDimension
      );
      if (!(await this.isRealCollection(collection)))
        throw new Error("Failed to create new AstraDB collection!", {
          cause: { namespace },
        });

      if (vectors.length > 0) {
        const chunks = [];

        console.log("Inserting vectorized chunks into Astra DB.");

        // AstraDB has maximum upsert size of 20 records per-request so we have to use a lower chunk size here
        // in order to do the queries - this takes a lot more time than other providers but there
        // is no way around it. This will save the vector-cache with the same layout, so we don't
        // have to chunk again for cached files.
        for (const batch of toChunks(vectors, ASTRA_INSERT_BATCH_SIZE)) {
          chunks.push(
            batch.map((c) => {
              return { id: c._id, values: c.$vector, metadata: c.metadata };
            })
          );
          await collection.insertMany(batch);
        }
        await storeVectorResult(chunks, fullFilePath);
      }

      await DocumentVectors.bulkInsert(documentVectors);
      return { vectorized: true, error: null };
    } catch (e) {
      console.error("addDocumentToNamespace", e.message);
      return { vectorized: false, error: e.message };
    }
  },

  /**
   * Removes every vector recorded for `docId` from the namespace and from
   * DocumentVectors.
   * @returns {Promise<true|undefined>} undefined when no vectors are recorded
   *   for the document.
   * @throws {Error} when the namespace does not exist.
   */
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../../models/vectors");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error(
        "Invalid namespace - has it been collected and populated yet?"
      );
    const collection = await client.collection(namespace);

    const knownDocuments = await DocumentVectors.where({ docId });
    if (knownDocuments.length === 0) return;

    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
    for (const id of vectorIds) {
      await collection.deleteMany({
        _id: id,
      });
    }

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);
    return true;
  },

  /**
   * Embeds `input` with the given connector and returns the closest stored chunks.
   * @param {object} params
   * @param {string|null} params.namespace
   * @param {string} params.input - query text.
   * @param {object|null} params.LLMConnector - must provide embedTextInput().
   * @param {number} params.similarityThreshold - matches scoring lower are dropped.
   * @param {number} params.topN - maximum number of matches requested.
   * @returns {Promise<{contextTexts: string[], sources: object[], message: string|false}>}
   * @throws {Error} when namespace, input or LLMConnector is missing.
   */
  performSimilaritySearch: async function ({
    namespace = null,
    input = "",
    LLMConnector = null,
    similarityThreshold = 0.25,
    topN = 4,
  }) {
    if (!namespace || !input || !LLMConnector)
      throw new Error("Invalid request to performSimilaritySearch.");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        contextTexts: [],
        sources: [],
        message:
          "Invalid query - no namespace found for workspace in vector db!",
      };
    }

    const queryVector = await LLMConnector.embedTextInput(input);
    const { contextTexts, sourceDocuments } = await this.similarityResponse(
      client,
      namespace,
      queryVector,
      similarityThreshold,
      topN
    );

    const sources = sourceDocuments.map((metadata, i) => {
      return { ...metadata, text: contextTexts[i] };
    });
    return {
      contextTexts,
      sources: this.curateSources(sources),
      message: false,
    };
  },

  /**
   * Runs a vector-sorted find on the collection and keeps the matches at or
   * above the similarity threshold.
   * @returns {Promise<{contextTexts: string[], sourceDocuments: object[], scores: number[]}>}
   */
  similarityResponse: async function (
    client,
    namespace,
    queryVector,
    similarityThreshold = 0.25,
    topN = 4
  ) {
    const result = {
      contextTexts: [],
      sourceDocuments: [],
      scores: [],
    };

    const collection = await client.collection(namespace);
    const responses = await collection
      .find(
        {},
        {
          sort: { $vector: queryVector },
          limit: topN,
          includeSimilarity: true,
        }
      )
      .toArray();

    responses.forEach((response) => {
      if (response.$similarity < similarityThreshold) return;
      result.contextTexts.push(response.metadata.text);
      result.sourceDocuments.push(response);
      result.scores.push(response.$similarity);
    });
    return result;
  },

  /**
   * Lists collections by POSTing a `findCollections` command to the client's
   * base URL with its application token.
   * @param {object} client - connected Astra client.
   * @returns {Promise<Array>} always an array; empty on any failure or when the
   *   response carries no `status.collections`.
   */
  allNamespaces: async function (client) {
    try {
      const headers = new Headers();
      headers.append("Token", client?.httpClient?.applicationToken);
      headers.append("Content-Type", "application/json");

      const response = await fetch(client?.httpClient?.baseUrl, {
        method: "POST",
        headers,
        body: JSON.stringify({ findCollections: {} }),
        redirect: "follow",
      });
      const text = await response?.text();
      const collections = text ? JSON.parse(text)?.status?.collections : [];
      // Error payloads have no status.collections; callers iterate the result,
      // so never hand back undefined.
      return Array.isArray(collections) ? collections : [];
    } catch (e) {
      console.error("Astra::AllNamespace", e);
      return [];
    }
  },

  /**
   * @param {{namespace?: string|null}} reqBody
   * @returns {Promise<object>} namespace details, or a message object when none
   *   could be fetched.
   * @throws {Error} when namespace is missing or does not exist.
   */
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");
    const stats = await this.namespace(client, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB for namespace" };
  },

  /**
   * Drops the collection backing the namespace.
   * @param {{namespace?: string|null}} reqBody
   * @returns {Promise<{message: string}>}
   * @throws {Error} when namespace is missing or does not exist.
   */
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");

    const details = await this.namespace(client, namespace);
    await this.deleteVectorsInNamespace(client, namespace);
    return {
      message: `Namespace ${namespace} was deleted along with ${
        details?.vectorCount || "all"
      } vectors.`,
    };
  },

  /**
   * Flattens search hits: a source with its own `metadata` key is replaced by a
   * copy of that metadata, any other non-empty source is copied as-is, and
   * empty objects are dropped.
   * @param {object[]} sources
   * @returns {object[]}
   */
  curateSources: function (sources = []) {
    const documents = [];
    for (const source of sources) {
      if (Object.keys(source).length === 0) continue;
      const metadata = Object.hasOwn(source, "metadata")
        ? source.metadata
        : source;
      documents.push({ ...metadata });
    }

    return documents;
  },
};

module.exports.AstraDB = AstraDB;
sha512-ID1cI+7bazPDyL9wYy9GaQ8gEEohWvcUl/Yf0dIdutJxnmInEEyCsb4awy/OiBfall7zBA179Pahi3vCdFze3Q== + btoa-lite@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/btoa-lite/-/btoa-lite-1.0.0.tgz#337766da15801210fdd956c22e9c6891ab9d0337" @@ -5636,7 +5650,7 @@ winston-transport@^4.5.0: readable-stream "^3.6.0" triple-beam "^1.3.0" -winston@^3.9.0: +winston@^3.7.2, winston@^3.9.0: version "3.11.0" resolved "https://registry.yarnpkg.com/winston/-/winston-3.11.0.tgz#2d50b0a695a2758bb1c95279f0a88e858163ed91" integrity sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==