diff --git a/docker/.env.example b/docker/.env.example index 7fedf944c2fb277fe527f1d5e581710d84adbd46..23789af45847e87f55aa5c8de0d144c232159565 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -124,6 +124,10 @@ GID='1000' # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx new file mode 100644 index 0000000000000000000000000000000000000000..33ce693db4067a36c5bed858212bcb899caf8490 --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx @@ -0,0 +1,50 @@ +export default function VoyageAiOptions({ settings }) { + return ( + <div className="w-full flex flex-col gap-y-4"> + <div className="w-full flex items-center gap-4"> + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + API Key + </label> + <input + type="password" + name="VoyageAiApiKey" + className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="Voyage AI API Key" + defaultValue={settings?.VoyageAiApiKey ? "*".repeat(20) : ""} + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Model Preference + </label> + <select + name="EmbeddingModelPref" + required={true} + defaultValue={settings?.EmbeddingModelPref} + className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + <optgroup label="Available embedding models"> + {[ + "voyage-large-2-instruct", + "voyage-law-2", + "voyage-code-2", + "voyage-large-2", + "voyage-2", + ].map((model) => { + return ( + <option key={model} value={model}> + {model} + </option> + ); + })} + </optgroup> + </select> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/media/embeddingprovider/voyageai.png b/frontend/src/media/embeddingprovider/voyageai.png new file mode 100644 index 0000000000000000000000000000000000000000..4fd57eaac410eebba278abadf2a2ed882de7959c Binary files /dev/null and b/frontend/src/media/embeddingprovider/voyageai.png differ diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index 8f234b5ac1dce928ecb70cc3c95cb83498763004..5a0f51c1df55092f47385469aa35f47ecf34f2e5 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions"; @@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions"; import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions"; import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions"; +import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions"; import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -78,6 +81,13 @@ const EMBEDDERS = [ options: (settings) => <CohereEmbeddingOptions settings={settings} />, description: "Run powerful embedding models from Cohere.", }, + { + name: "Voyage AI", + value: "voyageai", + logo: VoyageAiLogo, + options: (settings) => <VoyageAiOptions settings={settings} />, + description: "Run powerful embedding models from Voyage AI.", + }, ]; export default function GeneralEmbeddingPreference() { diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index b6ae8cb20d329b3c49db87abb8f3ee6d1a25fef5..35358636d0955e2ff991cb543daa3e6a33e0a75e 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: CohereLogo, }, + voyageai: { + name: "Voyage AI", + description: [ + "Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.", + ], + logo: VoyageAiLogo, + }, }; export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { diff --git a/server/.env.example b/server/.env.example index 4be9ab75e86a0185adcef5285aa1fbbbe0a8a9e3..e38250beb37ad413eaa5b226b28f8057ee95caec 100644 --- a/server/.env.example +++ b/server/.env.example @@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 7cd2dd4705e4f58dce5db66b337f13c08de5fbe3..cbbf1f236343a1796ca4c516eaced58468b8168a 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -498,15 +498,18 @@ function apiWorkspaceEndpoints(app) { const { slug = null } = request.params; const { docPath, pinStatus = false } = reqBody(request); const workspace = await Workspace.get({ slug }); - + const document = await Document.get({ workspaceId: workspace.id, docpath: docPath, }); if (!document) return response.sendStatus(404).end(); - + await Document.update(document.id, { pinned: pinStatus }); - return response.status(200).json({ message: 'Pin status updated successfully' }).end(); + return response + .status(200) + .json({ message: "Pin status updated successfully" }) + .end(); } catch (error) { console.error("Error processing the pin status update:", error); return response.status(500).end(); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index c8e239f154508bddc902f15a1a784aab6eefb508..a5bb6a23c318d2823225fa80f5f7edeb768950a7 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -426,6 +426,9 @@ const SystemSettings = { // Cohere API Keys CohereApiKey: !!process.env.COHERE_API_KEY, CohereModelPref: process.env.COHERE_MODEL_PREF, + + // VoyageAi API Keys + VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY, }; }, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index b98891c9d4bb25911ea4419af5d24e7cc73026e6..8616943c2d48b543cb1444bbc3e7c3f55ad32e4e 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1999,7 +1999,8 @@ } } } - },"/v1/workspace/{slug}/update-pin": { + }, + "/workspace/{slug}/update-pin": { "post": { "tags": [ "Workspaces" @@ -2037,6 +2038,9 @@ } } }, + "403": { + "description": "Forbidden" + }, "404": { "description": "Document not found" }, @@ -2047,20 +2051,12 @@ "requestBody": { "description": "JSON object with the document path and pin status to update.", "required": true, + "type": "object", "content": { "application/json": { - "schema": { - "type": "object", - "properties": { - "docPath": { - "type": "string", - "example": "custom-documents/my-pdf.pdf-hash.json" - }, - "pinStatus": { - "type": "boolean", - "example": true - } - } + "example": { + "docPath": "custom-documents/my-pdf.pdf-hash.json", + "pinStatus": true } } } diff --git a/server/utils/EmbeddingEngines/voyageAi/index.js b/server/utils/EmbeddingEngines/voyageAi/index.js new file mode 100644 index 0000000000000000000000000000000000000000..b25d3208d5240282bc08c0d8d344f360390ccc40 --- /dev/null +++ b/server/utils/EmbeddingEngines/voyageAi/index.js @@ -0,0 +1,45 @@ +class VoyageAiEmbedder { + constructor() { + if (!process.env.VOYAGEAI_API_KEY) + throw new Error("No Voyage AI API key was set."); + + const { + VoyageEmbeddings, + } = require("@langchain/community/embeddings/voyage"); + const voyage = new VoyageEmbeddings({ + apiKey: process.env.VOYAGEAI_API_KEY, + }); + + this.voyage = voyage; + this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct"; + + // Limit of how many strings we can process in a single pass to stay with resource or network limits + this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches + this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding + } + + async embedTextInput(textInput) { + const result = await this.voyage.embedDocuments( + Array.isArray(textInput) ? textInput : [textInput], + { modelName: this.model } + ); + return result || []; + } + + async embedChunks(textChunks = []) { + try { + const embeddings = await this.voyage.embedDocuments(textChunks, { + modelName: this.model, + batchSize: this.batchSize, + }); + return embeddings; + } catch (error) { + console.error("Voyage AI Failed to embed:", error); + throw error; + } + } +} + +module.exports = { + VoyageAiEmbedder, +}; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index d9a1ba090b17b4d1f31e4cbec63b269b67b84387..e60202a6025db055c251ad252d58585a0c6adcfb 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() { case "cohere": const { CohereEmbedder } = require("../EmbeddingEngines/cohere"); return new CohereEmbedder(); + case "voyageai": + const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi"); + return new VoyageAiEmbedder(); default: return new NativeEmbedder(); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 48c98e9570b5dd1edcadeb0c8409d567fe7e1bcf..401541634202980469d676d090cd1eea3be1847f 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -350,6 +350,12 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // VoyageAi Options + VoyageAiApiKey: { + envKey: "VOYAGEAI_API_KEY", + checks: [isNotEmpty], + }, + // Whisper (transcription) providers WhisperProvider: { envKey: "WHISPER_PROVIDER", @@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") { "ollama", "lmstudio", "cohere", + "voyageai", ]; return supported.includes(input) ? null