diff --git a/README.md b/README.md index b8202ecd0f0b12ba8a66a6f2122bdf2e32e80b92..90adb0430f775fc4230a18beff6b17ae7e19acb4 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) - [AWS Bedrock](https://aws.amazon.com/bedrock/) - [Anthropic](https://www.anthropic.com/) +- [NVIDIA NIM (chat models)](https://build.nvidia.com/explore/discover) - [Google Gemini Pro](https://ai.google.dev/) - [Hugging Face (chat models)](https://huggingface.co/) - [Ollama (chat models)](https://ollama.ai/) diff --git a/docker/.env.example b/docker/.env.example index 2b3d10629db537d5cfb2a4f0c62b87f99bd099d8..ee53c718bc644bc4b8a8299d502a177c104073cc 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -117,6 +117,10 @@ GID='1000' # XAI_LLM_API_KEY='xai-your-api-key-here' # XAI_LLM_MODEL_PREF='grok-beta' +# LLM_PROVIDER='nvidia-nim' +# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000' +# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct' + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/NvidiaNimOptions/index.jsx b/frontend/src/components/LLMSelection/NvidiaNimOptions/index.jsx new file mode 100644 index 0000000000000000000000000000000000000000..ed8747a8fea26ac8df661eb9ac82b9aec4b67f92 --- /dev/null +++ b/frontend/src/components/LLMSelection/NvidiaNimOptions/index.jsx @@ -0,0 +1,11 @@ +import RemoteNvidiaNimOptions from "./remote"; +import ManagedNvidiaNimOptions from "./managed"; + +export default function NvidiaNimOptions({ settings }) { + const version = "remote"; // static to "remote" when in docker version. + return version === "remote" ? ( + <RemoteNvidiaNimOptions settings={settings} /> + ) : ( + <ManagedNvidiaNimOptions settings={settings} /> + ); +} diff --git a/frontend/src/components/LLMSelection/NvidiaNimOptions/managed.jsx b/frontend/src/components/LLMSelection/NvidiaNimOptions/managed.jsx new file mode 100644 index 0000000000000000000000000000000000000000..0dce898abc10f70ee5df49d1d6ca4b6bb6bdc7c6 --- /dev/null +++ b/frontend/src/components/LLMSelection/NvidiaNimOptions/managed.jsx @@ -0,0 +1,7 @@ +/** + * This component is used to select, start, and manage NVIDIA NIM + * containers and images via docker management tools. + */ +export default function ManagedNvidiaNimOptions({ settings }) { + return null; +} diff --git a/frontend/src/components/LLMSelection/NvidiaNimOptions/remote.jsx b/frontend/src/components/LLMSelection/NvidiaNimOptions/remote.jsx new file mode 100644 index 0000000000000000000000000000000000000000..f1fa4153d9875a643ba3f17a63f3b1163c92a923 --- /dev/null +++ b/frontend/src/components/LLMSelection/NvidiaNimOptions/remote.jsx @@ -0,0 +1,130 @@ +import PreLoader from "@/components/Preloader"; +import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery"; +import System from "@/models/system"; +import { NVIDIA_NIM_COMMON_URLS } from "@/utils/constants"; +import { useState, useEffect } from "react"; + +/** + * This component is used to select a remote Nvidia NIM model endpoint + * This is the default component and way to connect to NVIDIA NIM + * as the "managed" provider can only work in the Desktop context. 
+ */ +export default function RemoteNvidiaNimOptions({ settings }) { + const { + autoDetecting: loading, + basePath, + basePathValue, + handleAutoDetectClick, + } = useProviderEndpointAutoDiscovery({ + provider: "nvidia-nim", + initialBasePath: settings?.NvidiaNimLLMBasePath, + ENDPOINTS: NVIDIA_NIM_COMMON_URLS, + }); + + return ( + <div className="flex gap-[36px] mt-1.5"> + <div className="flex flex-col w-60"> + <div className="flex justify-between items-center mb-2"> + <label className="text-white text-sm font-semibold"> + Nvidia Nim Base URL + </label> + {loading ? ( + <PreLoader size="6" /> + ) : ( + <> + {!basePathValue.value && ( + <button + onClick={handleAutoDetectClick} + className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]" + > + Auto-Detect + </button> + )} + </> + )} + </div> + <input + type="url" + name="NvidiaNimLLMBasePath" + className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" + placeholder="http://localhost:8000/v1" + value={basePathValue.value} + required={true} + autoComplete="off" + spellCheck={false} + onChange={basePath.onChange} + onBlur={basePath.onBlur} + /> + <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2"> + Enter the URL where Nvidia NIM is running. + </p> + </div> + {!settings?.credentialsOnly && ( + <NvidiaNimModelSelection + settings={settings} + basePath={basePath.value} + /> + )} + </div> + ); +} +function NvidiaNimModelSelection({ settings, basePath }) { + const [models, setModels] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + setLoading(true); + const { models } = await System.customModels( + "nvidia-nim", + null, + basePath + ); + setModels(models); + setLoading(false); + } + findCustomModels(); + }, [basePath]); + + if (loading || models.length === 0) { + return ( + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-3"> + Chat Model Selection + </label> + <select + name="NvidiaNimLLMModelPref" + disabled={true} + className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + <option disabled={true} selected={true}> + -- loading available models -- + </option> + </select> + </div> + ); + } + + return ( + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-3"> + Chat Model Selection + </label> + <select + name="NvidiaNimLLMModelPref" + required={true} + className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + {models.map((model) => ( + <option + key={model.id} + value={model.id} + selected={settings?.NvidiaNimLLMModelPref === model.id} + > + {model.name} + </option> + ))} + </select> + </div> + ); +} diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js index 57cf650188ba6a9364d8b86e75132b21624e5081..52e84122bae04a5a2fa8696b982eb0704a5a344d 100644 --- a/frontend/src/hooks/useGetProvidersModels.js +++ b/frontend/src/hooks/useGetProvidersModels.js @@ -43,6 +43,7 @@ const PROVIDER_DEFAULT_MODELS = { ollama: [], togetherai: [], fireworksai: [], + "nvidia-nim": [], groq: [], native: [], cohere: [ diff --git 
a/frontend/src/media/llmprovider/nvidia-nim.png b/frontend/src/media/llmprovider/nvidia-nim.png new file mode 100644 index 0000000000000000000000000000000000000000..cdec289cf6f28b4a6cb62c868525ed2f9a7d740f Binary files /dev/null and b/frontend/src/media/llmprovider/nvidia-nim.png differ diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index 14c5ea11d08b4804f0cf5a76d752f79cec4dc07e..f4cead2b81c7e6789aa1f6bf42aed932bd695252 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -29,6 +29,7 @@ import AWSBedrockLogo from "@/media/llmprovider/bedrock.png"; import DeepSeekLogo from "@/media/llmprovider/deepseek.png"; import APIPieLogo from "@/media/llmprovider/apipie.png"; import XAILogo from "@/media/llmprovider/xai.png"; +import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; @@ -56,6 +57,7 @@ import AWSBedrockLLMOptions from "@/components/LLMSelection/AwsBedrockLLMOptions import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions"; import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions"; import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions"; +import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -94,6 +96,15 @@ export const AVAILABLE_LLM_PROVIDERS = [ description: "Google's largest and most capable AI model", requiredConfig: ["GeminiLLMApiKey"], }, + { + name: "Nvidia NIM", + value: "nvidia-nim", + logo: NvidiaNimLogo, + options: (settings) => <NvidiaNimOptions settings={settings} />, + description: + "Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.", + requiredConfig: ["NvidiaNimLLMBasePath"], + }, { name: "HuggingFace", value: "huggingface", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index ab83a5af240c5d67b0042b7aa2c965517666edbf..1fefca235b6e9209458fc636f1224c5471d50243 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -9,6 +9,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg"; +import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; @@ -76,6 +77,13 @@ export const LLM_SELECTION_PRIVACY = { ], logo: GeminiLogo, }, + "nvidia-nim": { + name: "Nvidia NIM", + description: [ + "Your model and chats are only accessible on the machine running the Nvidia NIM service", + ], + logo: NvidiaNimLogo, + }, lmstudio: { name: "LMStudio", description: [ diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 69704f19c7899b7225562ff3675ad44a3b5ffade..ea78d6c05cd0d6a3c04d83f18b8a17175e121530 100644 --- 
a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -24,7 +24,7 @@ import DeepSeekLogo from "@/media/llmprovider/deepseek.png"; import APIPieLogo from "@/media/llmprovider/apipie.png"; import NovitaLogo from "@/media/llmprovider/novita.png"; import XAILogo from "@/media/llmprovider/xai.png"; - +import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -51,6 +51,7 @@ import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions"; import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions"; import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions"; import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions"; +import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -91,6 +92,14 @@ const LLMS = [ options: (settings) => <GeminiLLMOptions settings={settings} />, description: "Google's largest and most capable AI model", }, + { + name: "Nvidia NIM", + value: "nvidia-nim", + logo: NvidiaNimLogo, + options: (settings) => <NvidiaNimOptions settings={settings} />, + description: + "Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.", + }, { name: "HuggingFace", value: "huggingface", diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index 6025d295370e2b4c9fcba06663b4628c3822b36f..effc7b7448bf381efcacda306faa40a8771975ea 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -28,6 +28,7 @@ const ENABLED_PROVIDERS = [ "litellm", "apipie", "xai", + "nvidia-nim", // TODO: More agent support. // "cohere", // Has tool calling and will need to build explicit support // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index 6333ef8bf98a103a173812c6d2fcb2f6dc2bd78d..71f3048b72f48cdb1937a121a7469eba0f19c757 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -37,6 +37,13 @@ export const LOCALAI_COMMON_URLS = [ "http://172.17.0.1:8080/v1", ]; +export const NVIDIA_NIM_COMMON_URLS = [ + "http://127.0.0.1:8000/v1/version", + "http://localhost:8000/v1/version", + "http://host.docker.internal:8000/v1/version", + "http://172.17.0.1:8000/v1/version", +]; + export function fullApiUrl() { if (API_BASE !== "/api") return API_BASE; return `${window.location.origin}/api`; diff --git a/server/.env.example b/server/.env.example index ba56517a8fb367797a8d3e4d7f4b059704d7b2bb..fb84a9f8d2a8f52aeebd825b172bc3eb9b14ff38 100644 --- a/server/.env.example +++ b/server/.env.example @@ -107,6 +107,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. 
# XAI_LLM_API_KEY='xai-your-api-key-here' # XAI_LLM_MODEL_PREF='grok-beta' +# LLM_PROVIDER='nvidia-nim' +# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000' +# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct' + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 58011d868ae6206cfd01be440e57cbe6cd59aedc..3403c0824cd5d287e1bcccf2d35f8f7f5489c500 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -547,6 +547,11 @@ const SystemSettings = { // xAI LLM API Keys XAIApiKey: !!process.env.XAI_LLM_API_KEY, XAIModelPref: process.env.XAI_LLM_MODEL_PREF, + + // Nvidia NIM Keys + NvidiaNimLLMBasePath: process.env.NVIDIA_NIM_LLM_BASE_PATH, + NvidiaNimLLMModelPref: process.env.NVIDIA_NIM_LLM_MODEL_PREF, + NvidiaNimLLMTokenLimit: process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT, }; }, diff --git a/server/utils/AiProviders/nvidiaNim/index.js b/server/utils/AiProviders/nvidiaNim/index.js new file mode 100644 index 0000000000000000000000000000000000000000..6deb7b2e4a0a4f9358ac0e6a68b1be524ef2fd97 --- /dev/null +++ b/server/utils/AiProviders/nvidiaNim/index.js @@ -0,0 +1,220 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + handleDefaultStreamResponseV2, +} = require("../../helpers/chat/responses"); + +class NvidiaNimLLM { + constructor(embedder = null, modelPreference = null) { + if (!process.env.NVIDIA_NIM_LLM_BASE_PATH) + throw new Error("No Nvidia NIM API Base Path was set."); + + const { OpenAI: OpenAIApi } = require("openai"); + this.nvidiaNim = new OpenAIApi({ + baseURL: parseNvidiaNimBasePath(process.env.NVIDIA_NIM_LLM_BASE_PATH), + apiKey: null, + }); + + this.model = modelPreference || process.env.NVIDIA_NIM_LLM_MODEL_PREF; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.embedder = embedder ?? 
new NativeEmbedder();
+    this.defaultTemp = 0.7;
+    this.#log(
+      `Loaded with model: ${this.model} with context window: ${this.promptWindowLimit()}`
+    );
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  /**
+   * Set the model token limit `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT` for the given model ID
+   * @param {string} modelId
+   * @param {string} basePath
+   * @returns {Promise<void>}
+   */
+  static async setModelTokenLimit(modelId, basePath = null) {
+    if (!modelId) return;
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(
+        basePath || process.env.NVIDIA_NIM_LLM_BASE_PATH
+      ),
+      apiKey: null,
+    });
+    const model = await openai.models
+      .list()
+      .then((results) => results.data)
+      .catch(() => {
+        return [];
+      });
+
+    if (!model.length) return;
+    const modelInfo = model.find((model) => model.id === modelId);
+    if (!modelInfo) return;
+    process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT = Number(
+      modelInfo.max_model_len || 4096
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  /**
+   * Generates appropriate content array for a message + attachments.
+   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
+   * @returns {string|object[]}
+   */
+  #generateContent({ userPrompt, attachments = [] }) {
+    if (!attachments.length) {
+      return userPrompt;
+    }
+
+    const content = [{ type: "text", text: userPrompt }];
+    for (let attachment of attachments) {
+      content.push({
+        type: "image_url",
+        image_url: {
+          url: attachment.contentString,
+          detail: "auto",
+        },
+      });
+    }
+    return content.flat();
+  }
+
+  /**
+   * Construct the user prompt for this model.
+ * @param {{attachments: import("../../helpers").Attachment[]}} param0 + * @returns + */ + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...chatHistory, + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const result = await this.nvidiaNim.chat.completions.create({ + model: this.model, + messages, + temperature, + }); + + if (!result.hasOwnProperty("choices") || result.choices.length === 0) + return null; + return result.choices[0].message.content; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const streamRequest = await this.nvidiaNim.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }); + return streamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +/** + * Parse the base path for the Nvidia NIM container API. Since the base path must end in /v1 and cannot have a trailing slash, + * and the user can possibly set it to anything and likely incorrectly due to pasting behaviors, we need to ensure it is in the correct format. 
+ * @param {string} basePath + * @returns {string} + */ +function parseNvidiaNimBasePath(providedBasePath = "") { + try { + const baseURL = new URL(providedBasePath); + const basePath = `${baseURL.origin}/v1`; + return basePath; + } catch (e) { + return providedBasePath; + } +} + +module.exports = { + NvidiaNimLLM, + parseNvidiaNimBasePath, +}; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index d61867f4d4520725da77be39cc1396e423ef5f54..3c2faf5b1642c2e88ce2d250bfe064d92807c5c3 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -783,6 +783,8 @@ ${this.getHistory({ to: route.to }) return new Providers.AWSBedrockProvider({}); case "fireworksai": return new Providers.FireworksAIProvider({ model: config.model }); + case "nvidia-nim": + return new Providers.NvidiaNimProvider({ model: config.model }); case "deepseek": return new Providers.DeepSeekProvider({ model: config.model }); case "litellm": diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 4ba6840d70bc1a696bc6dc3fc61e089f4070c53b..bd7920611be776d799dcf83ab401e445c7ef2c9b 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -155,6 +155,14 @@ class Provider { apiKey: process.env.XAI_LLM_API_KEY ?? null, ...config, }); + case "novita": + return new ChatOpenAI({ + configuration: { + baseURL: "https://api.novita.ai/v3/openai", + }, + apiKey: process.env.NOVITA_LLM_API_KEY ?? null, + ...config, + }); // OSS Model Runners // case "anythingllm_ollama": @@ -207,12 +215,12 @@ class Provider { apiKey: process.env.LITE_LLM_API_KEY ?? null, ...config, }); - case "novita": + case "nvidia-nim": return new ChatOpenAI({ configuration: { - baseURL: "https://api.novita.ai/v3/openai", + baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH, }, - apiKey: process.env.NOVITA_LLM_API_KEY ?? null, + apiKey: null, ...config, }); diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js index c454c39387573b66a85cf00a852b30b886ab197f..e5c01123c6308f8af941f036ad5403da77a48d2d 100644 --- a/server/utils/agents/aibitat/providers/index.js +++ b/server/utils/agents/aibitat/providers/index.js @@ -19,6 +19,7 @@ const LiteLLMProvider = require("./litellm.js"); const ApiPieProvider = require("./apipie.js"); const XAIProvider = require("./xai.js"); const NovitaProvider = require("./novita.js"); +const NvidiaNimProvider = require("./nvidiaNim.js"); module.exports = { OpenAIProvider, @@ -42,4 +43,5 @@ module.exports = { ApiPieProvider, XAIProvider, NovitaProvider, + NvidiaNimProvider, }; diff --git a/server/utils/agents/aibitat/providers/nvidiaNim.js b/server/utils/agents/aibitat/providers/nvidiaNim.js new file mode 100644 index 0000000000000000000000000000000000000000..50132abcb50f881924d39edbd75ff3a773d8a17a --- /dev/null +++ b/server/utils/agents/aibitat/providers/nvidiaNim.js @@ -0,0 +1,117 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); + +/** + * The agent provider for the Nvidia NIM provider. + * We wrap Nvidia NIM in UnTooled because its tool-calling may not be supported for specific models and this normalizes that. 
+ */ +class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) { + model; + + constructor(config = {}) { + const { model } = config; + super(); + const client = new OpenAI({ + baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH, + apiKey: null, + maxRetries: 0, + }); + + this._client = client; + this.model = model; + this.verbose = true; + } + + get client() { + return this._client; + } + + async #handleFunctionCallChat({ messages = [] }) { + return await this.client.chat.completions + .create({ + model: this.model, + temperature: 0, + messages, + }) + .then((result) => { + if (!result.hasOwnProperty("choices")) + throw new Error("Nvidia NIM chat: No results!"); + if (result.choices.length === 0) + throw new Error("Nvidia NIM chat: No results length!"); + return result.choices[0].message.content; + }) + .catch((_) => { + return null; + }); + } + + /** + * Create a completion based on the received messages. + * + * @param messages A list of messages to send to the API. + * @param functions + * @returns The completion. + */ + async complete(messages, functions = null) { + try { + let completion; + if (functions.length > 0) { + const { toolCall, text } = await this.functionCall( + messages, + functions, + this.#handleFunctionCallChat.bind(this) + ); + + if (toolCall !== null) { + this.providerLog(`Valid tool call found - running ${toolCall.name}.`); + this.deduplicator.trackRun(toolCall.name, toolCall.arguments); + return { + result: null, + functionCall: { + name: toolCall.name, + arguments: toolCall.arguments, + }, + cost: 0, + }; + } + completion = { content: text }; + } + + if (!completion?.content) { + this.providerLog( + "Will assume chat completion without tool call inputs." + ); + const response = await this.client.chat.completions.create({ + model: this.model, + messages: this.cleanMsgs(messages), + }); + completion = response.choices[0].message; + } + + // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent + // from calling the exact same function over and over in a loop within a single chat exchange + // _but_ we should enable it to call previously used tools in a new chat interaction. + this.deduplicator.reset("runs"); + return { + result: completion.content, + cost: 0, + }; + } catch (error) { + throw error; + } + } + + /** + * Get the cost of the completion. + * + * @param _usage The completion to get the cost for. + * @returns The cost of the completion. + */ + getCost(_usage) { + return 0; + } +} + +module.exports = NvidiaNimProvider; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index 6b1d42af298a2a6c4469fe1e784849ba71297c6d..2263a59682497b49a261cdb78053c38f33c44a7a 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -177,6 +177,12 @@ class AgentHandler { if (!process.env.NOVITA_LLM_API_KEY) throw new Error("Novita API Key must be provided to use agents."); break; + case "nvidia-nim": + if (!process.env.NVIDIA_NIM_LLM_BASE_PATH) + throw new Error( + "Nvidia NIM base path must be provided to use agents." + ); + break; default: throw new Error( @@ -240,6 +246,8 @@ class AgentHandler { return process.env.XAI_LLM_MODEL_PREF ?? "grok-beta"; case "novita": return process.env.NOVITA_LLM_MODEL_PREF ?? "gryphe/mythomax-l2-13b"; + case "nvidia-nim": + return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? 
null; default: return null; } diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index 72882d6d1c11810c81983af6de6cec3803a6d228..35ab5570d05eecca84fe68fc0640f73949f582a0 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -6,6 +6,8 @@ const { fireworksAiModels } = require("../AiProviders/fireworksAi"); const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs"); const { fetchNovitaModels } = require("../AiProviders/novita"); const { parseLMStudioBasePath } = require("../AiProviders/lmStudio"); +const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim"); + const SUPPORT_CUSTOM_MODELS = [ "openai", "localai", @@ -13,6 +15,7 @@ const SUPPORT_CUSTOM_MODELS = [ "native-llm", "togetherai", "fireworksai", + "nvidia-nim", "mistral", "perplexity", "openrouter", @@ -68,6 +71,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getNovitaModels(); case "xai": return await getXAIModels(apiKey); + case "nvidia-nim": + return await getNvidiaNimModels(basePath); default: return { models: [], error: "Invalid provider for custom models" }; } @@ -520,6 +525,37 @@ async function getXAIModels(_apiKey = null) { return { models, error: null }; } +async function getNvidiaNimModels(basePath = null) { + try { + const { OpenAI: OpenAIApi } = require("openai"); + const openai = new OpenAIApi({ + baseURL: parseNvidiaNimBasePath( + basePath ?? process.env.NVIDIA_NIM_LLM_BASE_PATH + ), + apiKey: null, + }); + const modelResponse = await openai.models + .list() + .then((results) => results.data) + .catch((e) => { + throw new Error(e.message); + }); + + const models = modelResponse.map((model) => { + return { + id: model.id, + name: model.id, + organization: model.owned_by, + }; + }); + + return { models, error: null }; + } catch (e) { + console.error(`Nvidia NIM:getNvidiaNimModels`, e.message); + return { models: [], error: "Could not fetch Nvidia NIM Models" }; + } +} + module.exports = { getCustomModels, }; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index cbf07fbd0eff1fa7240aaae45ffb0cca567424b6..e599078b6a9ec60cea2d8490b0b2a444f300a206 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -171,6 +171,9 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "xai": const { XAiLLM } = require("../AiProviders/xai"); return new XAiLLM(embedder, model); + case "nvidia-nim": + const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim"); + return new NvidiaNimLLM(embedder, model); default: throw new Error( `ENV: No valid LLM_PROVIDER value found in environment! 
Using ${process.env.LLM_PROVIDER}` @@ -309,6 +312,9 @@ function getLLMProviderClass({ provider = null } = {}) { case "xai": const { XAiLLM } = require("../AiProviders/xai"); return XAiLLM; + case "nvidia-nim": + const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim"); + return NvidiaNimLLM; default: return null; } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 0af4b839b30cdbb709e18a4b0fbd9fbbf532e9c3..3165dc40a3a4d3d81f0a5f2912b301839b0953a5 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -578,6 +578,29 @@ const KEY_MAPPING = { envKey: "XAI_LLM_MODEL_PREF", checks: [isNotEmpty], }, + + // Nvidia NIM Options + NvidiaNimLLMBasePath: { + envKey: "NVIDIA_NIM_LLM_BASE_PATH", + checks: [isValidURL], + postUpdate: [ + (_, __, nextValue) => { + const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim"); + process.env.NVIDIA_NIM_LLM_BASE_PATH = + parseNvidiaNimBasePath(nextValue); + }, + ], + }, + NvidiaNimLLMModelPref: { + envKey: "NVIDIA_NIM_LLM_MODEL_PREF", + checks: [], + postUpdate: [ + async (_, __, nextValue) => { + const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim"); + await NvidiaNimLLM.setModelTokenLimit(nextValue); + }, + ], + }, }; function isNotEmpty(input = "") { @@ -684,6 +707,7 @@ function supportedLLM(input = "") { "deepseek", "apipie", "xai", + "nvidia-nim", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; }
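
Reviewer note (not part of the patch): a minimal sketch of how the parseNvidiaNimBasePath helper introduced in server/utils/AiProviders/nvidiaNim/index.js is expected to normalize user-pasted endpoints. The require path is illustrative and assumes the snippet is run from the repo's server/ directory.

// Minimal sketch, assuming execution from the server/ directory with dependencies installed.
const { parseNvidiaNimBasePath } = require("./utils/AiProviders/nvidiaNim");

// Any parseable URL is coerced to "<origin>/v1", regardless of what the user pasted.
console.log(parseNvidiaNimBasePath("http://127.0.0.1:8000")); // http://127.0.0.1:8000/v1
console.log(parseNvidiaNimBasePath("http://localhost:8000/v1/")); // http://localhost:8000/v1
console.log(parseNvidiaNimBasePath("http://host.docker.internal:8000/v1/chat")); // http://host.docker.internal:8000/v1

// Input that cannot be parsed as a URL is returned unchanged.
console.log(parseNvidiaNimBasePath("not-a-url")); // not-a-url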