Unverified commit b2dd35fe authored by Timothy Carambat, committed by GitHub

Add Support for NVIDIA NIM (#2766)

* Add Support for NVIDIA NIM

* update README

* linting
Parent: 6c9e2342
Showing 560 additions and 4 deletions
@@ -78,6 +78,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
- [AWS Bedrock](https://aws.amazon.com/bedrock/)
- [Anthropic](https://www.anthropic.com/)
- [NVIDIA NIM (chat models)](https://build.nvidia.com/explore/discover)
- [Google Gemini Pro](https://ai.google.dev/)
- [Hugging Face (chat models)](https://huggingface.co/)
- [Ollama (chat models)](https://ollama.ai/)
......
@@ -117,6 +117,10 @@ GID='1000'
# XAI_LLM_API_KEY='xai-your-api-key-here'
# XAI_LLM_MODEL_PREF='grok-beta'
# LLM_PROVIDER='nvidia-nim'
# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
###########################################
######## Embedding API SELECTION ##########
###########################################
......
import RemoteNvidiaNimOptions from "./remote";
import ManagedNvidiaNimOptions from "./managed";
export default function NvidiaNimOptions({ settings }) {
const version = "remote"; // Always "remote" in the Docker version.
return version === "remote" ? (
<RemoteNvidiaNimOptions settings={settings} />
) : (
<ManagedNvidiaNimOptions settings={settings} />
);
}
/**
* This component is used to select, start, and manage NVIDIA NIM
* containers and images via docker management tools.
*/
export default function ManagedNvidiaNimOptions({ settings }) {
return null;
}
import PreLoader from "@/components/Preloader";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import System from "@/models/system";
import { NVIDIA_NIM_COMMON_URLS } from "@/utils/constants";
import { useState, useEffect } from "react";
/**
* This component is used to select a remote NVIDIA NIM endpoint.
* It is the default way to connect to NVIDIA NIM, as the "managed"
* provider only works in the Desktop context.
*/
export default function RemoteNvidiaNimOptions({ settings }) {
const {
autoDetecting: loading,
basePath,
basePathValue,
handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
provider: "nvidia-nim",
initialBasePath: settings?.NvidiaNimLLMBasePath,
ENDPOINTS: NVIDIA_NIM_COMMON_URLS,
});
return (
<div className="flex gap-[36px] mt-1.5">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Nvidia NIM Base URL
</label>
{loading ? (
<PreLoader size="6" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
</div>
<input
type="url"
name="NvidiaNimLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://localhost:8000/v1"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter the URL where Nvidia NIM is running.
</p>
</div>
{!settings?.credentialsOnly && (
<NvidiaNimModelSelection
settings={settings}
basePath={basePath.value}
/>
)}
</div>
);
}
function NvidiaNimModelSelection({ settings, basePath }) {
const [models, setModels] = useState([]);
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
setLoading(true);
const { models } = await System.customModels(
"nvidia-nim",
null,
basePath
);
setModels(models);
setLoading(false);
}
findCustomModels();
}, [basePath]);
if (loading || models.length === 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Chat Model Selection
</label>
<select
name="NvidiaNimLLMModelPref"
disabled={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
-- loading available models --
</option>
</select>
</div>
);
}
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Chat Model Selection
</label>
<select
name="NvidiaNimLLMModelPref"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{models.map((model) => (
<option
key={model.id}
value={model.id}
selected={settings?.NvidiaNimLLMModelPref === model.id}
>
{model.name}
</option>
))}
</select>
</div>
);
}
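For reference, the System.customModels("nvidia-nim", null, basePath) call above is expected to resolve to an object shaped like this sketch (the id/name values are illustrative); this is what populates the model dropdown:

// Hypothetical resolved value - each entry becomes an <option> in the select above.
const exampleResponse = {
  models: [
    { id: "meta/llama-3.2-3b-instruct", name: "meta/llama-3.2-3b-instruct" },
  ],
};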
@@ -43,6 +43,7 @@ const PROVIDER_DEFAULT_MODELS = {
ollama: [],
togetherai: [],
fireworksai: [],
"nvidia-nim": [],
groq: [],
native: [],
cohere: [
......
frontend/src/media/llmprovider/nvidia-nim.png (added, 64.4 KiB)
@@ -29,6 +29,7 @@ import AWSBedrockLogo from "@/media/llmprovider/bedrock.png";
import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
import APIPieLogo from "@/media/llmprovider/apipie.png";
import XAILogo from "@/media/llmprovider/xai.png";
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -56,6 +57,7 @@ import AWSBedrockLLMOptions from "@/components/LLMSelection/AwsBedrockLLMOptions
import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -94,6 +96,15 @@ export const AVAILABLE_LLM_PROVIDERS = [
description: "Google's largest and most capable AI model",
requiredConfig: ["GeminiLLMApiKey"],
},
{
name: "Nvidia NIM",
value: "nvidia-nim",
logo: NvidiaNimLogo,
options: (settings) => <NvidiaNimOptions settings={settings} />,
description:
"Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
requiredConfig: ["NvidiaNimLLMBasePath"],
},
{
name: "HuggingFace",
value: "huggingface",
......
@@ -9,6 +9,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png";
import OllamaLogo from "@/media/llmprovider/ollama.png";
import TogetherAILogo from "@/media/llmprovider/togetherai.png";
import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg";
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
@@ -76,6 +77,13 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: GeminiLogo,
},
"nvidia-nim": {
name: "Nvidia NIM",
description: [
"Your model and chats are only accessible on the machine running the Nvidia NIM service",
],
logo: NvidiaNimLogo,
},
lmstudio: {
name: "LMStudio",
description: [
......
@@ -24,7 +24,7 @@ import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
import APIPieLogo from "@/media/llmprovider/apipie.png";
import NovitaLogo from "@/media/llmprovider/novita.png";
import XAILogo from "@/media/llmprovider/xai.png";
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -51,6 +51,7 @@ import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system";
@@ -91,6 +92,14 @@ const LLMS = [
options: (settings) => <GeminiLLMOptions settings={settings} />,
description: "Google's largest and most capable AI model",
},
{
name: "Nvidia NIM",
value: "nvidia-nim",
logo: NvidiaNimLogo,
options: (settings) => <NvidiaNimOptions settings={settings} />,
description:
"Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
},
{
name: "HuggingFace",
value: "huggingface",
......
@@ -28,6 +28,7 @@ const ENABLED_PROVIDERS = [
"litellm",
"apipie",
"xai",
"nvidia-nim",
// TODO: More agent support.
// "cohere", // Has tool calling and will need to build explicit support
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
......
@@ -37,6 +37,13 @@ export const LOCALAI_COMMON_URLS = [
"http://172.17.0.1:8080/v1",
];
export const NVIDIA_NIM_COMMON_URLS = [
"http://127.0.0.1:8000/v1/version",
"http://localhost:8000/v1/version",
"http://host.docker.internal:8000/v1/version",
"http://172.17.0.1:8000/v1/version",
];
export function fullApiUrl() {
if (API_BASE !== "/api") return API_BASE;
return `${window.location.origin}/api`;
......
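The useProviderEndpointAutoDiscovery hook that consumes these URLs is not part of this diff, but it presumably probes each candidate until one answers. A minimal sketch of that idea, assuming a plain fetch probe with a timeout (the helper name is hypothetical):

// Return the /v1 base of the first NIM endpoint that answers /v1/version.
async function detectNimEndpoint(timeoutMs = 2000) {
  for (const url of NVIDIA_NIM_COMMON_URLS) {
    try {
      const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
      if (res.ok) return url.replace(/\/version$/, ""); // keep the /v1 base path
    } catch {
      // Candidate unreachable - try the next one.
    }
  }
  return null;
}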
@@ -107,6 +107,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# XAI_LLM_API_KEY='xai-your-api-key-here'
# XAI_LLM_MODEL_PREF='grok-beta'
# LLM_PROVIDER='nvidia-nim'
# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
###########################################
######## Embedding API SELECTION ##########
###########################################
......
@@ -547,6 +547,11 @@ const SystemSettings = {
// xAI LLM API Keys
XAIApiKey: !!process.env.XAI_LLM_API_KEY,
XAIModelPref: process.env.XAI_LLM_MODEL_PREF,
// Nvidia NIM Keys
NvidiaNimLLMBasePath: process.env.NVIDIA_NIM_LLM_BASE_PATH,
NvidiaNimLLMModelPref: process.env.NVIDIA_NIM_LLM_MODEL_PREF,
NvidiaNimLLMTokenLimit: process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT,
};
},
......
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class NvidiaNimLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
throw new Error("No Nvidia NIM API Base Path was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.nvidiaNim = new OpenAIApi({
baseURL: parseNvidiaNimBasePath(process.env.NVIDIA_NIM_LLM_BASE_PATH),
apiKey: null,
});
this.model = modelPreference || process.env.NVIDIA_NIM_LLM_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
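// e.g. with the default 4096-token context window this reserves roughly
// 614 tokens for history, 614 for the system prompt and 2867 for user input.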
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.#log(
`Loaded with model: ${this.model} - context window: ${this.promptWindowLimit()} tokens`
);
}
#log(text, ...args) {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
/**
* Set the model token limit `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT` for the given model ID
* @param {string} modelId
* @param {string} basePath
* @returns {Promise<void>}
*/
static async setModelTokenLimit(modelId, basePath = null) {
if (!modelId) return;
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
baseURL: parseNvidiaNimBasePath(
basePath || process.env.NVIDIA_NIM_LLM_BASE_PATH
),
apiKey: null,
});
const models = await openai.models
.list()
.then((results) => results.data)
.catch(() => {
return [];
});
if (!models.length) return;
const modelInfo = models.find((model) => model.id === modelId);
if (!modelInfo) return;
process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT = Number(
modelInfo.max_model_len || 4096
);
}
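// Hypothetical usage (the model ID is illustrative):
//   await NvidiaNimLLM.setModelTokenLimit("meta/llama-3.2-3b-instruct");
// sets NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT from the model's reported max_model_len.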
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
static promptWindowLimit(_modelName) {
const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No Nvidia NIM token context limit was set.");
return Number(limit);
}
// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No Nvidia NIM token context limit was set.");
return Number(limit);
}
async isValidChatCompletionModel(_ = "") {
return true;
}
/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
* @returns {string|object[]}
*/
#generateContent({ userPrompt, attachments = [] }) {
if (!attachments.length) {
return userPrompt;
}
const content = [{ type: "text", text: userPrompt }];
for (let attachment of attachments) {
content.push({
type: "image_url",
image_url: {
url: attachment.contentString,
detail: "auto",
},
});
}
return content.flat();
}
/**
* Construct the user prompt for this model.
* @param {{attachments: import("../../helpers").Attachment[]}} param0
* @returns
*/
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [
prompt,
...chatHistory,
{
role: "user",
content: this.#generateContent({ userPrompt, attachments }),
},
];
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Nvidia NIM chat: ${this.model} is not a valid or defined model for chat completion!`
);
const result = await this.nvidiaNim.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Nvidia NIM chat: ${this.model} is not a valid or defined model for chat completion!`
);
const streamRequest = await this.nvidiaNim.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
/**
* Parse the base path for the Nvidia NIM container API. The base path must end in /v1 with no trailing
* slash, but users may paste in anything, so we normalize whatever was provided into the correct format.
* @param {string} basePath
* @returns {string}
*/
function parseNvidiaNimBasePath(providedBasePath = "") {
try {
const baseURL = new URL(providedBasePath);
const basePath = `${baseURL.origin}/v1`;
return basePath;
} catch (e) {
return providedBasePath;
}
}
module.exports = {
NvidiaNimLLM,
parseNvidiaNimBasePath,
};
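A few illustrative calls showing how parseNvidiaNimBasePath normalizes common paste mistakes (the inputs are hypothetical; the outputs follow from the URL logic above):

const { parseNvidiaNimBasePath } = require("./index"); // assumed module path

parseNvidiaNimBasePath("http://localhost:8000"); // "http://localhost:8000/v1"
parseNvidiaNimBasePath("http://localhost:8000/v1/"); // "http://localhost:8000/v1"
parseNvidiaNimBasePath("http://localhost:8000/v1/chat/completions"); // "http://localhost:8000/v1"
parseNvidiaNimBasePath("not-a-url"); // returned unchanged - the URL constructor throws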
@@ -783,6 +783,8 @@ ${this.getHistory({ to: route.to })
return new Providers.AWSBedrockProvider({});
case "fireworksai":
return new Providers.FireworksAIProvider({ model: config.model });
case "nvidia-nim":
return new Providers.NvidiaNimProvider({ model: config.model });
case "deepseek":
return new Providers.DeepSeekProvider({ model: config.model });
case "litellm":
......
@@ -155,6 +155,14 @@ class Provider {
apiKey: process.env.XAI_LLM_API_KEY ?? null,
...config,
});
case "novita":
return new ChatOpenAI({
configuration: {
baseURL: "https://api.novita.ai/v3/openai",
},
apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
...config,
});
// OSS Model Runners
// case "anythingllm_ollama":
@@ -207,12 +215,12 @@
apiKey: process.env.LITE_LLM_API_KEY ?? null,
...config,
});
case "novita":
case "nvidia-nim":
return new ChatOpenAI({
configuration: {
baseURL: "https://api.novita.ai/v3/openai",
baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
},
apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
apiKey: null,
...config,
});
......
@@ -19,6 +19,7 @@ const LiteLLMProvider = require("./litellm.js");
const ApiPieProvider = require("./apipie.js");
const XAIProvider = require("./xai.js");
const NovitaProvider = require("./novita.js");
const NvidiaNimProvider = require("./nvidiaNim.js");
module.exports = {
OpenAIProvider,
@@ -42,4 +43,5 @@ module.exports = {
ApiPieProvider,
XAIProvider,
NovitaProvider,
NvidiaNimProvider,
};
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
/**
* The agent provider for Nvidia NIM.
* We wrap it in UnTooled because tool-calling may not be supported by every
* NIM model; UnTooled normalizes that behavior.
*/
class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) {
model;
constructor(config = {}) {
const { model } = config;
super();
const client = new OpenAI({
baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
apiKey: null,
maxRetries: 0,
});
this._client = client;
this.model = model;
this.verbose = true;
}
get client() {
return this._client;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
model: this.model,
temperature: 0,
messages,
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Nvidia NIM chat: No results!");
if (result.choices.length === 0)
throw new Error("Nvidia NIM chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
return null;
});
}
/**
* Create a completion based on the received messages.
*
* @param messages A list of messages to send to the API.
* @param functions
* @returns The completion.
*/
async complete(messages, functions = []) {
try {
let completion;
if (functions.length > 0) {
const { toolCall, text } = await this.functionCall(
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
if (toolCall !== null) {
this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
return {
result: null,
functionCall: {
name: toolCall.name,
arguments: toolCall.arguments,
},
cost: 0,
};
}
completion = { content: text };
}
if (!completion?.content) {
this.providerLog(
"Will assume chat completion without tool call inputs."
);
const response = await this.client.chat.completions.create({
model: this.model,
messages: this.cleanMsgs(messages),
});
completion = response.choices[0].message;
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
result: completion.content,
cost: 0,
};
} catch (error) {
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
*/
getCost(_usage) {
return 0;
}
}
module.exports = NvidiaNimProvider;
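A hedged usage sketch (the model name and messages are illustrative; per the complete() logic above, an empty functions array falls through to a plain chat completion):

// Assumes NVIDIA_NIM_LLM_BASE_PATH points at a running NIM container.
const provider = new NvidiaNimProvider({ model: "meta/llama-3.2-3b-instruct" });
async function ask() {
  const { result, cost } = await provider.complete(
    [{ role: "user", content: "Hello!" }],
    [] // no tools available - falls back to a normal chat completion
  );
  console.log(result, cost); // cost is always 0 for a self-hosted NIM
}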
@@ -177,6 +177,12 @@ class AgentHandler {
if (!process.env.NOVITA_LLM_API_KEY)
throw new Error("Novita API Key must be provided to use agents.");
break;
case "nvidia-nim":
if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
throw new Error(
"Nvidia NIM base path must be provided to use agents."
);
break;
default:
throw new Error(
@@ -240,6 +246,8 @@
return process.env.XAI_LLM_MODEL_PREF ?? "grok-beta";
case "novita":
return process.env.NOVITA_LLM_MODEL_PREF ?? "gryphe/mythomax-l2-13b";
case "nvidia-nim":
return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? null;
default:
return null;
}
......