diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js
index 5702fc839ceecf7309a0cb44f9fea9aeb0932293..386e84a53f09971c295622686205106471143de3 100644
--- a/server/utils/AiProviders/anthropic/index.js
+++ b/server/utils/AiProviders/anthropic/index.js
@@ -4,6 +4,7 @@ const {
   clientAbortedHandler,
 } = require("../../helpers/chat/responses");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { MODEL_MAP } = require("../modelMap");
 
 class AnthropicLLM {
   constructor(embedder = null, modelPreference = null) {
@@ -32,25 +33,12 @@ class AnthropicLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    return MODEL_MAP.anthropic[modelName] ?? 100_000;
+  }
+
   promptWindowLimit() {
-    switch (this.model) {
-      case "claude-instant-1.2":
-        return 100_000;
-      case "claude-2.0":
-        return 100_000;
-      case "claude-2.1":
-        return 200_000;
-      case "claude-3-opus-20240229":
-        return 200_000;
-      case "claude-3-sonnet-20240229":
-        return 200_000;
-      case "claude-3-haiku-20240307":
-        return 200_000;
-      case "claude-3-5-sonnet-20240620":
-        return 200_000;
-      default:
-        return 100_000; // assume a claude-instant-1.2 model
-    }
+    return MODEL_MAP.anthropic[this.model] ?? 100_000;
   }
 
   isValidChatCompletionModel(modelName = "") {
diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js
index 231d9c04ce9cefa9fe416744d542b51f6475a200..feb6f0a1b6ec6e0e413cfced88dd295f1734132c 100644
--- a/server/utils/AiProviders/azureOpenAi/index.js
+++ b/server/utils/AiProviders/azureOpenAi/index.js
@@ -43,6 +43,12 @@ class AzureOpenAiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
+      ? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
+      : 4096;
+  }
+
   // Sure the user selected a proper value for the token limit
   // could be any of these https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
   // and if undefined - assume it is the lowest end.
diff --git a/server/utils/AiProviders/bedrock/index.js b/server/utils/AiProviders/bedrock/index.js
index f579c03310a2244012c0dbe8b241eeb92ca40554..ebff7ea29f73db13e3f6ffc2b76291fe411f49f6 100644
--- a/server/utils/AiProviders/bedrock/index.js
+++ b/server/utils/AiProviders/bedrock/index.js
@@ -82,6 +82,13 @@ class AWSBedrockLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT || 8191;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No valid token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js
index f57d2bc5cd9215d6480e7ec97828051c2bb33ac3..f61d43f6232fcf9589a4883d18f35c2e4f9d292c 100644
--- a/server/utils/AiProviders/cohere/index.js
+++ b/server/utils/AiProviders/cohere/index.js
@@ -1,6 +1,7 @@
 const { v4 } = require("uuid");
 const { writeResponseChunk } = require("../../helpers/chat/responses");
 const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { MODEL_MAP } = require("../modelMap");
 
 class CohereLLM {
   constructor(embedder = null) {
@@ -58,23 +59,12 @@ class CohereLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    return MODEL_MAP.cohere[modelName] ?? 4_096;
+  }
+
   promptWindowLimit() {
-    switch (this.model) {
-      case "command-r":
-        return 128_000;
-      case "command-r-plus":
-        return 128_000;
-      case "command":
-        return 4_096;
-      case "command-light":
-        return 4_096;
-      case "command-nightly":
-        return 8_192;
-      case "command-light-nightly":
-        return 8_192;
-      default:
-        return 4_096;
-    }
+    return MODEL_MAP.cohere[this.model] ?? 4_096;
   }
 
   async isValidChatCompletionModel(model = "") {
diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js
index f29d73e356efada1239d7327aec8b506aadebe6a..7acc924cc19c998818dcf34daab0e97d4252102a 100644
--- a/server/utils/AiProviders/gemini/index.js
+++ b/server/utils/AiProviders/gemini/index.js
@@ -3,6 +3,7 @@ const {
   writeResponseChunk,
   clientAbortedHandler,
 } = require("../../helpers/chat/responses");
+const { MODEL_MAP } = require("../modelMap");
 
 class GeminiLLM {
   constructor(embedder = null, modelPreference = null) {
@@ -89,21 +90,12 @@ class GeminiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    return MODEL_MAP.gemini[modelName] ?? 30_720;
+  }
+
   promptWindowLimit() {
-    switch (this.model) {
-      case "gemini-pro":
-        return 30_720;
-      case "gemini-1.0-pro":
-        return 30_720;
-      case "gemini-1.5-flash-latest":
-        return 1_048_576;
-      case "gemini-1.5-pro-latest":
-        return 2_097_152;
-      case "gemini-1.5-pro-exp-0801":
-        return 2_097_152;
-      default:
-        return 30_720; // assume a gemini-pro model
-    }
+    return MODEL_MAP.gemini[this.model] ?? 30_720;
   }
 
   isValidChatCompletionModel(modelName = "") {
diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js
index 7c027c4344507127c03b84b0b576e25fbee13852..fe2902300060a2c23274fa11dac335c892bef5e9 100644
--- a/server/utils/AiProviders/genericOpenAi/index.js
+++ b/server/utils/AiProviders/genericOpenAi/index.js
@@ -55,6 +55,13 @@ class GenericOpenAiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js
index d76bddcc461a44b04a579b6ad39e93fbdc5dc7af..c176f1dcaddeb0c51b41255b1cbaa048a897a066 100644
--- a/server/utils/AiProviders/groq/index.js
+++ b/server/utils/AiProviders/groq/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
+const { MODEL_MAP } = require("../modelMap");
 
 class GroqLLM {
   constructor(embedder = null, modelPreference = null) {
@@ -40,21 +41,12 @@ class GroqLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    return MODEL_MAP.groq[modelName] ?? 8192;
+  }
+
   promptWindowLimit() {
-    switch (this.model) {
-      case "gemma2-9b-it":
-      case "gemma-7b-it":
-      case "llama3-70b-8192":
-      case "llama3-8b-8192":
-        return 8192;
-      case "llama-3.1-70b-versatile":
-      case "llama-3.1-8b-instant":
-        return 8000;
-      case "mixtral-8x7b-32768":
-        return 32768;
-      default:
-        return 8192;
-    }
+    return MODEL_MAP.groq[this.model] ?? 8192;
   }
 
   async isValidChatCompletionModel(modelName = "") {
diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js
index ddb1f6c42a168f9bbb2ba0bc7a066ca426a2539e..021a636b3dd41abdf713f78d42968cc06828469d 100644
--- a/server/utils/AiProviders/huggingface/index.js
+++ b/server/utils/AiProviders/huggingface/index.js
@@ -45,6 +45,13 @@ class HuggingFaceLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No HuggingFace token context limit was set.");
+    return Number(limit);
+  }
+
   promptWindowLimit() {
     const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
     if (!limit || isNaN(Number(limit)))
diff --git a/server/utils/AiProviders/koboldCPP/index.js b/server/utils/AiProviders/koboldCPP/index.js
index 5c67103d303520de1a8471bcb10bdb3de1edce5e..9a700793daf46c91629fbe08bef82cc2ef190a7e 100644
--- a/server/utils/AiProviders/koboldCPP/index.js
+++ b/server/utils/AiProviders/koboldCPP/index.js
@@ -51,6 +51,13 @@ class KoboldCPPLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js
index 897a484dd19628cf5965bb7f937c2f42043d3165..d8907e7a9095cf9a4c544eda7bb9f4d022ad1fde 100644
--- a/server/utils/AiProviders/liteLLM/index.js
+++ b/server/utils/AiProviders/liteLLM/index.js
@@ -50,6 +50,13 @@ class LiteLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.LITE_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/lmStudio/index.js b/server/utils/AiProviders/lmStudio/index.js
index 6ff025884c73736a3cd122a1b96e2ec2ddd27b41..6f0593b8c2276039ea25110a2903eb15c39897ce 100644
--- a/server/utils/AiProviders/lmStudio/index.js
+++ b/server/utils/AiProviders/lmStudio/index.js
@@ -48,6 +48,13 @@ class LMStudioLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No LMStudio token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/localAi/index.js b/server/utils/AiProviders/localAi/index.js
index 2275e1e8ded96b82afc531d780c2ee5922de5fcb..2d5e8b1f455c25fec301de3731a7ffcfb935c291 100644
--- a/server/utils/AiProviders/localAi/index.js
+++ b/server/utils/AiProviders/localAi/index.js
@@ -40,6 +40,13 @@ class LocalAiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No LocalAi token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/mistral/index.js b/server/utils/AiProviders/mistral/index.js
index 92cc63f5acf76933212345e3b8fc42281dc90881..7dfe741963e22b60c36476547c70b2542f03e51f 100644
--- a/server/utils/AiProviders/mistral/index.js
+++ b/server/utils/AiProviders/mistral/index.js
@@ -41,6 +41,10 @@ class MistralLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit() {
+    return 32000;
+  }
+
   promptWindowLimit() {
     return 32000;
   }
diff --git a/server/utils/AiProviders/modelMap.js b/server/utils/AiProviders/modelMap.js
new file mode 100644
index 0000000000000000000000000000000000000000..151bd7cd8ca4469baa759ccc1173b5beb1b5f6d3
--- /dev/null
+++ b/server/utils/AiProviders/modelMap.js
@@ -0,0 +1,55 @@
+/**
+ * The model name and context window for all known models
+ * available through providers that have discrete model options.
+ */
+const MODEL_MAP = {
+  anthropic: {
+    "claude-instant-1.2": 100_000,
+    "claude-2.0": 100_000,
+    "claude-2.1": 200_000,
+    "claude-3-opus-20240229": 200_000,
+    "claude-3-sonnet-20240229": 200_000,
+    "claude-3-haiku-20240307": 200_000,
+    "claude-3-5-sonnet-20240620": 200_000,
+  },
+  cohere: {
+    "command-r": 128_000,
+    "command-r-plus": 128_000,
+    command: 4_096,
+    "command-light": 4_096,
+    "command-nightly": 8_192,
+    "command-light-nightly": 8_192,
+  },
+  gemini: {
+    "gemini-pro": 30_720,
+    "gemini-1.0-pro": 30_720,
+    "gemini-1.5-flash-latest": 1_048_576,
+    "gemini-1.5-pro-latest": 2_097_152,
+    "gemini-1.5-pro-exp-0801": 2_097_152,
+  },
+  groq: {
+    "gemma2-9b-it": 8192,
+    "gemma-7b-it": 8192,
+    "llama3-70b-8192": 8192,
+    "llama3-8b-8192": 8192,
+    "llama-3.1-70b-versatile": 8000,
+    "llama-3.1-8b-instant": 8000,
+    "mixtral-8x7b-32768": 32768,
+  },
+  openai: {
+    "gpt-3.5-turbo": 16_385,
+    "gpt-3.5-turbo-1106": 16_385,
+    "gpt-4o": 128_000,
+    "gpt-4o-2024-08-06": 128_000,
+    "gpt-4o-2024-05-13": 128_000,
+    "gpt-4o-mini": 128_000,
+    "gpt-4o-mini-2024-07-18": 128_000,
+    "gpt-4-turbo": 128_000,
+    "gpt-4-1106-preview": 128_000,
+    "gpt-4-turbo-preview": 128_000,
+    "gpt-4": 8_192,
+    "gpt-4-32k": 32_000,
+  },
+};
+
+module.exports = { MODEL_MAP };
diff --git a/server/utils/AiProviders/native/index.js b/server/utils/AiProviders/native/index.js
index 630cc9ea124b24678fab418885b5621145ed405b..4d15cdac060f0b6efa4a2b70a7ca79e29a5d3830 100644
--- a/server/utils/AiProviders/native/index.js
+++ b/server/utils/AiProviders/native/index.js
@@ -96,6 +96,13 @@ class NativeLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No NativeAI token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   promptWindowLimit() {
     const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index 02e780777879175ed0b9f30fa1eb3221ac9ffd59..eb18ee6f33010ecfe93b19551c2643f1f66f397e 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -82,6 +82,13 @@ class OllamaAILLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.OLLAMA_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Ollama token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js
index 57ea28897565bf55c8f28d7629ba83d83279be8d..b0e52dc2b9885e8f55b7a8a5a161e2b00ee0c718 100644
--- a/server/utils/AiProviders/openAi/index.js
+++ b/server/utils/AiProviders/openAi/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
 const {
   handleDefaultStreamResponseV2,
 } = require("../../helpers/chat/responses");
+const { MODEL_MAP } = require("../modelMap");
 
 class OpenAiLLM {
   constructor(embedder = null, modelPreference = null) {
@@ -38,27 +39,12 @@ class OpenAiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    return MODEL_MAP.openai[modelName] ?? 4_096;
+  }
+
   promptWindowLimit() {
-    switch (this.model) {
-      case "gpt-3.5-turbo":
-      case "gpt-3.5-turbo-1106":
-        return 16_385;
-      case "gpt-4o":
-      case "gpt-4o-2024-08-06":
-      case "gpt-4o-2024-05-13":
-      case "gpt-4o-mini":
-      case "gpt-4o-mini-2024-07-18":
-      case "gpt-4-turbo":
-      case "gpt-4-1106-preview":
-      case "gpt-4-turbo-preview":
-        return 128_000;
-      case "gpt-4":
-        return 8_192;
-      case "gpt-4-32k":
-        return 32_000;
-      default:
-        return 4_096; // assume a fine-tune 3.5?
-    }
+    return MODEL_MAP.openai[this.model] ?? 4_096;
   }
 
   // Short circuit if name has 'gpt' since we now fetch models from OpenAI API
diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js
index 00a176e1ba6ef392fe469b37adffa269587e6bed..3ec813423e3465dba2eeb2beb924f807aeaf6d22 100644
--- a/server/utils/AiProviders/openRouter/index.js
+++ b/server/utils/AiProviders/openRouter/index.js
@@ -117,6 +117,17 @@ class OpenRouterLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    const cacheModelPath = path.resolve(cacheFolder, "models.json");
+    const availableModels = fs.existsSync(cacheModelPath)
+      ? safeJsonParse(
+          fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
+          {}
+        )
+      : {};
+    return availableModels[modelName]?.maxLength || 4096;
+  }
+
   promptWindowLimit() {
     const availableModels = this.models();
     return availableModels[this.model]?.maxLength || 4096;
diff --git a/server/utils/AiProviders/perplexity/index.js b/server/utils/AiProviders/perplexity/index.js
index 712605f0bbc1a746acf58bd07d71bb470f4c0979..93639f9f14d98106f97ce08c04e9a29eed64d3e3 100644
--- a/server/utils/AiProviders/perplexity/index.js
+++ b/server/utils/AiProviders/perplexity/index.js
@@ -52,6 +52,11 @@ class PerplexityLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    const availableModels = perplexityModels();
+    return availableModels[modelName]?.maxLength || 4096;
+  }
+
   promptWindowLimit() {
     const availableModels = this.allModelInformation();
     return availableModels[this.model]?.maxLength || 4096;
diff --git a/server/utils/AiProviders/textGenWebUI/index.js b/server/utils/AiProviders/textGenWebUI/index.js
index 9400a12f4b981acac129f4a9eea76a752199fdde..68d7a6ac8588aa74aecd9043253de25127321c51 100644
--- a/server/utils/AiProviders/textGenWebUI/index.js
+++ b/server/utils/AiProviders/textGenWebUI/index.js
@@ -48,6 +48,13 @@ class TextGenWebUILLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/AiProviders/togetherAi/index.js b/server/utils/AiProviders/togetherAi/index.js
index 5d25edf9ebb9e1cdf92630e5d82081ca6d13a59a..8c9f8831cf0c5987bf4890d368c9aaafb291ff90 100644
--- a/server/utils/AiProviders/togetherAi/index.js
+++ b/server/utils/AiProviders/togetherAi/index.js
@@ -48,6 +48,11 @@ class TogetherAiLLM {
     return "streamGetChatCompletion" in this;
   }
 
+  static promptWindowLimit(modelName) {
+    const availableModels = togetherAiModels();
+    return availableModels[modelName]?.maxLength || 4096;
+  }
+
   // Ensure the user set a value for the token limit
   // and if undefined - assume 4096 window.
   promptWindowLimit() {
diff --git a/server/utils/agents/aibitat/plugins/summarize.js b/server/utils/agents/aibitat/plugins/summarize.js
index bd491f9608c948987bf3def188140e436c2a8611..d532a07159ac2c5052d9c2f28493a357b7448dba 100644
--- a/server/utils/agents/aibitat/plugins/summarize.js
+++ b/server/utils/agents/aibitat/plugins/summarize.js
@@ -136,9 +136,11 @@ const docSummarizer = {
             );
           }
 
+          const { TokenManager } = require("../../../helpers/tiktoken");
           if (
-            document.content?.length <
-            Provider.contextLimit(this.super.provider)
+            new TokenManager(this.super.model).countFromString(
+              document.content
+            ) < Provider.contextLimit(this.super.provider, this.super.model)
           ) {
             return document.content;
           }
diff --git a/server/utils/agents/aibitat/plugins/web-scraping.js b/server/utils/agents/aibitat/plugins/web-scraping.js
index df26caf819ddd82dfe11ac71c342515a042a7557..a7dc7a3c790ff1b910ef5e0439e65e95ff67d6ae 100644
--- a/server/utils/agents/aibitat/plugins/web-scraping.js
+++ b/server/utils/agents/aibitat/plugins/web-scraping.js
@@ -77,7 +77,11 @@ const webScraping = {
       throw new Error("There was no content to be collected or read.");
     }
 
-    if (content.length < Provider.contextLimit(this.super.provider)) {
+    const { TokenManager } = require("../../../helpers/tiktoken");
+    if (
+      new TokenManager(this.super.model).countFromString(content) <
+      Provider.contextLimit(this.super.provider, this.super.model)
+    ) {
       return content;
     }
 
diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js
index 472f72be292abae96719e6f5298484501dea2a9e..034c67ad0d7430dec91565b1bef29a8076321cbf 100644
--- a/server/utils/agents/aibitat/providers/ai-provider.js
+++ b/server/utils/agents/aibitat/providers/ai-provider.js
@@ -15,6 +15,7 @@ const { ChatAnthropic } = require("@langchain/anthropic");
 const { ChatBedrockConverse } = require("@langchain/aws");
 const { ChatOllama } = require("@langchain/community/chat_models/ollama");
 const { toValidNumber } = require("../../../http");
+const { getLLMProviderClass } = require("../../../helpers");
 
 const DEFAULT_WORKSPACE_PROMPT =
   "You are a helpful ai assistant who can assist the user and use tools available to help answer the users prompts and questions.";
@@ -173,15 +174,16 @@ class Provider {
     }
   }
 
-  static contextLimit(provider = "openai") {
-    switch (provider) {
-      case "openai":
-        return 8_000;
-      case "anthropic":
-        return 100_000;
-      default:
-        return 8_000;
-    }
+  /**
+   * Get the context limit for a provider/model combination using static method in AIProvider class.
+   * @param {string} provider
+   * @param {string} modelName
+   * @returns {number}
+   */
+  static contextLimit(provider = "openai", modelName) {
+    const llm = getLLMProviderClass({ provider });
+    if (!llm || !llm.hasOwnProperty("promptWindowLimit")) return 8_000;
+    return llm.promptWindowLimit(modelName);
   }
 
   // For some providers we may want to override the system prompt to be more verbose.
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 765e7226fdee3d3a9d0514ac8bde174528aa644c..6ec0b2a3189ecdcd5838c0ee6dc471e215421743 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -20,6 +20,11 @@
  * @property {Function} compressMessages - Compresses chat messages to fit within the token limit.
  */
 
+/**
+ * @typedef {Object} BaseLLMProviderClass - Class method of provider - not instantiated
+ * @property {function(string): number} promptWindowLimit - Returns the token limit for the provided model.
+ */
+
 /**
  * @typedef {Object} BaseVectorDatabaseProvider
  * @property {string} name - The name of the Vector Database instance.
@@ -204,6 +209,78 @@ function getEmbeddingEngineSelection() {
   }
 }
 
+/**
+ * Returns the LLMProviderClass - this is a helper method to access static methods on a class
+ * @param {{provider: string | null} | null} params - Initialize params for LLMs provider
+ * @returns {BaseLLMProviderClass}
+ */
+function getLLMProviderClass({ provider = null } = {}) {
+  switch (provider) {
+    case "openai":
+      const { OpenAiLLM } = require("../AiProviders/openAi");
+      return OpenAiLLM;
+    case "azure":
+      const { AzureOpenAiLLM } = require("../AiProviders/azureOpenAi");
+      return AzureOpenAiLLM;
+    case "anthropic":
+      const { AnthropicLLM } = require("../AiProviders/anthropic");
+      return AnthropicLLM;
+    case "gemini":
+      const { GeminiLLM } = require("../AiProviders/gemini");
+      return GeminiLLM;
+    case "lmstudio":
+      const { LMStudioLLM } = require("../AiProviders/lmStudio");
+      return LMStudioLLM;
+    case "localai":
+      const { LocalAiLLM } = require("../AiProviders/localAi");
+      return LocalAiLLM;
+    case "ollama":
+      const { OllamaAILLM } = require("../AiProviders/ollama");
+      return OllamaAILLM;
+    case "togetherai":
+      const { TogetherAiLLM } = require("../AiProviders/togetherAi");
+      return TogetherAiLLM;
+    case "perplexity":
+      const { PerplexityLLM } = require("../AiProviders/perplexity");
+      return PerplexityLLM;
+    case "openrouter":
+      const { OpenRouterLLM } = require("../AiProviders/openRouter");
+      return OpenRouterLLM;
+    case "mistral":
+      const { MistralLLM } = require("../AiProviders/mistral");
+      return MistralLLM;
+    case "native":
+      const { NativeLLM } = require("../AiProviders/native");
+      return NativeLLM;
+    case "huggingface":
+      const { HuggingFaceLLM } = require("../AiProviders/huggingface");
+      return HuggingFaceLLM;
+    case "groq":
+      const { GroqLLM } = require("../AiProviders/groq");
+      return GroqLLM;
+    case "koboldcpp":
+      const { KoboldCPPLLM } = require("../AiProviders/koboldCPP");
+      return KoboldCPPLLM;
+    case "textgenwebui":
+      const { TextGenWebUILLM } = require("../AiProviders/textGenWebUI");
+      return TextGenWebUILLM;
+    case "cohere":
+      const { CohereLLM } = require("../AiProviders/cohere");
+      return CohereLLM;
+    case "litellm":
+      const { LiteLLM } = require("../AiProviders/liteLLM");
+      return LiteLLM;
+    case "generic-openai":
+      const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi");
+      return GenericOpenAiLLM;
+    case "bedrock":
+      const { AWSBedrockLLM } = require("../AiProviders/bedrock");
+      return AWSBedrockLLM;
+    default:
+      return null;
+  }
+}
+
 // Some models have lower restrictions on chars that can be encoded in a single pass
 // and by default we assume it can handle 1,000 chars, but some models use work with smaller
 // chars so here we can override that value when embedding information.
@@ -228,6 +305,7 @@ module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
   getVectorDbClass,
+  getLLMProviderClass,
   getLLMProvider,
   toChunks,
 };
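
A rough usage sketch of the new static context-window lookups follows. It is not part of the patch; the require paths are illustrative and assume they resolve to the `server/utils` modules touched above, with `Provider` exported as the module default from `ai-provider.js`.

// Hypothetical usage sketch; paths are illustrative and not part of the diff.
const { getLLMProviderClass } = require("./server/utils/helpers");
const Provider = require("./server/utils/agents/aibitat/providers/ai-provider");

// Resolve a provider class without instantiating it, then read the model's
// context window from the new static, MODEL_MAP-backed promptWindowLimit().
const AnthropicLLM = getLLMProviderClass({ provider: "anthropic" });
console.log(AnthropicLLM.promptWindowLimit("claude-3-opus-20240229")); // 200_000 per modelMap.js

// Agent plugins now pass both provider and model, so token counts are compared
// against the model's actual window instead of the old hard-coded 8k/100k values.
console.log(Provider.contextLimit("openai", "gpt-4o")); // 128_000 per modelMap.js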