diff --git a/README.md b/README.md
index 0a5ee5bc6411ce2fb889b2b704875d9ec44a3336..8e761a4364485a2614da1fbdd37205410d8121c0 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ Some cool features of AnythingLLM
 - [Groq](https://groq.com/)
 - [Cohere](https://cohere.com/)
 - [KoboldCPP](https://github.com/LostRuins/koboldcpp)
-
+- [Text Generation Web UI](https://github.com/oobabooga/text-generation-webui)
 
 **Supported Embedding models:**
 
@@ -96,13 +96,11 @@ Some cool features of AnythingLLM
 - [LM Studio (all)](https://lmstudio.ai)
 - [Cohere](https://cohere.com/)
-
 
 **Supported Transcription models:**
 
 - [AnythingLLM Built-in](https://github.com/Mintplex-Labs/anything-llm/tree/master/server/storage/models#audiovideo-transcription) (default)
 - [OpenAI](https://openai.com/)
-
 
 **Supported Vector Databases:**
 
 - [LanceDB](https://github.com/lancedb/lancedb) (default)
@@ -114,7 +112,6 @@ Some cool features of AnythingLLM
 - [Milvus](https://milvus.io)
 - [Zilliz](https://zilliz.com)
-
 
 ### Technical Overview
 
 This monorepo consists of three main sections:
@@ -155,7 +152,6 @@ Mintplex Labs & the community maintain a number of deployment methods, scripts,
 - create PR with branch name format of `<issue number>-<short name>`
 - yee haw let's merge
-
 
 ## Telemetry & Privacy
 
 AnythingLLM by Mintplex Labs Inc contains a telemetry feature that collects anonymous usage information.
diff --git a/docker/.env.example b/docker/.env.example
index e10ace026d12938381888a53aba19536579b2533..5ea5ce609da9707b58e640f046207d2d47a19a3d 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -71,6 +71,10 @@ GID='1000'
 # KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
 # KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
 
+# LLM_PROVIDER='textgenwebui'
+# TEXT_GEN_WEB_UI_BASE_PATH='http://127.0.0.1:5000/v1'
+# TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT=4096
+
 # LLM_PROVIDER='generic-openai'
 # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'
 # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo'
diff --git a/frontend/src/components/LLMSelection/TextGenWebUIOptions/index.jsx b/frontend/src/components/LLMSelection/TextGenWebUIOptions/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..ec29222d24482696272b7b92dad47e328d6a8e69
--- /dev/null
+++ b/frontend/src/components/LLMSelection/TextGenWebUIOptions/index.jsx
@@ -0,0 +1,37 @@
+export default function TextGenWebUIOptions({ settings }) {
+  return (
+    <div className="flex gap-4 flex-wrap">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Base URL
+        </label>
+        <input
+          type="url"
+          name="TextGenWebUIBasePath"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="http://127.0.0.1:5000/v1"
+          defaultValue={settings?.TextGenWebUIBasePath}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+        />
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Token context window
+        </label>
+        <input
+          type="number"
+          name="TextGenWebUITokenLimit"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="Context window limit (eg: 4096)"
+          min={1}
+          onScroll={(e) => e.target.blur()}
+          defaultValue={settings?.TextGenWebUITokenLimit}
+          required={true}
+          autoComplete="off"
+        />
+      </div>
+    </div>
+  );
+}
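The two inputs above only name their values; persistence happens elsewhere. As a rough, hypothetical sketch of the payload shape the LLM preference pages submit for this provider (key names are taken from the `name` attributes above; the helper that posts the payload is outside this diff):

// Hypothetical payload — the input `name` attributes become the keys, and
// updateENV.js (later in this diff) maps them onto TEXT_GEN_WEB_UI_* env vars.
const settingsData = {
  LLMProvider: "textgenwebui",
  TextGenWebUIBasePath: "http://127.0.0.1:5000/v1",
  TextGenWebUITokenLimit: 4096,
};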
diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js
index dfd468111edf91407c3790b9970cc590f8bbe834..29075c557931a138fabd378fc9a0b011ebc5a974 100644
--- a/frontend/src/hooks/useGetProvidersModels.js
+++ b/frontend/src/hooks/useGetProvidersModels.js
@@ -2,7 +2,12 @@ import System from "@/models/system";
 import { useEffect, useState } from "react";
 
 // Providers which cannot use this feature for workspace<>model selection
-export const DISABLED_PROVIDERS = ["azure", "lmstudio", "native"];
+export const DISABLED_PROVIDERS = [
+  "azure",
+  "lmstudio",
+  "native",
+  "textgenwebui",
+];
 const PROVIDER_DEFAULT_MODELS = {
   openai: [],
   gemini: ["gemini-pro", "gemini-1.5-pro-latest"],
@@ -34,6 +39,7 @@ const PROVIDER_DEFAULT_MODELS = {
     "command-nightly",
     "command-light-nightly",
   ],
+  textgenwebui: [],
 };
 
 // For togetherAi, which has a large model list - we subgroup the options
diff --git a/frontend/src/media/llmprovider/text-generation-webui.png b/frontend/src/media/llmprovider/text-generation-webui.png
new file mode 100644
index 0000000000000000000000000000000000000000..415644485b8db163c50befd4f21cb23c2c72a2ba
Binary files /dev/null and b/frontend/src/media/llmprovider/text-generation-webui.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 60827e0def06274e04cc3ea8781869df49f3a20a..4db2012369f1243da6766b9098acdea6da50ab0b 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -19,6 +19,7 @@ import PerplexityLogo from "@/media/llmprovider/perplexity.png";
 import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
 import GroqLogo from "@/media/llmprovider/groq.png";
 import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png";
+import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -42,6 +43,7 @@ import LLMItem from "@/components/LLMSelection/LLMItem";
 import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
 import CTAButton from "@/components/lib/CTAButton";
 import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions";
+import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions";
 
 export const AVAILABLE_LLM_PROVIDERS = [
   {
@@ -168,6 +170,14 @@ export const AVAILABLE_LLM_PROVIDERS = [
       "KoboldCPPTokenLimit",
     ],
   },
+  {
+    name: "Oobabooga Web UI",
+    value: "textgenwebui",
+    logo: TextGenWebUILogo,
+    options: (settings) => <TextGenWebUIOptions settings={settings} />,
+    description: "Run local LLMs using Oobabooga's Text Generation Web UI.",
+    requiredConfig: ["TextGenWebUIBasePath", "TextGenWebUITokenLimit"],
+  },
   {
     name: "Cohere",
     value: "cohere",
"@/media/llmprovider/text-generation-webui.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; import AstraDBLogo from "@/media/vectordbs/astraDB.png"; @@ -146,6 +147,13 @@ export const LLM_SELECTION_PRIVACY = { ], logo: KoboldCPPLogo, }, + textgenwebui: { + name: "Oobabooga Web UI", + description: [ + "Your model and chats are only accessible on the server running the Oobabooga Text Generation Web UI", + ], + logo: TextGenWebUILogo, + }, "generic-openai": { name: "Generic OpenAI compatible service", description: [ diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 4cf3c221e9ec8dec0dd9ae66165f1500570d77de..966253f475120520cfd730202814a19135611720 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -16,6 +16,7 @@ import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; import GroqLogo from "@/media/llmprovider/groq.png"; import KoboldCPPLogo from "@/media/llmprovider/koboldcpp.png"; +import TextGenWebUILogo from "@/media/llmprovider/text-generation-webui.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -40,6 +41,7 @@ import paths from "@/utils/paths"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; import KoboldCPPOptions from "@/components/LLMSelection/KoboldCPPOptions"; +import TextGenWebUIOptions from "@/components/LLMSelection/TextGenWebUIOptions"; const TITLE = "LLM Preference"; const DESCRIPTION = @@ -111,6 +113,13 @@ const LLMS = [ options: (settings) => <KoboldCPPOptions settings={settings} />, description: "Run local LLMs using koboldcpp.", }, + { + name: "Oobabooga Web UI", + value: "textgenwebui", + logo: TextGenWebUILogo, + options: (settings) => <TextGenWebUIOptions settings={settings} />, + description: "Run local LLMs using Oobabooga's Text Generation Web UI.", + }, { name: "Together AI", value: "togetherai", diff --git a/server/.env.example b/server/.env.example index c8f05340a1f4e6e0d29d7f03423d5d6d7707a2fc..008d90d2fc7d94294932b2cff58a4259fe933fb9 100644 --- a/server/.env.example +++ b/server/.env.example @@ -68,6 +68,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S' # KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096 +# LLM_PROVIDER='textgenwebui' +# TEXT_GEN_WEB_UI_BASE_PATH='http://127.0.0.1:5000/v1' +# TEXT_GEN_WEB_UI_TOKEN_LIMIT=4096 + # LLM_PROVIDER='generic-openai' # GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1' # GENERIC_OPEN_AI_MODEL_PREF='gpt-3.5-turbo' diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index f7782d26a4113f752de40cdb9679e93a715ec797..9ac41db0c14c30918b4959eb1caae606e79fe9e5 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -364,6 +364,10 @@ const SystemSettings = { KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH, KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT, + // Text Generation Web UI Keys + TextGenWebUIBasePath: process.env.TEXT_GEN_WEB_UI_BASE_PATH, + TextGenWebUITokenLimit: process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT, + // 
       // Generic OpenAI Keys
       GenericOpenAiBasePath: process.env.GENERIC_OPEN_AI_BASE_PATH,
       GenericOpenAiModelPref: process.env.GENERIC_OPEN_AI_MODEL_PREF,
diff --git a/server/utils/AiProviders/textGenWebUI/index.js b/server/utils/AiProviders/textGenWebUI/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..ae0282a30fce4579a6f5eafc7c8e7b089a9052a8
--- /dev/null
+++ b/server/utils/AiProviders/textGenWebUI/index.js
@@ -0,0 +1,131 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+  handleDefaultStreamResponseV2,
+} = require("../../helpers/chat/responses");
+
+class TextGenWebUILLM {
+  constructor(embedder = null) {
+    const { OpenAI: OpenAIApi } = require("openai");
+    if (!process.env.TEXT_GEN_WEB_UI_BASE_PATH)
+      throw new Error(
+        "TextGenWebUI must have a valid base path to use for the api."
+      );
+
+    this.basePath = process.env.TEXT_GEN_WEB_UI_BASE_PATH;
+    this.openai = new OpenAIApi({
+      baseURL: this.basePath,
+      apiKey: null,
+    });
+    this.model = null;
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    this.embedder = !embedder ? new NativeEmbedder() : embedder;
+    this.defaultTemp = 0.7;
+    this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const limit = process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No token context limit was set.");
+    return Number(limit);
+  }
+
+  // Short circuit since we have no idea if the model is valid or not
+  // in pre-flight for generic endpoints
+  isValidChatCompletionModel(_modelName = "") {
+    return true;
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    const result = await this.openai.chat.completions
+      .create({
+        model: this.model,
+        messages,
+        temperature,
+      })
+      .catch((e) => {
+        throw new Error(e.response.data.error.message);
+      });
+
+    if (!result.hasOwnProperty("choices") || result.choices.length === 0)
+      return null;
+    return result.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const streamRequest = await this.openai.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+      temperature,
+    });
+    return streamRequest;
+  }
+
+  handleStream(response, stream, responseProps) {
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  TextGenWebUILLM,
+};
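For orientation, a minimal sketch of how this class could be exercised in isolation. It assumes TEXT_GEN_WEB_UI_BASE_PATH points at a running Text Generation Web UI instance with its OpenAI-compatible API enabled; the require path is illustrative, not part of this diff.

const { TextGenWebUILLM } = require("./server/utils/AiProviders/textGenWebUI");

async function demo() {
  const llm = new TextGenWebUILLM(); // throws if TEXT_GEN_WEB_UI_BASE_PATH is unset
  // Build the [system, ...history, user] message array the class expects.
  const messages = llm.constructPrompt({
    systemPrompt: "You are a helpful assistant.",
    contextTexts: [],
    chatHistory: [],
    userPrompt: "Say hello in one sentence.",
  });
  // model stays null on purpose; the endpoint answers with whatever model it has loaded.
  const reply = await llm.getChatCompletion(messages, { temperature: 0.7 });
  console.log(reply);
}

demo().catch(console.error);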
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index ba65e3dfb6fcb860980a4996a6771209e0d63c2d..514d32042596fc76ee63d9f7b073dea5b8ed051b 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -80,6 +80,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "koboldcpp":
       const { KoboldCPPLLM } = require("../AiProviders/koboldCPP");
       return new KoboldCPPLLM(embedder, model);
+    case "textgenwebui":
+      const { TextGenWebUILLM } = require("../AiProviders/textGenWebUI");
+      return new TextGenWebUILLM(embedder, model);
     case "cohere":
       const { CohereLLM } = require("../AiProviders/cohere");
       return new CohereLLM(embedder, model);
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 19cdfe2b2d7e74227cb773b7083de1a2ccc74620..11be3db807aab5a9326b9328c1ed24a71cf95d8b 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -146,6 +146,16 @@ const KEY_MAPPING = {
     checks: [nonZero],
   },
 
+  // Text Generation Web UI Settings
+  TextGenWebUIBasePath: {
+    envKey: "TEXT_GEN_WEB_UI_BASE_PATH",
+    checks: [isValidURL],
+  },
+  TextGenWebUITokenLimit: {
+    envKey: "TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+
   // Generic OpenAI InferenceSettings
   GenericOpenAiBasePath: {
     envKey: "GENERIC_OPEN_AI_BASE_PATH",
@@ -418,6 +428,7 @@ function supportedLLM(input = "") {
     "openrouter",
     "groq",
     "koboldcpp",
+    "textgenwebui",
     "cohere",
     "generic-openai",
   ].includes(input);
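Taken together with the env entries above, server-side selection of the new provider reduces to roughly the following sketch. The inline env assignments and require path are illustrative only; in practice the values come from docker/.env or server/.env.

// Illustrative wiring — values would normally be set in the .env file, not in code.
process.env.LLM_PROVIDER = "textgenwebui";
process.env.TEXT_GEN_WEB_UI_BASE_PATH = "http://127.0.0.1:5000/v1";
process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT = "4096";

const { getLLMProvider } = require("./server/utils/helpers");
const llm = getLLMProvider({ provider: "textgenwebui" });
console.log(llm.promptWindowLimit()); // 4096
console.log(llm.streamingEnabled()); // true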