diff --git a/docker/.env.example b/docker/.env.example
index 4213c3ff507b90cf98da9859a3ff338083e9cf9c..b7674e91b34cd97002517b0939bbece01f049aab 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -48,6 +48,11 @@ GID='1000'
 # MISTRAL_API_KEY='example-mistral-ai-api-key'
 # MISTRAL_MODEL_PREF='mistral-tiny'
 
+# LLM_PROVIDER='huggingface'
+# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
+# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
+# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
diff --git a/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx b/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..7e8747da1a02ae6025bc5422e6e036dd64d13aae
--- /dev/null
+++ b/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx
@@ -0,0 +1,56 @@
+export default function HuggingFaceOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col">
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            HuggingFace Inference Endpoint
+          </label>
+          <input
+            type="url"
+            name="HuggingFaceLLMEndpoint"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="https://example.endpoints.huggingface.cloud"
+            defaultValue={settings?.HuggingFaceLLMEndpoint}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            HuggingFace Access Token
+          </label>
+          <input
+            type="password"
+            name="HuggingFaceLLMAccessToken"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="HuggingFace Access Token"
+            defaultValue={
+              settings?.HuggingFaceLLMAccessToken ? "*".repeat(20) : ""
+            }
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Model Token Limit
+          </label>
+          <input
+            type="number"
+            name="HuggingFaceLLMTokenLimit"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="4096"
+            min={1}
+            onScroll={(e) => e.target.blur()}
+            defaultValue={settings?.HuggingFaceLLMTokenLimit}
+            required={true}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/media/llmprovider/huggingface.png b/frontend/src/media/llmprovider/huggingface.png
new file mode 100644
index 0000000000000000000000000000000000000000..b1cdac08609e581c136aa60d2eaca1df4eec741e
Binary files /dev/null and b/frontend/src/media/llmprovider/huggingface.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 1efa818d3e900e60dd75cdf2dfebcd03b7067c15..ac9ab71f8b92894b407689f18ab9f34ce476bf5a 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -13,6 +13,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@@ -24,6 +25,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
 import MistralOptions from "@/components/LLMSelection/MistralOptions";
+import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { MagnifyingGlass } from "@phosphor-icons/react";
 
@@ -107,6 +109,14 @@ export default function GeneralLLMPreference() {
       options: <GeminiLLMOptions settings={settings} />,
       description: "Google's largest and most capable AI model",
     },
+    {
+      name: "HuggingFace",
+      value: "huggingface",
+      logo: HuggingFaceLogo,
+      options: <HuggingFaceOptions settings={settings} />,
+      description:
+        "Access 150,000+ open-source LLMs and the world's AI community",
+    },
     {
       name: "Ollama",
       value: "ollama",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 60a3b6da4d5c1b2792899750900c4bff108042a8..c86a62a4399d2b99680eb3bb2be4091ba377398b 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -10,6 +10,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
 import AstraDBLogo from "@/media/vectordbs/astraDB.png";
"@/media/vectordbs/astraDB.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -101,6 +102,13 @@ const LLM_SELECTION_PRIVACY = { ], logo: MistralLogo, }, + huggingface: { + name: "HuggingFace", + description: [ + "Your prompts and document text used in response are sent to your HuggingFace managed endpoint", + ], + logo: HuggingFaceLogo, + }, }; const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 9e8ab84a908947e834b9bcf6067b5d42fbe8d1f6..6970dfa1ff9fa550d4ec350963dfb23293ce867d 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -10,6 +10,7 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; +import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions"; @@ -19,6 +20,7 @@ import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions"; import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions"; import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; import MistralOptions from "@/components/LLMSelection/MistralOptions"; +import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; import paths from "@/utils/paths"; @@ -82,6 +84,14 @@ export default function LLMPreference({ options: <GeminiLLMOptions settings={settings} />, description: "Google's largest and most capable AI model", }, + { + name: "HuggingFace", + value: "huggingface", + logo: HuggingFaceLogo, + options: <HuggingFaceOptions settings={settings} />, + description: + "Access 150,000+ open-source LLMs and the world's AI community", + }, { name: "Ollama", value: "ollama", diff --git a/server/.env.example b/server/.env.example index 96cbd58aec52cae8ec53796d69b7c8ba02985881..ec6abcac9b85d82939f17fa0c2f60618b73c7e37 100644 --- a/server/.env.example +++ b/server/.env.example @@ -45,6 +45,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # MISTRAL_API_KEY='example-mistral-ai-api-key' # MISTRAL_MODEL_PREF='mistral-tiny' +# LLM_PROVIDER='huggingface' +# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud +# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx +# HUGGING_FACE_LLM_TOKEN_LIMIT=8000 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index b8c46524cfaff11fb8b34f5fe73bb10ae0b5105d..abb930127a7d2c2d9600f6238902b63931f64f40 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -194,6 +194,20 @@ const SystemSettings = { AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, } : {}), + + ...(llmProvider === "huggingface" + ? 
+        ? {
+            HuggingFaceLLMEndpoint: process.env.HUGGING_FACE_LLM_ENDPOINT,
+            HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
+            HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
+
+            // For embedding credentials when HuggingFace is selected.
+            OpenAiKey: !!process.env.OPEN_AI_KEY,
+            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+          }
+        : {}),
     };
   },
diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..4faf9b30f09f92e3219643e758badf3fbd8c708c
--- /dev/null
+++ b/server/utils/AiProviders/huggingface/index.js
@@ -0,0 +1,185 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { chatPrompt } = require("../../chats");
+
+class HuggingFaceLLM {
+  constructor(embedder = null, _modelPreference = null) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
+      throw new Error("No HuggingFace Inference Endpoint was set.");
+    if (!process.env.HUGGING_FACE_LLM_API_KEY)
+      throw new Error("No HuggingFace Access Token was set.");
+
+    const config = new Configuration({
+      basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
+      apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
+    });
+    this.openai = new OpenAIApi(config);
+    // When using the HF inference server, the model param is not required so
+    // we can stub it here. HF Endpoints can only run one model at a time.
+    // We set it to 'tgi' so the endpoint accepts the chat message format.
+    this.model = "tgi";
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    if (!embedder)
+      console.warn(
+        "No embedding provider defined for HuggingFaceLLM - falling back to Native for embedding!"
+      );
+    // Honor a provided embedder; otherwise fall back to the native embedder.
+    this.embedder = !embedder ? new NativeEmbedder() : embedder;
+    this.defaultTemp = 0.2;
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  streamingEnabled() {
+    return "streamChat" in this && "streamGetChatCompletion" in this;
+  }
+
+  promptWindowLimit() {
+    const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No HuggingFace token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    // System prompt is not enabled for HF model chats
+    const prompt = {
+      role: "user",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    const assistantResponse = {
+      role: "assistant",
+      content: "Okay, I will follow those instructions",
+    };
+    return [
+      prompt,
+      assistantResponse,
+      ...chatHistory,
+      { role: "user", content: userPrompt },
+    ];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const textResponse = await this.openai
+      .createChatCompletion({
+        model: this.model,
+        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      })
+      .then((json) => {
+        const res = json.data;
+        if (!res.hasOwnProperty("choices"))
+          throw new Error("HuggingFace chat: No results!");
+        if (res.choices.length === 0)
+          throw new Error("HuggingFace chat: No results returned!");
+        return res.choices[0].message.content;
+      })
+      .catch((error) => {
+        throw new Error(
+          `HuggingFace::createChatCompletion failed with: ${error.message}`
+        );
+      });
+
+    return textResponse;
+  }
+
+  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      },
+      { responseType: "stream" }
+    );
+    return { type: "huggingFaceStream", stream: streamRequest };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    const { data } = await this.openai.createChatCompletion({
+      model: this.model,
+      messages,
+      temperature,
+    });
+
+    if (!data.hasOwnProperty("choices")) return null;
+    return data.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        messages,
+        temperature,
+      },
+      { responseType: "stream" }
+    );
+    return { type: "huggingFaceStream", stream: streamRequest };
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  HuggingFaceLLM,
+};
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 1202ab1674f5764374691edc25b5773c106ad574..0ee448a5eef22f56ca2d29bd707c9ac48d904b8a 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -383,6 +383,112 @@ function handleStreamResponses(response, stream, responseProps) {
     });
   }
 
+  if (stream.type === "huggingFaceStream") {
+    return new Promise((resolve) => {
+      let fullText = "";
+      let chunk = "";
+      stream.stream.data.on("data", (data) => {
+        const lines = data
+          ?.toString()
+          ?.split("\n")
+          .filter((line) => line.trim() !== "");
+
+        for (const line of lines) {
+          let validJSON = false;
+          const message = chunk + line.replace(/^data:/, "");
+          if (message !== "[DONE]") {
+            // JSON chunk is incomplete and has not ended yet
+            // so we need to stitch it together. You would think JSON
+            // chunks would only come complete - but they don't!
+            try {
+              JSON.parse(message);
+              validJSON = true;
+            } catch {
+              console.log("Failed to parse message", message);
+            }
+
+            if (!validJSON) {
+              // It can be possible that the chunk decoding is running away
+              // and the message chunk fails to append due to string length.
+              // In this case abort the chunk and reset so we can continue.
+              // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
+              try {
+                chunk = message; // message already includes the prior partial chunk
+              } catch (e) {
+                console.error(`Chunk appending error`, e);
+                chunk = "";
+              }
+              continue;
+            } else {
+              chunk = "";
+            }
+          }
+
+          if (message == "[DONE]") {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            resolve(fullText);
+          } else {
+            let error = null;
+            let finishReason = null;
+            let token = "";
+            try {
+              const json = JSON.parse(message);
+              error = json?.error || null;
+              token = json?.choices?.[0]?.delta?.content;
+              finishReason = json?.choices?.[0]?.finish_reason || null;
+            } catch {
+              continue;
+            }
+
+            if (!!error) {
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: null,
+                close: true,
+                error,
+              });
+              resolve("");
+              return;
+            }
+
+            if (token) {
+              fullText += token;
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: token,
+                close: false,
+                error: false,
+              });
+            }
+
+            if (finishReason !== null) {
+              writeResponseChunk(response, {
+                uuid,
+                sources,
+                type: "textResponseChunk",
+                textResponse: "",
+                close: true,
+                error: false,
+              });
+              resolve(fullText);
+            }
+          }
+        }
+      });
+    });
+  }
+
   // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
   // we can just iterate the stream content instead.
   if (!stream.hasOwnProperty("data")) {
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 53a76faebf269ea7bbf4965261d932d1f83d7042..42ed262f952681807fbc70f7e4854eaad81c78e4 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -64,6 +64,9 @@ function getLLMProvider(modelPreference = null) {
     case "native":
       const { NativeLLM } = require("../AiProviders/native");
       return new NativeLLM(embedder, modelPreference);
+    case "huggingface":
+      const { HuggingFaceLLM } = require("../AiProviders/huggingface");
+      return new HuggingFaceLLM(embedder, modelPreference);
     default:
       throw new Error("ENV: No LLM_PROVIDER value found in environment!");
   }
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 50b423474a8c12327572520c884b259f835172f0..acd77b2fdb138683a03dfe0e8db68f41a73f1d16 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -95,6 +95,7 @@ const KEY_MAPPING = {
     checks: [nonZero],
   },
 
+  // Mistral AI API Settings
   MistralApiKey: {
     envKey: "MISTRAL_API_KEY",
     checks: [isNotEmpty],
@@ -109,12 +110,25 @@ const KEY_MAPPING = {
     envKey: "NATIVE_LLM_MODEL_PREF",
     checks: [isDownloadedModel],
   },
-
   NativeLLMTokenLimit: {
     envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+
+  // Hugging Face LLM Inference Settings
+  HuggingFaceLLMEndpoint: {
+    envKey: "HUGGING_FACE_LLM_ENDPOINT",
+    checks: [isNotEmpty, isValidURL, validHuggingFaceEndpoint],
+  },
+  HuggingFaceLLMAccessToken: {
+    envKey: "HUGGING_FACE_LLM_API_KEY",
+    checks: [isNotEmpty],
+  },
+  HuggingFaceLLMTokenLimit: {
+    envKey: "HUGGING_FACE_LLM_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],
@@ -299,6 +313,7 @@ function supportedLLM(input = "") {
     "native",
     "togetherai",
     "mistral",
+    "huggingface",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }
@@ -396,6 +411,12 @@ function validDockerizedUrl(input = "") {
   return null;
 }
 
+function validHuggingFaceEndpoint(input = "") {
+  return input.slice(-6) !== ".cloud"
+    ? `Your HF Endpoint should end in ".cloud"`
+    : null;
+}
+
 // If the LLMProvider has changed we need to reset all workspace model preferences to
 // null since the provider<>model name combination will be invalid for whatever the new
 // provider is.
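
A quick way to sanity-check a deployment against the provider wiring above is to hit the endpoint's OpenAI-compatible /v1 route directly. The sketch below is not part of the diff: it reuses the same openai@3 client (Configuration/OpenAIApi and createChatCompletion) that the HuggingFaceLLM class uses, the endpoint URL and hf_xxxxxx token are placeholders for your own Inference Endpoint, and "tgi" mirrors the stubbed model name from the class.

// smoke-test.js - verify a HF Inference Endpoint accepts the chat format.
const { Configuration, OpenAIApi } = require("openai");

const openai = new OpenAIApi(
  new Configuration({
    // Placeholders - substitute your own endpoint URL and access token.
    basePath: "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud/v1",
    apiKey: "hf_xxxxxx",
  })
);

openai
  .createChatCompletion({
    model: "tgi", // HF Endpoints serve a single model, so the name is a stub.
    temperature: 0.2,
    n: 1,
    messages: [{ role: "user", content: "Reply with the single word: ready" }],
  })
  .then(({ data }) => console.log(data.choices?.[0]?.message?.content))
  .catch((e) => console.error("Endpoint check failed:", e.message));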
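
The huggingFaceStream handler above stitches partial SSE frames together because TGI can split a single JSON payload across several network chunks. Below is a self-contained illustration of that stitch-and-reset loop; the parsing rules follow the handler, but the function name and the frames fed to it are hypothetical.

// stitch-demo.js - why the stream handler buffers incomplete "data:" frames.
function makeStitcher(onToken) {
  let chunk = "";
  return (data) => {
    const lines = data
      .toString()
      .split("\n")
      .filter((line) => line.trim() !== "");
    for (const line of lines) {
      const message = chunk + line.replace(/^data:/, "");
      if (message === "[DONE]") return;
      try {
        const json = JSON.parse(message);
        chunk = ""; // payload completed - reset the buffer
        onToken(json.choices?.[0]?.delta?.content ?? "");
      } catch {
        chunk = message; // payload incomplete - carry it into the next frame
      }
    }
  };
}

// One payload split across two chunks, then the terminator; prints "Hello".
const feed = makeStitcher((token) => process.stdout.write(token));
feed('data:{"choices":[{"delta":{"content":"Hel');
feed('lo"}}]}');
feed("data:[DONE]");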