diff --git a/docker/.env.example b/docker/.env.example
index 4213c3ff507b90cf98da9859a3ff338083e9cf9c..b7674e91b34cd97002517b0939bbece01f049aab 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -48,6 +48,11 @@ GID='1000'
 # MISTRAL_API_KEY='example-mistral-ai-api-key'
 # MISTRAL_MODEL_PREF='mistral-tiny'
 
+# LLM_PROVIDER='huggingface'
+# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
+# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
+# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
diff --git a/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx b/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..7e8747da1a02ae6025bc5422e6e036dd64d13aae
--- /dev/null
+++ b/frontend/src/components/LLMSelection/HuggingFaceOptions/index.jsx
@@ -0,0 +1,56 @@
+export default function HuggingFaceOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col">
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            HuggingFace Inference Endpoint
+          </label>
+          <input
+            type="url"
+            name="HuggingFaceLLMEndpoint"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="https://example.endpoints.huggingface.cloud"
+            defaultValue={settings?.HuggingFaceLLMEndpoint}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            HuggingFace Access Token
+          </label>
+          <input
+            type="password"
+            name="HuggingFaceLLMAccessToken"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="HuggingFace Access Token"
+            defaultValue={
+              settings?.HuggingFaceLLMAccessToken ? "*".repeat(20) : ""
+            }
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Model Token Limit
+          </label>
+          <input
+            type="number"
+            name="HuggingFaceLLMTokenLimit"
+            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+            placeholder="4096"
+            min={1}
+            onScroll={(e) => e.target.blur()}
+            defaultValue={settings?.HuggingFaceLLMTokenLimit}
+            required={true}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/media/llmprovider/huggingface.png b/frontend/src/media/llmprovider/huggingface.png
new file mode 100644
index 0000000000000000000000000000000000000000..b1cdac08609e581c136aa60d2eaca1df4eec741e
Binary files /dev/null and b/frontend/src/media/llmprovider/huggingface.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 1efa818d3e900e60dd75cdf2dfebcd03b7067c15..ac9ab71f8b92894b407689f18ab9f34ce476bf5a 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -13,6 +13,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@@ -24,6 +25,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
 import MistralOptions from "@/components/LLMSelection/MistralOptions";
+import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { MagnifyingGlass } from "@phosphor-icons/react";
 
@@ -107,6 +109,14 @@ export default function GeneralLLMPreference() {
       options: <GeminiLLMOptions settings={settings} />,
       description: "Google's largest and most capable AI model",
     },
+    {
+      name: "HuggingFace",
+      value: "huggingface",
+      logo: HuggingFaceLogo,
+      options: <HuggingFaceOptions settings={settings} />,
+      description:
+        "Access 150,000+ open-source LLMs and the world's AI community",
+    },
     {
       name: "Ollama",
       value: "ollama",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 60a3b6da4d5c1b2792899750900c4bff108042a8..c86a62a4399d2b99680eb3bb2be4091ba377398b 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -10,6 +10,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
 import AstraDBLogo from "@/media/vectordbs/astraDB.png";
"@/media/vectordbs/astraDB.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -101,6 +102,13 @@ const LLM_SELECTION_PRIVACY = { ], logo: MistralLogo, }, + huggingface: { + name: "HuggingFace", + description: [ + "Your prompts and document text used in response are sent to your HuggingFace managed endpoint", + ], + logo: HuggingFaceLogo, + }, }; const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 9e8ab84a908947e834b9bcf6067b5d42fbe8d1f6..6970dfa1ff9fa550d4ec350963dfb23293ce867d 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -10,6 +10,7 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; +import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions"; @@ -19,6 +20,7 @@ import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions"; import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions"; import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; import MistralOptions from "@/components/LLMSelection/MistralOptions"; +import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; import paths from "@/utils/paths"; @@ -82,6 +84,14 @@ export default function LLMPreference({ options: <GeminiLLMOptions settings={settings} />, description: "Google's largest and most capable AI model", }, + { + name: "HuggingFace", + value: "huggingface", + logo: HuggingFaceLogo, + options: <HuggingFaceOptions settings={settings} />, + description: + "Access 150,000+ open-source LLMs and the world's AI community", + }, { name: "Ollama", value: "ollama", diff --git a/server/.env.example b/server/.env.example index 96cbd58aec52cae8ec53796d69b7c8ba02985881..ec6abcac9b85d82939f17fa0c2f60618b73c7e37 100644 --- a/server/.env.example +++ b/server/.env.example @@ -45,6 +45,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # MISTRAL_API_KEY='example-mistral-ai-api-key' # MISTRAL_MODEL_PREF='mistral-tiny' +# LLM_PROVIDER='huggingface' +# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud +# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx +# HUGGING_FACE_LLM_TOKEN_LIMIT=8000 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index b8c46524cfaff11fb8b34f5fe73bb10ae0b5105d..abb930127a7d2c2d9600f6238902b63931f64f40 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -194,6 +194,20 @@ const SystemSettings = { AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, } : {}), + + ...(llmProvider === "huggingface" + ? 
+        ? {
+            HuggingFaceLLMEndpoint: process.env.HUGGING_FACE_LLM_ENDPOINT,
+            HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
+            HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
+
+            // For embedding credentials when HuggingFace is selected.
+            OpenAiKey: !!process.env.OPEN_AI_KEY,
+            AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+            AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+            AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+          }
+        : {}),
     };
   },
diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..4faf9b30f09f92e3219643e758badf3fbd8c708c
--- /dev/null
+++ b/server/utils/AiProviders/huggingface/index.js
@@ -0,0 +1,185 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { chatPrompt } = require("../../chats");
+
+class HuggingFaceLLM {
+  constructor(embedder = null, _modelPreference = null) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
+      throw new Error("No HuggingFace Inference Endpoint was set.");
+    if (!process.env.HUGGING_FACE_LLM_API_KEY)
+      throw new Error("No HuggingFace Access Token was set.");
+
+    const config = new Configuration({
+      basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
+      apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
+    });
+    this.openai = new OpenAIApi(config);
+    // When using the HF inference server, the model param is not required so
+    // we can stub it here. HF Endpoints can only run one model at a time.
+    // We set it to 'tgi' so the endpoint accepts the chat message format.
+    this.model = "tgi";
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    if (!embedder)
+      console.warn(
+        "No embedding provider defined for HuggingFaceLLM - falling back to Native for embedding!"
+      );
+    // Honor a provided embedder; otherwise fall back to the native embedder.
+    this.embedder = !embedder ? new NativeEmbedder() : embedder;
+    this.defaultTemp = 0.2;
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  streamingEnabled() {
+    return "streamChat" in this && "streamGetChatCompletion" in this;
+  }
+
+  promptWindowLimit() {
+    const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No HuggingFace token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+  }) {
+    // System prompt is not enabled for HF model chats
+    const prompt = {
+      role: "user",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    const assistantResponse = {
+      role: "assistant",
+      content: "Okay, I will follow those instructions",
+    };
+    return [
+      prompt,
+      assistantResponse,
+      ...chatHistory,
+      { role: "user", content: userPrompt },
+    ];
+  }
+
+  async isSafe(_input = "") {
+    // Not implemented so must be stubbed
+    return { safe: true, reasons: [] };
+  }
+
+  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const textResponse = await this.openai
+      .createChatCompletion({
+        model: this.model,
+        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      })
+      .then((json) => {
+        const res = json.data;
+        if (!res.hasOwnProperty("choices"))
+          throw new Error("HuggingFace chat: No results!");
+        if (res.choices.length === 0)
+          throw new Error("HuggingFace chat: No results returned!");
+        return res.choices[0].message.content;
+      })
+      .catch((error) => {
+        throw new Error(
+          `HuggingFace::createChatCompletion failed with: ${error.message}`
+        );
+      });
+
+    return textResponse;
+  }
+
+  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+        n: 1,
+        messages: await this.compressMessages(
+          {
+            systemPrompt: chatPrompt(workspace),
+            userPrompt: prompt,
+            chatHistory,
+          },
+          rawHistory
+        ),
+      },
+      { responseType: "stream" }
+    );
+    return { type: "huggingFaceStream", stream: streamRequest };
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    const { data } = await this.openai.createChatCompletion({
+      model: this.model,
+      messages,
+      temperature,
+    });
+
+    if (!data.hasOwnProperty("choices")) return null;
+    return data.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const streamRequest = await this.openai.createChatCompletion(
+      {
+        model: this.model,
+        stream: true,
+        messages,
+        temperature,
+      },
+      { responseType: "stream" }
+    );
+    return { type: "huggingFaceStream", stream: streamRequest };
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+module.exports = {
+  HuggingFaceLLM,
+};
diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
index 1202ab1674f5764374691edc25b5773c106ad574..0ee448a5eef22f56ca2d29bd707c9ac48d904b8a 100644
--- a/server/utils/chats/stream.js
+++ b/server/utils/chats/stream.js
@@ -383,6 +383,112 @@ function handleStreamResponses(response, stream, responseProps) {
     });
   }
 
+  if (stream.type === "huggingFaceStream") {
+    return new Promise((resolve) => {
+      let fullText = "";
+      let chunk = "";
+      stream.stream.data.on("data", (data) => {
+        const lines = data
+          ?.toString()
+          ?.split("\n")
+          .filter((line) => line.trim() !== "");
+
+        for (const line of lines) {
+          let validJSON = false;
+          const message = chunk + line.replace(/^data:/, "");
+          if (message !== "[DONE]") {
+            // JSON chunk is incomplete and has not ended yet
+            // so we need to stitch it together. You would think JSON
+            // chunks would only come complete - but they don't!
+            try {
+              JSON.parse(message);
+              validJSON = true;
+            } catch {
+              console.log("Failed to parse message", message);
+            }
+
+            if (!validJSON) {
+              // It can be possible that the chunk decoding is running away
+              // and the message chunk fails to append due to string length.
+              // In this case abort the chunk and reset so we can continue.
+              // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
+              try {
+                chunk = message; // message already includes the prior partial chunk
+              } catch (e) {
+                console.error(`Chunk appending error`, e);
+                chunk = "";
+              }
+              continue;
+            } else {
+              chunk = "";
+            }
+          }
+
+          if (message == "[DONE]") {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            resolve(fullText);
+          } else {
+            let error = null;
+            let finishReason = null;
+            let token = "";
+            try {
+              const json = JSON.parse(message);
+              error = json?.error || null;
+              token = json?.choices?.[0]?.delta?.content;
+              finishReason = json?.choices?.[0]?.finish_reason || null;
+            } catch {
+              continue;
+            }
+
+            if (!!error) {
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: null,
+                close: true,
+                error,
+              });
+              resolve("");
+              return;
+            }
+
+            if (token) {
+              fullText += token;
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: token,
+                close: false,
+                error: false,
+              });
+            }
+
+            if (finishReason !== null) {
+              writeResponseChunk(response, {
+                uuid,
+                sources,
+                type: "textResponseChunk",
+                textResponse: "",
+                close: true,
+                error: false,
+              });
+              resolve(fullText);
+            }
+          }
+        }
+      });
+    });
+  }
+
   // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
   // we can just iterate the stream content instead.
   if (!stream.hasOwnProperty("data")) {
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 53a76faebf269ea7bbf4965261d932d1f83d7042..42ed262f952681807fbc70f7e4854eaad81c78e4 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -64,6 +64,9 @@ function getLLMProvider(modelPreference = null) {
     case "native":
       const { NativeLLM } = require("../AiProviders/native");
       return new NativeLLM(embedder, modelPreference);
+    case "huggingface":
+      const { HuggingFaceLLM } = require("../AiProviders/huggingface");
+      return new HuggingFaceLLM(embedder, modelPreference);
     default:
       throw new Error("ENV: No LLM_PROVIDER value found in environment!");
   }
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 50b423474a8c12327572520c884b259f835172f0..acd77b2fdb138683a03dfe0e8db68f41a73f1d16 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -95,6 +95,7 @@ const KEY_MAPPING = {
     checks: [nonZero],
   },
 
+  // Mistral AI API Settings
   MistralApiKey: {
     envKey: "MISTRAL_API_KEY",
     checks: [isNotEmpty],
@@ -109,12 +110,25 @@ const KEY_MAPPING = {
     envKey: "NATIVE_LLM_MODEL_PREF",
     checks: [isDownloadedModel],
   },
-
   NativeLLMTokenLimit: {
     envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+
+  // Hugging Face LLM Inference Settings
+  HuggingFaceLLMEndpoint: {
+    envKey: "HUGGING_FACE_LLM_ENDPOINT",
+    checks: [isNotEmpty, isValidURL, validHuggingFaceEndpoint],
+  },
+  HuggingFaceLLMAccessToken: {
+    envKey: "HUGGING_FACE_LLM_API_KEY",
+    checks: [isNotEmpty],
+  },
+  HuggingFaceLLMTokenLimit: {
+    envKey: "HUGGING_FACE_LLM_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],
@@ -299,6 +313,7 @@ function supportedLLM(input = "") {
     "native",
     "togetherai",
     "mistral",
+    "huggingface",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }
@@ -396,6 +411,12 @@ function validDockerizedUrl(input = "") {
   return null;
 }
 
+function validHuggingFaceEndpoint(input = "") {
+  return input.slice(-6) !== ".cloud"
+    ? `Your HF Endpoint should end in ".cloud"`
+    : null;
+}
+
 // If the LLMProvider has changed we need to reset all workspace model preferences to
 // null since the provider<>model name combination will be invalid for whatever the new
 // provider is.
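
A quick way to sanity-check a deployment against the provider wiring above is to hit the endpoint's OpenAI-compatible /v1 route directly. The sketch below is not part of the diff: it reuses the same openai@3 client (Configuration/OpenAIApi and createChatCompletion) that the HuggingFaceLLM class uses, the endpoint URL and hf_xxxxxx token are placeholders for your own Inference Endpoint, and "tgi" mirrors the stubbed model name from the class.

// smoke-test.js - verify a HF Inference Endpoint accepts the chat format.
const { Configuration, OpenAIApi } = require("openai");

const openai = new OpenAIApi(
  new Configuration({
    // Placeholders - substitute your own endpoint URL and access token.
    basePath: "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud/v1",
    apiKey: "hf_xxxxxx",
  })
);

openai
  .createChatCompletion({
    model: "tgi", // HF Endpoints serve a single model, so the name is a stub.
    temperature: 0.2,
    n: 1,
    messages: [{ role: "user", content: "Reply with the single word: ready" }],
  })
  .then(({ data }) => console.log(data.choices?.[0]?.message?.content))
  .catch((e) => console.error("Endpoint check failed:", e.message));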
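
The huggingFaceStream handler above stitches partial SSE frames together because TGI can split a single JSON payload across several network chunks. Below is a self-contained illustration of that stitch-and-reset loop; the parsing rules follow the handler, but the function name and the frames fed to it are hypothetical.

// stitch-demo.js - why the stream handler buffers incomplete "data:" frames.
function makeStitcher(onToken) {
  let chunk = "";
  return (data) => {
    const lines = data
      .toString()
      .split("\n")
      .filter((line) => line.trim() !== "");
    for (const line of lines) {
      const message = chunk + line.replace(/^data:/, "");
      if (message === "[DONE]") return;
      try {
        const json = JSON.parse(message);
        chunk = ""; // payload completed - reset the buffer
        onToken(json.choices?.[0]?.delta?.content ?? "");
      } catch {
        chunk = message; // payload incomplete - carry it into the next frame
      }
    }
  };
}

// One payload split across two chunks, then the terminator; prints "Hello".
const feed = makeStitcher((token) => process.stdout.write(token));
feed('data:{"choices":[{"delta":{"content":"Hel');
feed('lo"}}]}');
feed("data:[DONE]");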