diff --git a/.changeset/eight-poems-refuse.md b/.changeset/eight-poems-refuse.md
new file mode 100644
index 0000000000000000000000000000000000000000..0d8f6b9852e3a2ee7cdb029de9e41d7d66170fa4
--- /dev/null
+++ b/.changeset/eight-poems-refuse.md
@@ -0,0 +1,6 @@
+---
+"docs": patch
+"llamaindex": patch
+---
+
+feat: DeepInfra LLM implementation
diff --git a/apps/docs/docs/modules/llms/available_llms/deepinfra.md b/apps/docs/docs/modules/llms/available_llms/deepinfra.md
new file mode 100644
index 0000000000000000000000000000000000000000..bef6625d88833844cb9ed96ea9ab0aa90a5ae7f3
--- /dev/null
+++ b/apps/docs/docs/modules/llms/available_llms/deepinfra.md
@@ -0,0 +1,83 @@
+# DeepInfra
+
+Check out available LLMs [here](https://deepinfra.com/models/text-generation).
+
+```ts
+import { DeepInfra, Settings } from "llamaindex";
+
+// Get the API key from `DEEPINFRA_API_TOKEN` environment variable
+import { config } from "dotenv";
+config();
+Settings.llm = new DeepInfra();
+
+// Set the API key
+const apiKey = "YOUR_API_KEY";
+Settings.llm = new DeepInfra({ apiKey });
+```
+
+Alternatively, you can set up the API key via an environment variable:
+
+```bash
+export DEEPINFRA_API_TOKEN="<YOUR_API_KEY>"
+```
+
+## Load and index documents
+
+For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
+
+```ts
+const document = new Document({ text: essay, id_: "essay" });
+
+const index = await VectorStoreIndex.fromDocuments([document]);
+```
+
+## Query
+
+```ts
+const queryEngine = index.asQueryEngine();
+
+const query = "What is the meaning of life?";
+
+const results = await queryEngine.query({
+  query,
+});
+```
+
+## Full Example
+
+```ts
+import { DeepInfra, Document, VectorStoreIndex, Settings } from "llamaindex";
+
+// Use custom LLM
+const model = "meta-llama/Meta-Llama-3-8B-Instruct";
+Settings.llm = new DeepInfra({ model, temperature: 0 });
+
+async function main() {
+  const document = new Document({ text: essay, id_: "essay" });
+
+  // Load and index documents
+  const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // get retriever
+  const retriever = index.asRetriever();
+
+  // Create a query engine
+  const queryEngine = index.asQueryEngine({
+    retriever,
+  });
+
+  const query = "What is the meaning of life?";
+
+  // Query
+  const response = await queryEngine.query({
+    query,
+  });
+
+  // Log the response
+  console.log(response.response);
+}
+```
+
+## Feedback
+
+If you have any feedback, please reach out to us at [feedback@deepinfra.com](mailto:feedback@deepinfra.com).
diff --git a/examples/deepinfra/chat.ts b/examples/deepinfra/chat.ts
new file mode 100644
index 0000000000000000000000000000000000000000..88bd2b1c18d91494ef50145fb8256fcfd38a4976
--- /dev/null
+++ b/examples/deepinfra/chat.ts
@@ -0,0 +1,19 @@
+import { DeepInfra } from "llamaindex";
+
+(async () => {
+  if (!process.env.DEEPINFRA_API_TOKEN) {
+    throw new Error("Please set the DEEPINFRA_API_TOKEN environment variable.");
+  }
+  const deepinfra = new DeepInfra({});
+  const result = await deepinfra.chat({
+    messages: [
+      { content: "You want to talk in rhymes.", role: "system" },
+      {
+        content:
+          "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
+        role: "user",
+      },
+    ],
+  });
+  console.log(result);
+})().catch(console.error);
diff --git a/packages/core/src/llm/deepinfra.ts b/packages/core/src/llm/deepinfra.ts
new file mode 100644
index 0000000000000000000000000000000000000000..9e2d9e2f8b20958552c07b23d907cc256f112d41
--- /dev/null
+++ b/packages/core/src/llm/deepinfra.ts
@@ -0,0 +1,33 @@
+import { getEnv } from "@llamaindex/env";
+import { OpenAI } from "./openai.js";
+
+const ENV_VARIABLE_NAME = "DEEPINFRA_API_TOKEN";
+const DEFAULT_MODEL = "mistralai/Mixtral-8x22B-Instruct-v0.1";
+const BASE_URL = "https://api.deepinfra.com/v1/openai";
+
+export class DeepInfra extends OpenAI {
+  constructor(init?: Partial<OpenAI>) {
+    const {
+      apiKey = getEnv(ENV_VARIABLE_NAME),
+      additionalSessionOptions = {},
+      model = DEFAULT_MODEL,
+      ...rest
+    } = init ?? {};
+
+    if (!apiKey) {
+      throw new Error(
+        `Set DeepInfra API key in ${ENV_VARIABLE_NAME} env variable`,
+      );
+    }
+
+    additionalSessionOptions.baseURL =
+      additionalSessionOptions.baseURL ?? BASE_URL;
+
+    super({
+      apiKey,
+      additionalSessionOptions,
+      model,
+      ...rest,
+    });
+  }
+}
diff --git a/packages/core/src/llm/index.ts b/packages/core/src/llm/index.ts
index dae1ca7473d03ae15612c1575200b0433ac9b7fa..123fb4fb41b6c00272d6e582aa9180d0bb67945f 100644
--- a/packages/core/src/llm/index.ts
+++ b/packages/core/src/llm/index.ts
@@ -25,6 +25,7 @@ export * from "./openai.js";
 export { Portkey } from "./portkey.js";
 export * from "./replicate_ai.js";
 // Note: The type aliases for replicate are to simplify usage for Llama 2 (we're using replicate for Llama 2 support)
+export { DeepInfra } from "./deepinfra.js";
 export { Ollama, type OllamaParams } from "./ollama.js";
 export {
   ALL_AVAILABLE_REPLICATE_MODELS,
@@ -34,5 +35,6 @@ export {
   ReplicateLLM,
   ReplicateSession,
 } from "./replicate_ai.js";
+
 export { TogetherLLM } from "./together.js";
 export * from "./types.js";