diff --git a/.changeset/early-crabs-nail.md b/.changeset/early-crabs-nail.md
new file mode 100644
index 0000000000000000000000000000000000000000..7978fc549d5a1c52d89b9537c470bf1f5620d113
--- /dev/null
+++ b/.changeset/early-crabs-nail.md
@@ -0,0 +1,8 @@
+---
+"llamaindex": patch
+"@llamaindex/openai": patch
+---
+
+feat: decouple openai from `llamaindex` module
+
+This should be a non-breaking change; you can now install just `@llamaindex/openai` to reduce the bundle size in the future.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6325a1a6017ed6c05a3cf8b6e8a9e29e1849bb96..2e48f5e4b8e98042ebadf8c5b9c982d2cae4435c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -142,6 +142,9 @@ jobs:
       - name: Pack @llamaindex/cloud
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/cloud
+      - name: Pack @llamaindex/openai
+        run: pnpm pack --pack-destination ${{ runner.temp }}
+        working-directory: packages/llm/openai
       - name: Pack @llamaindex/core
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/core
diff --git a/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts b/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts
deleted file mode 100644
index 2ff111e2e05b4532a924c5184f558557df6c33f7..0000000000000000000000000000000000000000
--- a/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { TransformComponent } from "@llamaindex/core/schema";
-import {
-  BaseEmbedding,
-  BaseNode,
-  SimilarityType,
-  type EmbeddingInfo,
-  type MessageContentDetail,
-} from "llamaindex";
-
-export class OpenAIEmbedding
-  extends TransformComponent
-  implements BaseEmbedding
-{
-  embedInfo?: EmbeddingInfo;
-  embedBatchSize = 512;
-
-  constructor() {
-    super(async (nodes: BaseNode[], _options?: any): Promise<BaseNode[]> => {
-      nodes.forEach((node) => (node.embedding = [0]));
-      return nodes;
-    });
-  }
-
-  async getQueryEmbedding(query: MessageContentDetail) {
-    return [0];
-  }
-
-  async getTextEmbedding(text: string) {
-    return [0];
-  }
-
-  async getTextEmbeddings(texts: string[]) {
-    return [[0]];
-  }
-
-  async getTextEmbeddingsBatch(texts: string[]) {
-    return [[0]];
-  }
-
-  similarity(
-    embedding1: number[],
-    embedding2: number[],
-    mode?: SimilarityType,
-  ) {
-    return 1;
-  }
-
-  truncateMaxTokens(input: string[]): string[] {
-    return input;
-  }
-}
diff --git a/packages/llamaindex/e2e/fixtures/llm/openai.ts b/packages/llamaindex/e2e/fixtures/llm/openai.ts
index 2b54919f5e5ad7f70762770886a3f4d7ead2fd66..10fd90c6453dc9ccd5dbe84ba932923bf2e4ddaf 100644
--- a/packages/llamaindex/e2e/fixtures/llm/openai.ts
+++ b/packages/llamaindex/e2e/fixtures/llm/openai.ts
@@ -12,6 +12,15 @@ import type {
 import { deepStrictEqual, strictEqual } from "node:assert";
 import { llmCompleteMockStorage } from "../../node/utils.js";
 
+import { TransformComponent } from "@llamaindex/core/schema";
+import {
+  BaseEmbedding,
+  BaseNode,
+  SimilarityType,
+  type EmbeddingInfo,
+  type MessageContentDetail,
+} from "llamaindex";
+
 export function getOpenAISession() {
   return {};
 }
@@ -22,6 +31,7 @@ export function isFunctionCallingModel() {
 
 export class OpenAI implements LLM {
   supportToolCall = true;
+
   get metadata() {
     return {
       model: "mock-model",
@@ -32,6 +42,7 @@ export class OpenAI implements LLM {
       isFunctionCallingModel: true,
     };
   }
+
   chat(
     params: LLMChatParamsStreaming<Record<string, unknown>>,
   ): Promise<AsyncIterable<ChatResponseChunk>>;
@@ -77,6 +88,7 @@
export class OpenAI implements LLM { } throw new Error("Method not implemented."); } + complete( params: LLMCompletionParamsStreaming, ): Promise<AsyncIterable<CompletionResponse>>; @@ -103,3 +115,46 @@ export class OpenAI implements LLM { throw new Error("Method not implemented."); } } + +export class OpenAIEmbedding + extends TransformComponent + implements BaseEmbedding +{ + embedInfo?: EmbeddingInfo; + embedBatchSize = 512; + + constructor() { + super(async (nodes: BaseNode[], _options?: any): Promise<BaseNode[]> => { + nodes.forEach((node) => (node.embedding = [0])); + return nodes; + }); + } + + async getQueryEmbedding(query: MessageContentDetail) { + return [0]; + } + + async getTextEmbedding(text: string) { + return [0]; + } + + async getTextEmbeddings(texts: string[]) { + return [[0]]; + } + + async getTextEmbeddingsBatch(texts: string[]) { + return [[0]]; + } + + similarity( + embedding1: number[], + embedding2: number[], + mode?: SimilarityType, + ) { + return 1; + } + + truncateMaxTokens(input: string[]): string[] { + return input; + } +} diff --git a/packages/llamaindex/e2e/mock-module.js b/packages/llamaindex/e2e/mock-module.js index d32e8f1878f9aca65f8ca8d3171a36ddbc3d4f0a..fe6cd62629c8c21d1f601df8d0ec295db16a44f2 100644 --- a/packages/llamaindex/e2e/mock-module.js +++ b/packages/llamaindex/e2e/mock-module.js @@ -13,8 +13,14 @@ export async function resolve(specifier, context, nextResolve) { return result; } const targetUrl = fileURLToPath(result.url).replace(/\.js$/, ".ts"); - const relativePath = relative(packageDistDir, targetUrl); - if (relativePath.startsWith(".") || relativePath.startsWith("/")) { + let relativePath = relative(packageDistDir, targetUrl); + // todo: make it more generic if we have more sub modules fixtures in the future + if (relativePath.startsWith("../../llm/openai")) { + relativePath = relativePath.replace( + "../../llm/openai/dist/index.ts", + "llm/openai.ts", + ); + } else if (relativePath.startsWith(".") || relativePath.startsWith("/")) { return result; } const url = pathToFileURL(join(fixturesDir, relativePath)).toString(); diff --git a/packages/llamaindex/e2e/package.json b/packages/llamaindex/e2e/package.json index 971b369f1c85b46b3bc41022f63ddd05d467d9e9..0f3f588ea45521bd8e684ae50c9be139beca83df 100644 --- a/packages/llamaindex/e2e/package.json +++ b/packages/llamaindex/e2e/package.json @@ -10,7 +10,6 @@ }, "devDependencies": { "@faker-js/faker": "^8.4.1", - "@llamaindex/core": "workspace:*", "@types/node": "^22.5.1", "consola": "^3.2.3", "llamaindex": "workspace:*", diff --git a/packages/llamaindex/package.json b/packages/llamaindex/package.json index 711907fd19b03e5491c4a89dc14460bdf3ffb3ea..436ded68465bc0af64d34de78de7f5a7a2b89ffb 100644 --- a/packages/llamaindex/package.json +++ b/packages/llamaindex/package.json @@ -33,6 +33,7 @@ "@llamaindex/cloud": "workspace:*", "@llamaindex/core": "workspace:*", "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*", "@mistralai/mistralai": "^1.0.4", "@mixedbread-ai/sdk": "^2.2.11", "@pinecone-database/pinecone": "^3.0.2", diff --git a/packages/llamaindex/src/ChatHistory.ts b/packages/llamaindex/src/ChatHistory.ts index f681e5261eb88762305a8ec78c9c65d1c26d86f7..4bbd44e4b491a85da240b6c91bcb1876cdaa0d40 100644 --- a/packages/llamaindex/src/ChatHistory.ts +++ b/packages/llamaindex/src/ChatHistory.ts @@ -5,7 +5,7 @@ import { } from "@llamaindex/core/prompts"; import { extractText, messagesToHistory } from "@llamaindex/core/utils"; import { tokenizers, type Tokenizer } from 
"@llamaindex/env"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI } from "@llamaindex/openai"; /** * A ChatHistory is used to keep the state of back and forth chat messages diff --git a/packages/llamaindex/src/QuestionGenerator.ts b/packages/llamaindex/src/QuestionGenerator.ts index 47ea6001eea4dc4dc1ae0582845723c2fe3b49d7..0f69fdfb843a562f346c9ddd40a5dd938efbec0c 100644 --- a/packages/llamaindex/src/QuestionGenerator.ts +++ b/packages/llamaindex/src/QuestionGenerator.ts @@ -8,12 +8,12 @@ import { import type { QueryType } from "@llamaindex/core/query-engine"; import type { BaseOutputParser } from "@llamaindex/core/schema"; import { extractText, toToolDescriptions } from "@llamaindex/core/utils"; +import { OpenAI } from "@llamaindex/openai"; import { SubQuestionOutputParser } from "./OutputParser.js"; import type { BaseQuestionGenerator, SubQuestion, } from "./engines/query/types.js"; -import { OpenAI } from "./llm/openai.js"; import type { StructuredOutput } from "./types.js"; /** diff --git a/packages/llamaindex/src/ServiceContext.ts b/packages/llamaindex/src/ServiceContext.ts index c6c1885f295dc3af95b3e4046b5a1b7319c4a4a9..a185423fbdcb68759c9049dbea9514322f82fd89 100644 --- a/packages/llamaindex/src/ServiceContext.ts +++ b/packages/llamaindex/src/ServiceContext.ts @@ -5,8 +5,7 @@ import { type NodeParser, SentenceSplitter, } from "@llamaindex/core/node-parser"; -import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; /** * The ServiceContext is a collection of components that are used in different parts of the application. diff --git a/packages/llamaindex/src/Settings.ts b/packages/llamaindex/src/Settings.ts index a4acce25d429fbb24a5af24d3b1daa2e4bb1f946..a39fc3b1ab05c427b6e264df68b793556eef8e1d 100644 --- a/packages/llamaindex/src/Settings.ts +++ b/packages/llamaindex/src/Settings.ts @@ -2,7 +2,7 @@ import { type CallbackManager, Settings as CoreSettings, } from "@llamaindex/core/global"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI } from "@llamaindex/openai"; import { PromptHelper } from "@llamaindex/core/indices"; diff --git a/packages/llamaindex/src/agent/openai.ts b/packages/llamaindex/src/agent/openai.ts index a85fb4c5a1719984dddaaca8cdf0d66c55897c5b..8cd84bc675ff3972df36eec3b91d6fa158686f31 100644 --- a/packages/llamaindex/src/agent/openai.ts +++ b/packages/llamaindex/src/agent/openai.ts @@ -1,5 +1,5 @@ +import { OpenAI } from "@llamaindex/openai"; import { Settings } from "../Settings.js"; -import { OpenAI } from "../llm/openai.js"; import { LLMAgent, LLMAgentWorker, type LLMAgentParams } from "./llm.js"; // This is likely not necessary anymore but leaving it here just incase it's in use elsewhere diff --git a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts index 7e83ab1360501cf170eea56aa2143cfd7cb6ecd3..14ddb6f8de3884807b08d50f7d48fb4c1f2b8f95 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts @@ -13,8 +13,8 @@ import { getAppBaseUrl, getProjectId, initService } from "./utils.js"; import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api"; import { SentenceSplitter } from "@llamaindex/core/node-parser"; import { getEnv } from "@llamaindex/env"; +import { OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "../Settings.js"; -import { OpenAIEmbedding } from 
"../embeddings/OpenAIEmbedding.js"; export class LlamaCloudIndex { params: CloudConstructorParams; diff --git a/packages/llamaindex/src/cloud/config.ts b/packages/llamaindex/src/cloud/config.ts index 0a562fd58662c94c584d4f3c6fd56a2c6d2ef2c8..5b3b7a48e38f101943fbc22a98d8c0e037b7d575 100644 --- a/packages/llamaindex/src/cloud/config.ts +++ b/packages/llamaindex/src/cloud/config.ts @@ -5,7 +5,7 @@ import type { } from "@llamaindex/cloud/api"; import { SentenceSplitter } from "@llamaindex/core/node-parser"; import { BaseNode, type TransformComponent } from "@llamaindex/core/schema"; -import { OpenAIEmbedding } from "../embeddings/OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export type GetPipelineCreateParams = { pipelineName: string; diff --git a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts b/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts index 72386410e1dfd3a97568bf31c6f709ffcaaa0e55..02781efcb42bde6e5598eb279af193c85ec9158f 100644 --- a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts +++ b/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts @@ -1,152 +1 @@ -import { BaseEmbedding } from "@llamaindex/core/embeddings"; -import { Tokenizers } from "@llamaindex/env"; -import type { ClientOptions as OpenAIClientOptions } from "openai"; -import type { AzureOpenAIConfig } from "../llm/azure.js"; -import { - getAzureConfigFromEnv, - getAzureModel, - shouldUseAzure, -} from "../llm/azure.js"; -import type { OpenAISession } from "../llm/openai.js"; -import { getOpenAISession } from "../llm/openai.js"; - -export const ALL_OPENAI_EMBEDDING_MODELS = { - "text-embedding-ada-002": { - dimensions: 1536, - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, - "text-embedding-3-small": { - dimensions: 1536, - dimensionOptions: [512, 1536], - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, - "text-embedding-3-large": { - dimensions: 3072, - dimensionOptions: [256, 1024, 3072], - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, -}; - -type ModelKeys = keyof typeof ALL_OPENAI_EMBEDDING_MODELS; - -export class OpenAIEmbedding extends BaseEmbedding { - /** embeddding model. defaults to "text-embedding-ada-002" */ - model: string; - /** number of dimensions of the resulting vector, for models that support choosing fewer dimensions. undefined will default to model default */ - dimensions?: number | undefined; - - // OpenAI session params - - /** api key */ - apiKey?: string | undefined = undefined; - /** maximum number of retries, default 10 */ - maxRetries: number; - /** timeout in ms, default 60 seconds */ - timeout?: number | undefined; - /** other session options for OpenAI */ - additionalSessionOptions?: - | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout"> - | undefined; - - /** session object */ - session: OpenAISession; - - /** - * OpenAI Embedding - * @param init - initial parameters - */ - constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) { - super(); - - this.model = init?.model ?? "text-embedding-ada-002"; - this.dimensions = init?.dimensions; // if no dimensions provided, will be undefined/not sent to OpenAI - - this.embedBatchSize = init?.embedBatchSize ?? 10; - this.maxRetries = init?.maxRetries ?? 10; - - this.timeout = init?.timeout ?? 
60 * 1000; // Default is 60 seconds - this.additionalSessionOptions = init?.additionalSessionOptions; - - // find metadata for model - const key = Object.keys(ALL_OPENAI_EMBEDDING_MODELS).find( - (key) => key === this.model, - ) as ModelKeys | undefined; - if (key) { - this.embedInfo = ALL_OPENAI_EMBEDDING_MODELS[key]; - } - - if (init?.azure || shouldUseAzure()) { - const azureConfig = { - ...getAzureConfigFromEnv({ - model: getAzureModel(this.model), - }), - ...init?.azure, - }; - - this.apiKey = azureConfig.apiKey; - this.session = - init?.session ?? - getOpenAISession({ - azure: true, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - ...azureConfig, - }); - } else { - this.apiKey = init?.apiKey ?? undefined; - this.session = - init?.session ?? - getOpenAISession({ - apiKey: this.apiKey, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - }); - } - } - - /** - * Get embeddings for a batch of texts - * @param texts - * @param options - */ - private async getOpenAIEmbedding(input: string[]): Promise<number[][]> { - // TODO: ensure this for every sub class by calling it in the base class - input = this.truncateMaxTokens(input); - - const { data } = await this.session.openai.embeddings.create( - this.dimensions - ? { - model: this.model, - dimensions: this.dimensions, // only sent to OpenAI if set by user - input, - } - : { - model: this.model, - input, - }, - ); - - return data.map((d) => d.embedding); - } - - /** - * Get embeddings for a batch of texts - * @param texts - */ - getTextEmbeddings = async (texts: string[]): Promise<number[][]> => { - return this.getOpenAIEmbedding(texts); - }; - - /** - * Get embeddings for a single text - * @param texts - */ - async getTextEmbedding(text: string): Promise<number[]> { - return (await this.getOpenAIEmbedding([text]))[0]!; - } -} +export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/src/embeddings/fireworks.ts b/packages/llamaindex/src/embeddings/fireworks.ts index a48039e1022e91c06561e08937a02b39bc3f5e7e..8338884c12955937b009a28c1d482d6799a2bfd7 100644 --- a/packages/llamaindex/src/embeddings/fireworks.ts +++ b/packages/llamaindex/src/embeddings/fireworks.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAIEmbedding } from "./OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export class FireworksEmbedding extends OpenAIEmbedding { constructor(init?: Partial<OpenAIEmbedding>) { diff --git a/packages/llamaindex/src/embeddings/together.ts b/packages/llamaindex/src/embeddings/together.ts index b284daf6a701fe42000b3184dfc68c5f62d7e127..1ed43fef7f582bb557c9afe961fec41eb1c182b0 100644 --- a/packages/llamaindex/src/embeddings/together.ts +++ b/packages/llamaindex/src/embeddings/together.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAIEmbedding } from "./OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export class TogetherEmbedding extends OpenAIEmbedding { constructor(init?: Partial<OpenAIEmbedding>) { diff --git a/packages/llamaindex/src/extractors/MetadataExtractors.ts b/packages/llamaindex/src/extractors/MetadataExtractors.ts index b7ca6c49f9c8fef157ef172b2c22ddaa6134d828..b1d0ce5a47412627745c0375ef86b1a0f30e27b9 100644 --- a/packages/llamaindex/src/extractors/MetadataExtractors.ts +++ b/packages/llamaindex/src/extractors/MetadataExtractors.ts @@ -1,7 +1,7 @@ import type { LLM } from "@llamaindex/core/llms"; import type { BaseNode } from 
"@llamaindex/core/schema"; import { MetadataMode, TextNode } from "@llamaindex/core/schema"; -import { OpenAI } from "../llm/index.js"; +import { OpenAI } from "@llamaindex/openai"; import { defaultKeywordExtractorPromptTemplate, defaultQuestionAnswerPromptTemplate, diff --git a/packages/llamaindex/src/internal/settings/EmbedModel.ts b/packages/llamaindex/src/internal/settings/EmbedModel.ts index fab2331a8702e0d077330258a5c18d404e2f743e..d912d20b9c4121891751f1317d347b4ca9ca9dfc 100644 --- a/packages/llamaindex/src/internal/settings/EmbedModel.ts +++ b/packages/llamaindex/src/internal/settings/EmbedModel.ts @@ -1,6 +1,6 @@ import type { BaseEmbedding } from "@llamaindex/core/embeddings"; import { AsyncLocalStorage } from "@llamaindex/env"; -import { OpenAIEmbedding } from "../../embeddings/OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; const embeddedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>(); let globalEmbeddedModel: BaseEmbedding | null = null; diff --git a/packages/llamaindex/src/llm/deepinfra.ts b/packages/llamaindex/src/llm/deepinfra.ts index 9e2d9e2f8b20958552c07b23d907cc256f112d41..c2c8bde814bd8ef1be6ae0a39abb6d2e53c3edca 100644 --- a/packages/llamaindex/src/llm/deepinfra.ts +++ b/packages/llamaindex/src/llm/deepinfra.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; const ENV_VARIABLE_NAME = "DEEPINFRA_API_TOKEN"; const DEFAULT_MODEL = "mistralai/Mixtral-8x22B-Instruct-v0.1"; diff --git a/packages/llamaindex/src/llm/deepseek.ts b/packages/llamaindex/src/llm/deepseek.ts index 7c4f15466df2d81192197a61a96b33dcdc1d10b2..d8a2586838ab763e1a0f98a07f9746f274b407d7 100644 --- a/packages/llamaindex/src/llm/deepseek.ts +++ b/packages/llamaindex/src/llm/deepseek.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export const DEEPSEEK_MODELS = { "deepseek-coder": { contextWindow: 128000 }, diff --git a/packages/llamaindex/src/llm/fireworks.ts b/packages/llamaindex/src/llm/fireworks.ts index 0385b62f25206b8eb2fa3d2cdcdef0115abbfd8d..3e5979f4d921708c01016ef4a24f190e8b7e5a84 100644 --- a/packages/llamaindex/src/llm/fireworks.ts +++ b/packages/llamaindex/src/llm/fireworks.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export class FireworksLLM extends OpenAI { constructor(init?: Partial<OpenAI>) { diff --git a/packages/llamaindex/src/llm/groq.ts b/packages/llamaindex/src/llm/groq.ts index 8f77a9389cebcc24c04c2e996f779b73fb6c88b0..5c058f923ed7847cadbf02654fc4a3c3ebea3a7b 100644 --- a/packages/llamaindex/src/llm/groq.ts +++ b/packages/llamaindex/src/llm/groq.ts @@ -1,6 +1,6 @@ import { getEnv } from "@llamaindex/env"; +import { OpenAI } from "@llamaindex/openai"; import GroqSDK, { type ClientOptions } from "groq-sdk"; -import { OpenAI } from "./openai.js"; export class Groq extends OpenAI { constructor( diff --git a/packages/llamaindex/src/llm/index.ts b/packages/llamaindex/src/llm/index.ts index de4dbad211b0b58c4979935c6f76cb6bca3c76a9..722a006657d0c8672a79aebd01e4b56de9dd7014 100644 --- a/packages/llamaindex/src/llm/index.ts +++ b/packages/llamaindex/src/llm/index.ts @@ -11,7 +11,6 @@ export { GEMINI_MODEL, type GoogleGeminiSessionOptions, } from "./gemini/types.js"; - export { Groq } from "./groq.js"; export { HuggingFaceInferenceAPI, HuggingFaceLLM } from 
"./huggingface.js"; export { diff --git a/packages/llamaindex/src/llm/openai.ts b/packages/llamaindex/src/llm/openai.ts index 1a5b237a160d346465a05402611e79381e92cd2f..02781efcb42bde6e5598eb279af193c85ec9158f 100644 --- a/packages/llamaindex/src/llm/openai.ts +++ b/packages/llamaindex/src/llm/openai.ts @@ -1,502 +1 @@ -import { getEnv } from "@llamaindex/env"; -import _ from "lodash"; -import type OpenAILLM from "openai"; -import type { - ClientOptions, - ClientOptions as OpenAIClientOptions, -} from "openai"; -import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai"; -import type { ChatModel } from "openai/resources/chat/chat"; - -import { - type BaseTool, - type ChatMessage, - type ChatResponse, - type ChatResponseChunk, - type LLM, - type LLMChatParamsNonStreaming, - type LLMChatParamsStreaming, - type LLMMetadata, - type MessageType, - type PartialToolCall, - ToolCallLLM, - type ToolCallLLMMessageOptions, -} from "@llamaindex/core/llms"; -import { - extractText, - wrapEventCaller, - wrapLLMEvent, -} from "@llamaindex/core/utils"; -import { Tokenizers } from "@llamaindex/env"; -import type { - ChatCompletionAssistantMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionRole, - ChatCompletionSystemMessageParam, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, -} from "openai/resources/chat/completions"; -import type { ChatCompletionMessageParam } from "openai/resources/index.js"; -import type { AzureOpenAIConfig } from "./azure.js"; -import { - getAzureConfigFromEnv, - getAzureModel, - shouldUseAzure, -} from "./azure.js"; - -export class OpenAISession { - openai: Pick<OrigOpenAI, "chat" | "embeddings">; - - constructor(options: ClientOptions & { azure?: boolean } = {}) { - if (options.azure) { - this.openai = new AzureOpenAI(options as AzureOpenAIConfig); - } else { - if (!options.apiKey) { - options.apiKey = getEnv("OPENAI_API_KEY"); - } - - if (!options.apiKey) { - throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable"); // Overriding OpenAI package's error message - } - - this.openai = new OrigOpenAI({ - ...options, - }); - } - } -} - -// I'm not 100% sure this is necessary vs. just starting a new session -// every time we make a call. They say they try to reuse connections -// so in theory this is more efficient, but we should test it in the future. -const defaultOpenAISession: { - session: OpenAISession; - options: ClientOptions; -}[] = []; - -/** - * Get a session for the OpenAI API. If one already exists with the same options, - * it will be returned. Otherwise, a new session will be created. 
- * @param options - * @returns - */ -export function getOpenAISession( - options: ClientOptions & { azure?: boolean } = {}, -) { - let session = defaultOpenAISession.find((session) => { - return _.isEqual(session.options, options); - })?.session; - - if (!session) { - session = new OpenAISession(options); - defaultOpenAISession.push({ session, options }); - } - - return session; -} - -export const GPT4_MODELS = { - "chatgpt-4o-latest": { - contextWindow: 128000, - }, - "gpt-4": { contextWindow: 8192 }, - "gpt-4-32k": { contextWindow: 32768 }, - "gpt-4-32k-0613": { contextWindow: 32768 }, - "gpt-4-turbo": { contextWindow: 128000 }, - "gpt-4-turbo-preview": { contextWindow: 128000 }, - "gpt-4-1106-preview": { contextWindow: 128000 }, - "gpt-4-0125-preview": { contextWindow: 128000 }, - "gpt-4-vision-preview": { contextWindow: 128000 }, - "gpt-4o": { contextWindow: 128000 }, - "gpt-4o-2024-05-13": { contextWindow: 128000 }, - "gpt-4o-mini": { contextWindow: 128000 }, - "gpt-4o-mini-2024-07-18": { contextWindow: 128000 }, - "gpt-4o-2024-08-06": { contextWindow: 128000 }, - "gpt-4o-2024-09-14": { contextWindow: 128000 }, - "gpt-4o-2024-10-14": { contextWindow: 128000 }, - "gpt-4-0613": { contextWindow: 128000 }, - "gpt-4-turbo-2024-04-09": { contextWindow: 128000 }, - "gpt-4-0314": { contextWindow: 128000 }, - "gpt-4-32k-0314": { contextWindow: 32768 }, -}; - -// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future -export const GPT35_MODELS = { - "gpt-3.5-turbo": { contextWindow: 16385 }, - "gpt-3.5-turbo-0613": { contextWindow: 4096 }, - "gpt-3.5-turbo-16k": { contextWindow: 16385 }, - "gpt-3.5-turbo-16k-0613": { contextWindow: 16385 }, - "gpt-3.5-turbo-1106": { contextWindow: 16385 }, - "gpt-3.5-turbo-0125": { contextWindow: 16385 }, - "gpt-3.5-turbo-0301": { contextWindow: 16385 }, -}; - -export const O1_MODELS = { - "o1-preview": { - contextWindow: 128000, - }, - "o1-preview-2024-09-12": { - contextWindow: 128000, - }, - "o1-mini": { - contextWindow: 128000, - }, - "o1-mini-2024-09-12": { - contextWindow: 128000, - }, -}; - -/** - * We currently support GPT-3.5 and GPT-4 models - */ -export const ALL_AVAILABLE_OPENAI_MODELS = { - ...GPT4_MODELS, - ...GPT35_MODELS, - ...O1_MODELS, -} satisfies Record<ChatModel, { contextWindow: number }>; - -export function isFunctionCallingModel(llm: LLM): llm is OpenAI { - let model: string; - if (llm instanceof OpenAI) { - model = llm.model; - } else if ("model" in llm && typeof llm.model === "string") { - model = llm.model; - } else { - return false; - } - const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model); - const isOld = model.includes("0314") || model.includes("0301"); - const isO1 = model.startsWith("o1"); - return isChatModel && !isOld && !isO1; -} - -export type OpenAIAdditionalMetadata = {}; - -export type OpenAIAdditionalChatOptions = Omit< - Partial<OpenAILLM.Chat.ChatCompletionCreateParams>, - | "max_tokens" - | "messages" - | "model" - | "temperature" - | "top_p" - | "stream" - | "tools" - | "toolChoice" ->; - -export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> { - model: - | ChatModel - // string & {} is a hack to allow any string, but still give autocomplete - | (string & {}); - temperature: number; - topP: number; - maxTokens?: number | undefined; - additionalChatOptions?: OpenAIAdditionalChatOptions | undefined; - - // OpenAI session params - apiKey?: string | undefined = undefined; - maxRetries: number; - timeout?: number; - session: OpenAISession; - 
additionalSessionOptions?: - | undefined - | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">; - - constructor( - init?: Partial<OpenAI> & { - azure?: AzureOpenAIConfig; - }, - ) { - super(); - this.model = init?.model ?? "gpt-4o"; - this.temperature = init?.temperature ?? 0.1; - this.topP = init?.topP ?? 1; - this.maxTokens = init?.maxTokens ?? undefined; - - this.maxRetries = init?.maxRetries ?? 10; - this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds - this.additionalChatOptions = init?.additionalChatOptions; - this.additionalSessionOptions = init?.additionalSessionOptions; - - if (init?.azure || shouldUseAzure()) { - const azureConfig = { - ...getAzureConfigFromEnv({ - model: getAzureModel(this.model), - }), - ...init?.azure, - }; - - this.apiKey = azureConfig.apiKey; - this.session = - init?.session ?? - getOpenAISession({ - azure: true, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - ...azureConfig, - }); - } else { - this.apiKey = init?.apiKey ?? undefined; - this.session = - init?.session ?? - getOpenAISession({ - apiKey: this.apiKey, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - }); - } - } - - get supportToolCall() { - return isFunctionCallingModel(this); - } - - get metadata(): LLMMetadata & OpenAIAdditionalMetadata { - const contextWindow = - ALL_AVAILABLE_OPENAI_MODELS[ - this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS - ]?.contextWindow ?? 1024; - return { - model: this.model, - temperature: this.temperature, - topP: this.topP, - maxTokens: this.maxTokens, - contextWindow, - tokenizer: Tokenizers.CL100K_BASE, - }; - } - - static toOpenAIRole(messageType: MessageType): ChatCompletionRole { - switch (messageType) { - case "user": - return "user"; - case "assistant": - return "assistant"; - case "system": - return "system"; - default: - return "user"; - } - } - - static toOpenAIMessage( - messages: ChatMessage<ToolCallLLMMessageOptions>[], - ): ChatCompletionMessageParam[] { - return messages.map((message) => { - const options = message.options ?? {}; - if ("toolResult" in options) { - return { - tool_call_id: options.toolResult.id, - role: "tool", - content: extractText(message.content), - } satisfies ChatCompletionToolMessageParam; - } else if ("toolCall" in options) { - return { - role: "assistant", - content: extractText(message.content), - tool_calls: options.toolCall.map((toolCall) => { - return { - id: toolCall.id, - type: "function", - function: { - name: toolCall.name, - arguments: - typeof toolCall.input === "string" - ? 
toolCall.input - : JSON.stringify(toolCall.input), - }, - }; - }), - } satisfies ChatCompletionAssistantMessageParam; - } else if (message.role === "user") { - return { - role: "user", - content: message.content, - } satisfies ChatCompletionUserMessageParam; - } - - const response: - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionMessageToolCall = { - // fixme(alex): type assertion - role: OpenAI.toOpenAIRole(message.role) as never, - // fixme: should not extract text, but assert content is string - content: extractText(message.content), - }; - return response; - }); - } - - chat( - params: LLMChatParamsStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>; - chat( - params: LLMChatParamsNonStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise<ChatResponse<ToolCallLLMMessageOptions>>; - @wrapEventCaller - @wrapLLMEvent - async chat( - params: - | LLMChatParamsNonStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - > - | LLMChatParamsStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise< - | ChatResponse<ToolCallLLMMessageOptions> - | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> - > { - const { messages, stream, tools, additionalChatOptions } = params; - const baseRequestParams = <OpenAILLM.Chat.ChatCompletionCreateParams>{ - model: this.model, - temperature: this.temperature, - max_tokens: this.maxTokens, - tools: tools?.map(OpenAI.toTool), - messages: OpenAI.toOpenAIMessage(messages), - top_p: this.topP, - ...Object.assign({}, this.additionalChatOptions, additionalChatOptions), - }; - - if ( - Array.isArray(baseRequestParams.tools) && - baseRequestParams.tools.length === 0 - ) { - // remove empty tools array to avoid OpenAI error - delete baseRequestParams.tools; - } - - // Streaming - if (stream) { - return this.streamChat(baseRequestParams); - } - - // Non-streaming - const response = await this.session.openai.chat.completions.create({ - ...baseRequestParams, - stream: false, - }); - - const content = response.choices[0]!.message?.content ?? ""; - - return { - raw: response, - message: { - content, - role: response.choices[0]!.message.role, - options: response.choices[0]!.message?.tool_calls - ? { - toolCall: response.choices[0]!.message.tool_calls.map( - (toolCall) => ({ - id: toolCall.id, - name: toolCall.function.name, - input: toolCall.function.arguments, - }), - ), - } - : {}, - }, - }; - } - - // todo: this wrapper is ugly, refactor it - @wrapEventCaller - protected async *streamChat( - baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams, - ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> { - const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> = - await this.session.openai.chat.completions.create({ - ...baseRequestParams, - stream: true, - }); - - // TODO: add callback to streamConverter and use streamConverter here - // this will be used to keep track of the current tool call, make sure input are valid json object. 
- let currentToolCall: PartialToolCall | null = null; - const toolCallMap = new Map<string, PartialToolCall>(); - for await (const part of stream) { - if (part.choices.length === 0) continue; - const choice = part.choices[0]!; - // skip parts that don't have any content - if (!(choice.delta.content || choice.delta.tool_calls)) continue; - - let shouldEmitToolCall: PartialToolCall | null = null; - if ( - choice.delta.tool_calls?.[0]!.id && - currentToolCall && - choice.delta.tool_calls?.[0].id !== currentToolCall.id - ) { - shouldEmitToolCall = { - ...currentToolCall, - input: JSON.parse(currentToolCall.input), - }; - } - if (choice.delta.tool_calls?.[0]!.id) { - currentToolCall = { - name: choice.delta.tool_calls[0].function!.name!, - id: choice.delta.tool_calls[0].id, - input: choice.delta.tool_calls[0].function!.arguments!, - }; - toolCallMap.set(choice.delta.tool_calls[0].id, currentToolCall); - } else { - if (choice.delta.tool_calls?.[0]!.function?.arguments) { - currentToolCall!.input += - choice.delta.tool_calls[0].function.arguments; - } - } - - const isDone: boolean = choice.finish_reason !== null; - - if (isDone && currentToolCall) { - // for the last one, we need to emit the tool call - shouldEmitToolCall = { - ...currentToolCall, - input: JSON.parse(currentToolCall.input), - }; - } - - yield { - raw: part, - options: shouldEmitToolCall - ? { toolCall: [shouldEmitToolCall] } - : currentToolCall - ? { - toolCall: [currentToolCall], - } - : {}, - delta: choice.delta.content ?? "", - }; - } - toolCallMap.clear(); - return; - } - - static toTool(tool: BaseTool): ChatCompletionTool { - return { - type: "function", - function: tool.metadata.parameters - ? { - name: tool.metadata.name, - description: tool.metadata.description, - parameters: tool.metadata.parameters, - } - : { - name: tool.metadata.name, - description: tool.metadata.description, - }, - }; - } -} +export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/src/llm/together.ts b/packages/llamaindex/src/llm/together.ts index 3772b2be2bf08cd2b7735a1419d9e09684c1873c..4d314bcc0a84261580dac5502651dffd7d0f48d6 100644 --- a/packages/llamaindex/src/llm/together.ts +++ b/packages/llamaindex/src/llm/together.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export class TogetherLLM extends OpenAI { constructor(init?: Partial<OpenAI>) { diff --git a/packages/llm/openai/package.json b/packages/llm/openai/package.json new file mode 100644 index 0000000000000000000000000000000000000000..5fb001d0066a61176b754398848f0169fad628df --- /dev/null +++ b/packages/llm/openai/package.json @@ -0,0 +1,42 @@ +{ + "name": "@llamaindex/openai", + "description": "OpenAI Adapter for LlamaIndex", + "version": "0.1.0", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/llm/openai" + }, + "private": true, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "5.3.2" + }, + "dependencies": { + "@llamaindex/core": "workspace:*", + "@llamaindex/env": "workspace:*", + "openai": "^4.60.0", + "remeda": "^2.12.0" + } +} diff --git 
a/packages/llamaindex/src/llm/azure.ts b/packages/llm/openai/src/azure.ts similarity index 100% rename from packages/llamaindex/src/llm/azure.ts rename to packages/llm/openai/src/azure.ts diff --git a/packages/llm/openai/src/embedding.ts b/packages/llm/openai/src/embedding.ts new file mode 100644 index 0000000000000000000000000000000000000000..e4b5024488e5cf1ec6a7a554a38211b6432f6d2c --- /dev/null +++ b/packages/llm/openai/src/embedding.ts @@ -0,0 +1,152 @@ +import { BaseEmbedding } from "@llamaindex/core/embeddings"; +import { Tokenizers } from "@llamaindex/env"; +import type { ClientOptions as OpenAIClientOptions } from "openai"; +import type { AzureOpenAIConfig } from "./azure.js"; +import { + getAzureConfigFromEnv, + getAzureModel, + shouldUseAzure, +} from "./azure.js"; +import type { OpenAISession } from "./llm.js"; +import { getOpenAISession } from "./llm.js"; + +export const ALL_OPENAI_EMBEDDING_MODELS = { + "text-embedding-ada-002": { + dimensions: 1536, + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, + "text-embedding-3-small": { + dimensions: 1536, + dimensionOptions: [512, 1536], + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, + "text-embedding-3-large": { + dimensions: 3072, + dimensionOptions: [256, 1024, 3072], + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, +}; + +type ModelKeys = keyof typeof ALL_OPENAI_EMBEDDING_MODELS; + +export class OpenAIEmbedding extends BaseEmbedding { + /** embeddding model. defaults to "text-embedding-ada-002" */ + model: string; + /** number of dimensions of the resulting vector, for models that support choosing fewer dimensions. undefined will default to model default */ + dimensions?: number | undefined; + + // OpenAI session params + + /** api key */ + apiKey?: string | undefined = undefined; + /** maximum number of retries, default 10 */ + maxRetries: number; + /** timeout in ms, default 60 seconds */ + timeout?: number | undefined; + /** other session options for OpenAI */ + additionalSessionOptions?: + | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout"> + | undefined; + + /** session object */ + session: OpenAISession; + + /** + * OpenAI Embedding + * @param init - initial parameters + */ + constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) { + super(); + + this.model = init?.model ?? "text-embedding-ada-002"; + this.dimensions = init?.dimensions; // if no dimensions provided, will be undefined/not sent to OpenAI + + this.embedBatchSize = init?.embedBatchSize ?? 10; + this.maxRetries = init?.maxRetries ?? 10; + + this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds + this.additionalSessionOptions = init?.additionalSessionOptions; + + // find metadata for model + const key = Object.keys(ALL_OPENAI_EMBEDDING_MODELS).find( + (key) => key === this.model, + ) as ModelKeys | undefined; + if (key) { + this.embedInfo = ALL_OPENAI_EMBEDDING_MODELS[key]; + } + + if (init?.azure || shouldUseAzure()) { + const azureConfig = { + ...getAzureConfigFromEnv({ + model: getAzureModel(this.model), + }), + ...init?.azure, + }; + + this.apiKey = azureConfig.apiKey; + this.session = + init?.session ?? + getOpenAISession({ + azure: true, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + ...azureConfig, + }); + } else { + this.apiKey = init?.apiKey ?? undefined; + this.session = + init?.session ?? 
+ getOpenAISession({ + apiKey: this.apiKey, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + }); + } + } + + /** + * Get embeddings for a batch of texts + * @param texts + * @param options + */ + private async getOpenAIEmbedding(input: string[]): Promise<number[][]> { + // TODO: ensure this for every sub class by calling it in the base class + input = this.truncateMaxTokens(input); + + const { data } = await this.session.openai.embeddings.create( + this.dimensions + ? { + model: this.model, + dimensions: this.dimensions, // only sent to OpenAI if set by user + input, + } + : { + model: this.model, + input, + }, + ); + + return data.map((d) => d.embedding); + } + + /** + * Get embeddings for a batch of texts + * @param texts + */ + getTextEmbeddings = async (texts: string[]): Promise<number[][]> => { + return this.getOpenAIEmbedding(texts); + }; + + /** + * Get embeddings for a single text + * @param texts + */ + async getTextEmbedding(text: string): Promise<number[]> { + return (await this.getOpenAIEmbedding([text]))[0]!; + } +} diff --git a/packages/llm/openai/src/index.ts b/packages/llm/openai/src/index.ts new file mode 100644 index 0000000000000000000000000000000000000000..f1e663639c19225d50f1142545b3ac92f75488bf --- /dev/null +++ b/packages/llm/openai/src/index.ts @@ -0,0 +1,13 @@ +export { ALL_OPENAI_EMBEDDING_MODELS, OpenAIEmbedding } from "./embedding"; +export { + ALL_AVAILABLE_OPENAI_MODELS, + GPT35_MODELS, + GPT4_MODELS, + O1_MODELS, + OpenAI, + OpenAISession, + type OpenAIAdditionalChatOptions, + type OpenAIAdditionalMetadata, +} from "./llm"; + +export { type AzureOpenAIConfig } from "./azure"; diff --git a/packages/llm/openai/src/llm.ts b/packages/llm/openai/src/llm.ts new file mode 100644 index 0000000000000000000000000000000000000000..58d7aa1d22d710450ef3b282c94fd959bc883a9a --- /dev/null +++ b/packages/llm/openai/src/llm.ts @@ -0,0 +1,502 @@ +import { getEnv } from "@llamaindex/env"; +import type OpenAILLM from "openai"; +import type { + ClientOptions, + ClientOptions as OpenAIClientOptions, +} from "openai"; +import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai"; +import type { ChatModel } from "openai/resources/chat/chat"; +import { isDeepEqual } from "remeda"; + +import { + type BaseTool, + type ChatMessage, + type ChatResponse, + type ChatResponseChunk, + type LLM, + type LLMChatParamsNonStreaming, + type LLMChatParamsStreaming, + type LLMMetadata, + type MessageType, + type PartialToolCall, + ToolCallLLM, + type ToolCallLLMMessageOptions, +} from "@llamaindex/core/llms"; +import { + extractText, + wrapEventCaller, + wrapLLMEvent, +} from "@llamaindex/core/utils"; +import { Tokenizers } from "@llamaindex/env"; +import type { + ChatCompletionAssistantMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionRole, + ChatCompletionSystemMessageParam, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from "openai/resources/chat/completions"; +import type { ChatCompletionMessageParam } from "openai/resources/index.js"; +import type { AzureOpenAIConfig } from "./azure.js"; +import { + getAzureConfigFromEnv, + getAzureModel, + shouldUseAzure, +} from "./azure.js"; + +export class OpenAISession { + openai: Pick<OrigOpenAI, "chat" | "embeddings">; + + constructor(options: ClientOptions & { azure?: boolean } = {}) { + if (options.azure) { + this.openai = new AzureOpenAI(options as AzureOpenAIConfig); + } else { + if (!options.apiKey) { + options.apiKey = getEnv("OPENAI_API_KEY"); + } 
+ + if (!options.apiKey) { + throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable"); // Overriding OpenAI package's error message + } + + this.openai = new OrigOpenAI({ + ...options, + }); + } + } +} + +// I'm not 100% sure this is necessary vs. just starting a new session +// every time we make a call. They say they try to reuse connections +// so in theory this is more efficient, but we should test it in the future. +const defaultOpenAISession: { + session: OpenAISession; + options: ClientOptions; +}[] = []; + +/** + * Get a session for the OpenAI API. If one already exists with the same options, + * it will be returned. Otherwise, a new session will be created. + * @param options + * @returns + */ +export function getOpenAISession( + options: ClientOptions & { azure?: boolean } = {}, +) { + let session = defaultOpenAISession.find((session) => { + return isDeepEqual(session.options, options); + })?.session; + + if (!session) { + session = new OpenAISession(options); + defaultOpenAISession.push({ session, options }); + } + + return session; +} + +export const GPT4_MODELS = { + "chatgpt-4o-latest": { + contextWindow: 128000, + }, + "gpt-4": { contextWindow: 8192 }, + "gpt-4-32k": { contextWindow: 32768 }, + "gpt-4-32k-0613": { contextWindow: 32768 }, + "gpt-4-turbo": { contextWindow: 128000 }, + "gpt-4-turbo-preview": { contextWindow: 128000 }, + "gpt-4-1106-preview": { contextWindow: 128000 }, + "gpt-4-0125-preview": { contextWindow: 128000 }, + "gpt-4-vision-preview": { contextWindow: 128000 }, + "gpt-4o": { contextWindow: 128000 }, + "gpt-4o-2024-05-13": { contextWindow: 128000 }, + "gpt-4o-mini": { contextWindow: 128000 }, + "gpt-4o-mini-2024-07-18": { contextWindow: 128000 }, + "gpt-4o-2024-08-06": { contextWindow: 128000 }, + "gpt-4o-2024-09-14": { contextWindow: 128000 }, + "gpt-4o-2024-10-14": { contextWindow: 128000 }, + "gpt-4-0613": { contextWindow: 128000 }, + "gpt-4-turbo-2024-04-09": { contextWindow: 128000 }, + "gpt-4-0314": { contextWindow: 128000 }, + "gpt-4-32k-0314": { contextWindow: 32768 }, +}; + +// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future +export const GPT35_MODELS = { + "gpt-3.5-turbo": { contextWindow: 16385 }, + "gpt-3.5-turbo-0613": { contextWindow: 4096 }, + "gpt-3.5-turbo-16k": { contextWindow: 16385 }, + "gpt-3.5-turbo-16k-0613": { contextWindow: 16385 }, + "gpt-3.5-turbo-1106": { contextWindow: 16385 }, + "gpt-3.5-turbo-0125": { contextWindow: 16385 }, + "gpt-3.5-turbo-0301": { contextWindow: 16385 }, +}; + +export const O1_MODELS = { + "o1-preview": { + contextWindow: 128000, + }, + "o1-preview-2024-09-12": { + contextWindow: 128000, + }, + "o1-mini": { + contextWindow: 128000, + }, + "o1-mini-2024-09-12": { + contextWindow: 128000, + }, +}; + +/** + * We currently support GPT-3.5 and GPT-4 models + */ +export const ALL_AVAILABLE_OPENAI_MODELS = { + ...GPT4_MODELS, + ...GPT35_MODELS, + ...O1_MODELS, +} satisfies Record<ChatModel, { contextWindow: number }>; + +export function isFunctionCallingModel(llm: LLM): llm is OpenAI { + let model: string; + if (llm instanceof OpenAI) { + model = llm.model; + } else if ("model" in llm && typeof llm.model === "string") { + model = llm.model; + } else { + return false; + } + const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model); + const isOld = model.includes("0314") || model.includes("0301"); + const isO1 = model.startsWith("o1"); + return isChatModel && !isOld && !isO1; +} + +export type OpenAIAdditionalMetadata = {}; + +export type 
OpenAIAdditionalChatOptions = Omit< + Partial<OpenAILLM.Chat.ChatCompletionCreateParams>, + | "max_tokens" + | "messages" + | "model" + | "temperature" + | "top_p" + | "stream" + | "tools" + | "toolChoice" +>; + +export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> { + model: + | ChatModel + // string & {} is a hack to allow any string, but still give autocomplete + | (string & {}); + temperature: number; + topP: number; + maxTokens?: number | undefined; + additionalChatOptions?: OpenAIAdditionalChatOptions | undefined; + + // OpenAI session params + apiKey?: string | undefined = undefined; + maxRetries: number; + timeout?: number; + session: OpenAISession; + additionalSessionOptions?: + | undefined + | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">; + + constructor( + init?: Partial<OpenAI> & { + azure?: AzureOpenAIConfig; + }, + ) { + super(); + this.model = init?.model ?? "gpt-4o"; + this.temperature = init?.temperature ?? 0.1; + this.topP = init?.topP ?? 1; + this.maxTokens = init?.maxTokens ?? undefined; + + this.maxRetries = init?.maxRetries ?? 10; + this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds + this.additionalChatOptions = init?.additionalChatOptions; + this.additionalSessionOptions = init?.additionalSessionOptions; + + if (init?.azure || shouldUseAzure()) { + const azureConfig = { + ...getAzureConfigFromEnv({ + model: getAzureModel(this.model), + }), + ...init?.azure, + }; + + this.apiKey = azureConfig.apiKey; + this.session = + init?.session ?? + getOpenAISession({ + azure: true, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + ...azureConfig, + }); + } else { + this.apiKey = init?.apiKey ?? undefined; + this.session = + init?.session ?? + getOpenAISession({ + apiKey: this.apiKey, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + }); + } + } + + get supportToolCall() { + return isFunctionCallingModel(this); + } + + get metadata(): LLMMetadata & OpenAIAdditionalMetadata { + const contextWindow = + ALL_AVAILABLE_OPENAI_MODELS[ + this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS + ]?.contextWindow ?? 1024; + return { + model: this.model, + temperature: this.temperature, + topP: this.topP, + maxTokens: this.maxTokens, + contextWindow, + tokenizer: Tokenizers.CL100K_BASE, + }; + } + + static toOpenAIRole(messageType: MessageType): ChatCompletionRole { + switch (messageType) { + case "user": + return "user"; + case "assistant": + return "assistant"; + case "system": + return "system"; + default: + return "user"; + } + } + + static toOpenAIMessage( + messages: ChatMessage<ToolCallLLMMessageOptions>[], + ): ChatCompletionMessageParam[] { + return messages.map((message) => { + const options = message.options ?? {}; + if ("toolResult" in options) { + return { + tool_call_id: options.toolResult.id, + role: "tool", + content: extractText(message.content), + } satisfies ChatCompletionToolMessageParam; + } else if ("toolCall" in options) { + return { + role: "assistant", + content: extractText(message.content), + tool_calls: options.toolCall.map((toolCall) => { + return { + id: toolCall.id, + type: "function", + function: { + name: toolCall.name, + arguments: + typeof toolCall.input === "string" + ? 
toolCall.input + : JSON.stringify(toolCall.input), + }, + }; + }), + } satisfies ChatCompletionAssistantMessageParam; + } else if (message.role === "user") { + return { + role: "user", + content: message.content, + } satisfies ChatCompletionUserMessageParam; + } + + const response: + | ChatCompletionSystemMessageParam + | ChatCompletionUserMessageParam + | ChatCompletionMessageToolCall = { + // fixme(alex): type assertion + role: OpenAI.toOpenAIRole(message.role) as never, + // fixme: should not extract text, but assert content is string + content: extractText(message.content), + }; + return response; + }); + } + + chat( + params: LLMChatParamsStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>; + chat( + params: LLMChatParamsNonStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise<ChatResponse<ToolCallLLMMessageOptions>>; + @wrapEventCaller + @wrapLLMEvent + async chat( + params: + | LLMChatParamsNonStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + > + | LLMChatParamsStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise< + | ChatResponse<ToolCallLLMMessageOptions> + | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> + > { + const { messages, stream, tools, additionalChatOptions } = params; + const baseRequestParams = <OpenAILLM.Chat.ChatCompletionCreateParams>{ + model: this.model, + temperature: this.temperature, + max_tokens: this.maxTokens, + tools: tools?.map(OpenAI.toTool), + messages: OpenAI.toOpenAIMessage(messages), + top_p: this.topP, + ...Object.assign({}, this.additionalChatOptions, additionalChatOptions), + }; + + if ( + Array.isArray(baseRequestParams.tools) && + baseRequestParams.tools.length === 0 + ) { + // remove empty tools array to avoid OpenAI error + delete baseRequestParams.tools; + } + + // Streaming + if (stream) { + return this.streamChat(baseRequestParams); + } + + // Non-streaming + const response = await this.session.openai.chat.completions.create({ + ...baseRequestParams, + stream: false, + }); + + const content = response.choices[0]!.message?.content ?? ""; + + return { + raw: response, + message: { + content, + role: response.choices[0]!.message.role, + options: response.choices[0]!.message?.tool_calls + ? { + toolCall: response.choices[0]!.message.tool_calls.map( + (toolCall) => ({ + id: toolCall.id, + name: toolCall.function.name, + input: toolCall.function.arguments, + }), + ), + } + : {}, + }, + }; + } + + // todo: this wrapper is ugly, refactor it + @wrapEventCaller + protected async *streamChat( + baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams, + ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> { + const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> = + await this.session.openai.chat.completions.create({ + ...baseRequestParams, + stream: true, + }); + + // TODO: add callback to streamConverter and use streamConverter here + // this will be used to keep track of the current tool call, make sure input are valid json object. 
+ let currentToolCall: PartialToolCall | null = null; + const toolCallMap = new Map<string, PartialToolCall>(); + for await (const part of stream) { + if (part.choices.length === 0) continue; + const choice = part.choices[0]!; + // skip parts that don't have any content + if (!(choice.delta.content || choice.delta.tool_calls)) continue; + + let shouldEmitToolCall: PartialToolCall | null = null; + if ( + choice.delta.tool_calls?.[0]!.id && + currentToolCall && + choice.delta.tool_calls?.[0].id !== currentToolCall.id + ) { + shouldEmitToolCall = { + ...currentToolCall, + input: JSON.parse(currentToolCall.input), + }; + } + if (choice.delta.tool_calls?.[0]!.id) { + currentToolCall = { + name: choice.delta.tool_calls[0].function!.name!, + id: choice.delta.tool_calls[0].id, + input: choice.delta.tool_calls[0].function!.arguments!, + }; + toolCallMap.set(choice.delta.tool_calls[0].id, currentToolCall); + } else { + if (choice.delta.tool_calls?.[0]!.function?.arguments) { + currentToolCall!.input += + choice.delta.tool_calls[0].function.arguments; + } + } + + const isDone: boolean = choice.finish_reason !== null; + + if (isDone && currentToolCall) { + // for the last one, we need to emit the tool call + shouldEmitToolCall = { + ...currentToolCall, + input: JSON.parse(currentToolCall.input), + }; + } + + yield { + raw: part, + options: shouldEmitToolCall + ? { toolCall: [shouldEmitToolCall] } + : currentToolCall + ? { + toolCall: [currentToolCall], + } + : {}, + delta: choice.delta.content ?? "", + }; + } + toolCallMap.clear(); + return; + } + + static toTool(tool: BaseTool): ChatCompletionTool { + return { + type: "function", + function: tool.metadata.parameters + ? { + name: tool.metadata.name, + description: tool.metadata.description, + parameters: tool.metadata.parameters, + } + : { + name: tool.metadata.name, + description: tool.metadata.description, + }, + }; + } +} diff --git a/packages/llm/openai/tsconfig.json b/packages/llm/openai/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..5a94aa033116766429d5c2d42f337c5b1393ba86 --- /dev/null +++ b/packages/llm/openai/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib" + }, + "include": ["./src"], + "references": [ + { + "path": "../../llamaindex/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c7ed08d7392b6c652f19cfdfa4996fb516f08a10..8186577fe5ea7585961ecaf302f3701a95746da2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -516,6 +516,9 @@ importers: '@llamaindex/env': specifier: workspace:* version: link:../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../llm/openai '@mistralai/mistralai': specifier: ^1.0.4 version: 1.0.4(zod@3.23.8) @@ -652,9 +655,6 @@ importers: '@faker-js/faker': specifier: ^8.4.1 version: 8.4.1 - '@llamaindex/core': - specifier: workspace:* - version: link:../../core '@types/node': specifier: ^22.5.1 version: 22.5.1 @@ -853,6 +853,25 @@ importers: specifier: ^2.0.5 version: 2.0.5(@types/node@22.5.1)(terser@5.31.6) + packages/llm/openai: + dependencies: + '@llamaindex/core': + specifier: workspace:* + version: link:../../core + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + openai: + specifier: ^4.60.0 + version: 4.60.0(encoding@0.1.13)(zod@3.23.8) + remeda: + specifier: ^2.12.0 + version: 2.12.0 + devDependencies: + bunchee: + 
specifier: 5.3.2 + version: 5.3.2(typescript@5.5.4) + packages/wasm-tools: dependencies: '@assemblyscript/loader': @@ -9472,6 +9491,9 @@ packages: remark-stringify@11.0.0: resolution: {integrity: sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==} + remeda@2.12.0: + resolution: {integrity: sha512-VAlyhh1os8boCA9/7yN9sXzo0tfCeOwScGXztwBspS0DXQmbIN8xTBfEABvbAW8rMJMPzqxQ1UymHquuESh/pg==} + renderkid@3.0.0: resolution: {integrity: sha512-q/7VIQA8lmM1hF+jn+sFSPWGlMkSAeNYcPLmDQx2zzuiDfaLrOmumR8iaUKlenFgh0XRPIUeSPlH3A+AW3Z5pg==} @@ -10456,6 +10478,10 @@ packages: resolution: {integrity: sha512-RAH822pAdBgcNMAfWnCBU3CFZcfZ/i1eZjwFU/dsLKumyuuP3niueg2UAukXYF0E2AAoc82ZSSf9J0WQBinzHA==} engines: {node: '>=12.20'} + type-fest@4.26.1: + resolution: {integrity: sha512-yOGpmOAL7CkKe/91I5O3gPICmJNLJ1G4zFYVAsRHg7M64biSnPtRj0WNQt++bRkjYOqjWXrhnUw1utzmVErAdg==} + engines: {node: '>=16'} + type-is@1.6.18: resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==} engines: {node: '>= 0.6'} @@ -14763,7 +14789,7 @@ snapshots: '@smithy/is-array-buffer@2.2.0': dependencies: - tslib: 2.6.3 + tslib: 2.7.0 '@smithy/is-array-buffer@3.0.0': dependencies: @@ -22166,6 +22192,10 @@ snapshots: mdast-util-to-markdown: 2.1.0 unified: 11.0.5 + remeda@2.12.0: + dependencies: + type-fest: 4.26.1 + renderkid@3.0.0: dependencies: css-select: 4.3.0 @@ -23263,6 +23293,8 @@ snapshots: type-fest@2.19.0: {} + type-fest@4.26.1: {} + type-is@1.6.18: dependencies: media-typer: 0.3.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 79bfed33e0bbc99608bb4c778da032e07c8b5b4d..78e2bf350152e5bf4ec6356c5c525bd84fd26f79 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,6 +1,7 @@ packages: - "apps/*" - "packages/*" + - "packages/llm/*" - "packages/core/tests" - "packages/llamaindex/tests" - "packages/llamaindex/e2e" diff --git a/tsconfig.json b/tsconfig.json index 3a11d68c083c49212e2dd620109e5ac9f4e8b0a6..11288b4933d01f94ba7bc12cf5766135c0079814 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -26,6 +26,9 @@ { "path": "./packages/community/tsconfig.json" }, + { + "path": "./packages/llm/openai/tsconfig.json" + }, { "path": "./packages/cloud/tsconfig.json" },