diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6a0cb7271f2c8e5d83af19d9937b87c967aee2e3..a13b0d6f26a0f9f792ed7e79a2a8a75bff5fd0f5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -151,6 +151,9 @@ jobs:
       - name: Pack @llamaindex/groq
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/llm/groq
+      - name: Pack @llamaindex/ollama
+        run: pnpm pack --pack-destination ${{ runner.temp }}
+        working-directory: packages/llm/ollama
       - name: Pack @llamaindex/core
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/core
diff --git a/packages/llamaindex/package.json b/packages/llamaindex/package.json
index 201d6c6327991397c77145e8a1844e1cd2210887..d90b61321249c52f0b6bacf353edf2c0528585e3 100644
--- a/packages/llamaindex/package.json
+++ b/packages/llamaindex/package.json
@@ -34,6 +34,7 @@
     "@llamaindex/core": "workspace:*",
     "@llamaindex/env": "workspace:*",
     "@llamaindex/groq": "workspace:*",
+    "@llamaindex/ollama": "workspace:*",
     "@llamaindex/openai": "workspace:*",
     "@mistralai/mistralai": "^1.0.4",
     "@mixedbread-ai/sdk": "^2.2.11",
diff --git a/packages/llamaindex/src/embeddings/OllamaEmbedding.ts b/packages/llamaindex/src/embeddings/OllamaEmbedding.ts
index f6323c1495566453a43a23df661aef76d5a50773..2bd40a48eeaa17652ef2d5a46e15fc3e0aa9b2b1 100644
--- a/packages/llamaindex/src/embeddings/OllamaEmbedding.ts
+++ b/packages/llamaindex/src/embeddings/OllamaEmbedding.ts
@@ -1,5 +1,5 @@
 import type { BaseEmbedding } from "@llamaindex/core/embeddings";
-import { Ollama } from "../llm/ollama.js";
+import { Ollama } from "@llamaindex/ollama";
 
 /**
  * OllamaEmbedding is an alias for Ollama that implements the BaseEmbedding interface.
diff --git a/packages/llamaindex/src/internal/deps/ollama.d.ts b/packages/llamaindex/src/internal/deps/ollama.d.ts
deleted file mode 100644
index 39e6218f7c9c8e33073b2ceae6f00fe7eb484882..0000000000000000000000000000000000000000
--- a/packages/llamaindex/src/internal/deps/ollama.d.ts
+++ /dev/null
@@ -1,264 +0,0 @@
-type Fetch = typeof fetch;
-interface Config {
-  host: string;
-  fetch?: Fetch;
-  proxy?: boolean;
-}
-interface Options {
-  numa: boolean;
-  num_ctx: number;
-  num_batch: number;
-  main_gpu: number;
-  low_vram: boolean;
-  f16_kv: boolean;
-  logits_all: boolean;
-  vocab_only: boolean;
-  use_mmap: boolean;
-  use_mlock: boolean;
-  embedding_only: boolean;
-  num_thread: number;
-  num_keep: number;
-  seed: number;
-  num_predict: number;
-  top_k: number;
-  top_p: number;
-  tfs_z: number;
-  typical_p: number;
-  repeat_last_n: number;
-  temperature: number;
-  repeat_penalty: number;
-  presence_penalty: number;
-  frequency_penalty: number;
-  mirostat: number;
-  mirostat_tau: number;
-  mirostat_eta: number;
-  penalize_newline: boolean;
-  stop: string[];
-}
-interface GenerateRequest {
-  model: string;
-  prompt: string;
-  system?: string;
-  template?: string;
-  context?: number[];
-  stream?: boolean;
-  raw?: boolean;
-  format?: string;
-  images?: Uint8Array[] | string[];
-  keep_alive?: string | number;
-  options?: Partial<Options>;
-}
-interface Message {
-  role: string;
-  content: string;
-  images?: Uint8Array[] | string[];
-}
-interface ChatRequest {
-  model: string;
-  messages?: Message[];
-  stream?: boolean;
-  format?: string;
-  keep_alive?: string | number;
-  options?: Partial<Options>;
-}
-interface PullRequest {
-  model: string;
-  insecure?: boolean;
-  stream?: boolean;
-}
-interface PushRequest {
-  model: string;
-  insecure?: boolean;
-  stream?: boolean;
-}
-interface CreateRequest {
-  model: string;
-  path?: string;
-  modelfile?: string;
-  stream?: boolean;
-}
-interface DeleteRequest {
-  model: string;
-}
-interface CopyRequest {
-  source: string;
-  destination: string;
-}
-interface ShowRequest {
-  model: string;
-  system?: string;
-  template?: string;
-  options?: Partial<Options>;
-}
-interface EmbeddingsRequest {
-  model: string;
-  prompt: string;
-  keep_alive?: string | number;
-  options?: Partial<Options>;
-}
-interface GenerateResponse {
-  model: string;
-  created_at: Date;
-  response: string;
-  done: boolean;
-  context: number[];
-  total_duration: number;
-  load_duration: number;
-  prompt_eval_count: number;
-  prompt_eval_duration: number;
-  eval_count: number;
-  eval_duration: number;
-}
-interface ChatResponse {
-  model: string;
-  created_at: Date;
-  message: Message;
-  done: boolean;
-  total_duration: number;
-  load_duration: number;
-  prompt_eval_count: number;
-  prompt_eval_duration: number;
-  eval_count: number;
-  eval_duration: number;
-}
-interface EmbeddingsResponse {
-  embedding: number[];
-}
-interface ProgressResponse {
-  status: string;
-  digest: string;
-  total: number;
-  completed: number;
-}
-interface ModelResponse {
-  name: string;
-  modified_at: Date;
-  size: number;
-  digest: string;
-  details: ModelDetails;
-}
-interface ModelDetails {
-  parent_model: string;
-  format: string;
-  family: string;
-  families: string[];
-  parameter_size: string;
-  quantization_level: string;
-}
-interface ShowResponse {
-  license: string;
-  modelfile: string;
-  parameters: string;
-  template: string;
-  system: string;
-  details: ModelDetails;
-  messages: Message[];
-}
-interface ListResponse {
-  models: ModelResponse[];
-}
-interface ErrorResponse {
-  error: string;
-}
-interface StatusResponse {
-  status: string;
-}
-
-declare class Ollama {
-  protected readonly config: Config;
-  protected readonly fetch: Fetch;
-  private abortController;
-  constructor(config?: Partial<Config>);
-  abort(): void;
-  protected processStreamableRequest<T extends object>(
-    endpoint: string,
-    request: {
-      stream?: boolean;
-    } & Record<string, any>,
-  ): Promise<T | AsyncGenerator<T>>;
-  encodeImage(image: Uint8Array | string): Promise<string>;
-  generate(
-    request: GenerateRequest & {
-      stream: true;
-    },
-  ): Promise<AsyncGenerator<GenerateResponse>>;
-  generate(
-    request: GenerateRequest & {
-      stream?: false;
-    },
-  ): Promise<GenerateResponse>;
-  chat(
-    request: ChatRequest & {
-      stream: true;
-    },
-  ): Promise<AsyncGenerator<ChatResponse>>;
-  chat(
-    request: ChatRequest & {
-      stream?: false;
-    },
-  ): Promise<ChatResponse>;
-  create(
-    request: CreateRequest & {
-      stream: true;
-    },
-  ): Promise<AsyncGenerator<ProgressResponse>>;
-  create(
-    request: CreateRequest & {
-      stream?: false;
-    },
-  ): Promise<ProgressResponse>;
-  pull(
-    request: PullRequest & {
-      stream: true;
-    },
-  ): Promise<AsyncGenerator<ProgressResponse>>;
-  pull(
-    request: PullRequest & {
-      stream?: false;
-    },
-  ): Promise<ProgressResponse>;
-  push(
-    request: PushRequest & {
-      stream: true;
-    },
-  ): Promise<AsyncGenerator<ProgressResponse>>;
-  push(
-    request: PushRequest & {
-      stream?: false;
-    },
-  ): Promise<ProgressResponse>;
-  delete(request: DeleteRequest): Promise<StatusResponse>;
-  copy(request: CopyRequest): Promise<StatusResponse>;
-  list(): Promise<ListResponse>;
-  show(request: ShowRequest): Promise<ShowResponse>;
-  embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse>;
-}
-declare const _default: Ollama;
-
-export {
-  Ollama,
-  _default as default,
-  type ChatRequest,
-  type ChatResponse,
-  type Config,
-  type CopyRequest,
-  type CreateRequest,
-  type DeleteRequest,
-  type EmbeddingsRequest,
-  type EmbeddingsResponse,
-  type ErrorResponse,
-  type Fetch,
-  type GenerateRequest,
-  type GenerateResponse,
-  type ListResponse,
-  type Message,
-  type ModelDetails,
-  type ModelResponse,
-  type Options,
-  type ProgressResponse,
-  type PullRequest,
-  type PushRequest,
-  type ShowRequest,
-  type ShowResponse,
-  type StatusResponse,
-};
diff --git a/packages/llamaindex/src/internal/deps/ollama.js b/packages/llamaindex/src/internal/deps/ollama.js
deleted file mode 100644
index db189e10a1cc3196352378efe1f2b7f753aa6ab7..0000000000000000000000000000000000000000
--- a/packages/llamaindex/src/internal/deps/ollama.js
+++ /dev/null
@@ -1,462 +0,0 @@
-// generate from "tsup ./src/browser.js --format esm --dts"
-var __defProp = Object.defineProperty;
-var __getOwnPropSymbols = Object.getOwnPropertySymbols;
-var __hasOwnProp = Object.prototype.hasOwnProperty;
-var __propIsEnum = Object.prototype.propertyIsEnumerable;
-var __knownSymbol = (name, symbol) => {
-  return (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
-};
-var __defNormalProp = (obj, key, value) =>
-  key in obj
-    ? __defProp(obj, key, {
-        enumerable: true,
-        configurable: true,
-        writable: true,
-        value,
-      })
-    : (obj[key] = value);
-var __spreadValues = (a, b) => {
-  for (var prop in b || (b = {}))
-    if (__hasOwnProp.call(b, prop)) __defNormalProp(a, prop, b[prop]);
-  if (__getOwnPropSymbols)
-    for (var prop of __getOwnPropSymbols(b)) {
-      if (__propIsEnum.call(b, prop)) __defNormalProp(a, prop, b[prop]);
-    }
-  return a;
-};
-var __async = (__this, __arguments, generator) => {
-  return new Promise((resolve, reject) => {
-    var fulfilled = (value) => {
-      try {
-        step(generator.next(value));
-      } catch (e) {
-        reject(e);
-      }
-    };
-    var rejected = (value) => {
-      try {
-        step(generator.throw(value));
-      } catch (e) {
-        reject(e);
-      }
-    };
-    var step = (x) =>
-      x.done
-        ? resolve(x.value)
-        : Promise.resolve(x.value).then(fulfilled, rejected);
-    step((generator = generator.apply(__this, __arguments)).next());
-  });
-};
-var __await = function (promise, isYieldStar) {
-  this[0] = promise;
-  this[1] = isYieldStar;
-};
-var __asyncGenerator = (__this, __arguments, generator) => {
-  var resume = (k, v, yes, no) => {
-    try {
-      var x = generator[k](v),
-        isAwait = (v = x.value) instanceof __await,
-        done = x.done;
-      Promise.resolve(isAwait ? v[0] : v)
-        .then((y) =>
-          isAwait
-            ? resume(
-                k === "return" ? k : "next",
-                v[1] ? { done: y.done, value: y.value } : y,
-                yes,
-                no,
-              )
-            : yes({ value: y, done }),
-        )
-        .catch((e) => resume("throw", e, yes, no));
-    } catch (e) {
-      no(e);
-    }
-  };
-  var method = (k) =>
-    (it[k] = (x) => new Promise((yes, no) => resume(k, x, yes, no)));
-  var it = {};
-  return (
-    (generator = generator.apply(__this, __arguments)),
-    (it[__knownSymbol("asyncIterator")] = () => it),
-    method("next"),
-    method("throw"),
-    method("return"),
-    it
-  );
-};
-var __forAwait = (obj, it, method) =>
-  (it = obj[__knownSymbol("asyncIterator")])
-    ? it.call(obj)
-    : ((obj = obj[__knownSymbol("iterator")]()),
-      (it = {}),
-      (method = (key, fn) =>
-        (fn = obj[key]) &&
-        (it[key] = (arg) =>
-          new Promise(
-            (yes, no, done) => (
-              (arg = fn.call(obj, arg)),
-              (done = arg.done),
-              Promise.resolve(arg.value).then(
-                (value) => yes({ value, done }),
-                no,
-              )
-            ),
-          ))),
-      method("next"),
-      method("return"),
-      it);
-
-// src/version.ts
-var version = "0.0.0";
-
-// src/utils.ts
-var ResponseError = class _ResponseError extends Error {
-  constructor(error, status_code) {
-    super(error);
-    this.error = error;
-    this.status_code = status_code;
-    this.name = "ResponseError";
-    if (Error.captureStackTrace) {
-      Error.captureStackTrace(this, _ResponseError);
-    }
-  }
-};
-var checkOk = (response) =>
-  __async(void 0, null, function* () {
-    var _a;
-    if (!response.ok) {
-      let message = `Error ${response.status}: ${response.statusText}`;
-      let errorData = null;
-      if (
-        (_a = response.headers.get("content-type")) == null
-          ? void 0
-          : _a.includes("application/json")
-      ) {
-        try {
-          errorData = yield response.json();
-          message = errorData.error || message;
-        } catch (error) {
-          console.log("Failed to parse error response as JSON");
-        }
-      } else {
-        try {
-          console.log("Getting text from response");
-          const textResponse = yield response.text();
-          message = textResponse || message;
-        } catch (error) {
-          console.log("Failed to get text from error response");
-        }
-      }
-      throw new ResponseError(message, response.status);
-    }
-  });
-function getPlatform() {
-  if (typeof window !== "undefined" && window.navigator) {
-    return `${window.navigator.platform.toLowerCase()} Browser/${navigator.userAgent};`;
-  } else if (typeof process !== "undefined") {
-    return `${process.arch} ${process.platform} Node.js/${process.version}`;
-  }
-  return "";
-}
-var fetchWithHeaders = (_0, _1, ..._2) =>
-  __async(void 0, [_0, _1, ..._2], function* (fetch2, url, options = {}) {
-    const defaultHeaders = {
-      "Content-Type": "application/json",
-      Accept: "application/json",
-      "User-Agent": `ollama-js/${version} (${getPlatform()})`,
-    };
-    if (!options.headers) {
-      options.headers = {};
-    }
-    options.headers = __spreadValues(
-      __spreadValues({}, defaultHeaders),
-      options.headers,
-    );
-    return fetch2(url, options);
-  });
-var get = (fetch2, host) =>
-  __async(void 0, null, function* () {
-    const response = yield fetchWithHeaders(fetch2, host);
-    yield checkOk(response);
-    return response;
-  });
-var post = (fetch2, host, data, options) =>
-  __async(void 0, null, function* () {
-    const isRecord = (input) => {
-      return (
-        input !== null && typeof input === "object" && !Array.isArray(input)
-      );
-    };
-    const formattedData = isRecord(data) ? JSON.stringify(data) : data;
-    const response = yield fetchWithHeaders(fetch2, host, {
-      method: "POST",
-      body: formattedData,
-      signal: options == null ? void 0 : options.signal,
-    });
-    yield checkOk(response);
-    return response;
-  });
-var del = (fetch2, host, data) =>
-  __async(void 0, null, function* () {
-    const response = yield fetchWithHeaders(fetch2, host, {
-      method: "DELETE",
-      body: JSON.stringify(data),
-    });
-    yield checkOk(response);
-    return response;
-  });
-var parseJSON = function (itr) {
-  return __asyncGenerator(this, null, function* () {
-    var _a;
-    const decoder = new TextDecoder("utf-8");
-    let buffer = "";
-    const reader = itr.getReader();
-    while (true) {
-      const { done, value: chunk } = yield new __await(reader.read());
-      if (done) {
-        break;
-      }
-      buffer += decoder.decode(chunk);
-      const parts = buffer.split("\n");
-      buffer = (_a = parts.pop()) != null ? _a : "";
-      for (const part of parts) {
-        try {
-          yield JSON.parse(part);
-        } catch (error) {
-          console.warn("invalid json: ", part);
-        }
-      }
-    }
-    for (const part of buffer.split("\n").filter((p) => p !== "")) {
-      try {
-        yield JSON.parse(part);
-      } catch (error) {
-        console.warn("invalid json: ", part);
-      }
-    }
-  });
-};
-var formatHost = (host) => {
-  if (!host) {
-    return "http://127.0.0.1:11434";
-  }
-  let isExplicitProtocol = host.includes("://");
-  if (host.startsWith(":")) {
-    host = `http://127.0.0.1${host}`;
-    isExplicitProtocol = false;
-  }
-  if (!isExplicitProtocol) {
-    host = `http://${host}`;
-  }
-  const url = new URL(host);
-  let port = url.port;
-  if (!port) {
-    if (!isExplicitProtocol) {
-      port = "11434";
-    } else {
-      port = url.protocol === "https:" ? "443" : "80";
-    }
-  }
-  let formattedHost = `${url.protocol}//${url.hostname}:${port}${url.pathname}`;
-  if (formattedHost.endsWith("/")) {
-    formattedHost = formattedHost.slice(0, -1);
-  }
-  return formattedHost;
-};
-
-// src/browser.ts
-// import "whatwg-fetch";
-var Ollama = class {
-  constructor(config) {
-    var _a;
-    this.config = {
-      host: "",
-    };
-    if (!(config == null ? void 0 : config.proxy)) {
-      this.config.host = formatHost(
-        (_a = config == null ? void 0 : config.host) != null
-          ? _a
-          : "http://127.0.0.1:11434",
-      );
-    }
-    this.fetch = fetch;
-    if ((config == null ? void 0 : config.fetch) != null) {
-      this.fetch = config.fetch;
-    }
-    this.abortController = new AbortController();
-  }
-  // Abort any ongoing requests to Ollama
-  abort() {
-    this.abortController.abort();
-    this.abortController = new AbortController();
-  }
-  processStreamableRequest(endpoint, request) {
-    return __async(this, null, function* () {
-      var _a;
-      request.stream = (_a = request.stream) != null ? _a : false;
-      const response = yield post(
-        this.fetch,
-        `${this.config.host}/api/${endpoint}`,
-        __spreadValues({}, request),
-        { signal: this.abortController.signal },
-      );
-      if (!response.body) {
-        throw new Error("Missing body");
-      }
-      const itr = parseJSON(response.body);
-      if (request.stream) {
-        return (function () {
-          return __asyncGenerator(this, null, function* () {
-            try {
-              for (
-                var iter = __forAwait(itr), more, temp, error;
-                (more = !(temp = yield new __await(iter.next())).done);
-                more = false
-              ) {
-                const message = temp.value;
-                if ("error" in message) {
-                  throw new Error(message.error);
-                }
-                yield message;
-                if (message.done || message.status === "success") {
-                  return;
-                }
-              }
-            } catch (temp) {
-              error = [temp];
-            } finally {
-              try {
-                more &&
-                  (temp = iter.return) &&
-                  (yield new __await(temp.call(iter)));
-              } finally {
-                if (error) throw error[0];
-              }
-            }
-            throw new Error(
-              "Did not receive done or success response in stream.",
-            );
-          });
-        })();
-      } else {
-        const message = yield itr.next();
-        if (!message.value.done && message.value.status !== "success") {
-          throw new Error("Expected a completed response.");
-        }
-        return message.value;
-      }
-    });
-  }
-  encodeImage(image) {
-    return __async(this, null, function* () {
-      if (typeof image !== "string") {
-        const uint8Array = new Uint8Array(image);
-        const numberArray = Array.from(uint8Array);
-        const base64String = btoa(String.fromCharCode.apply(null, numberArray));
-        return base64String;
-      }
-      return image;
-    });
-  }
-  generate(request) {
-    return __async(this, null, function* () {
-      if (request.images) {
-        request.images = yield Promise.all(
-          request.images.map(this.encodeImage.bind(this)),
-        );
-      }
-      return this.processStreamableRequest("generate", request);
-    });
-  }
-  chat(request) {
-    return __async(this, null, function* () {
-      if (request.messages) {
-        for (const message of request.messages) {
-          if (message.images) {
-            message.images = yield Promise.all(
-              message.images.map(this.encodeImage.bind(this)),
-            );
-          }
-        }
-      }
-      return this.processStreamableRequest("chat", request);
-    });
-  }
-  create(request) {
-    return __async(this, null, function* () {
-      return this.processStreamableRequest("create", {
-        name: request.model,
-        stream: request.stream,
-        modelfile: request.modelfile,
-      });
-    });
-  }
-  pull(request) {
-    return __async(this, null, function* () {
-      return this.processStreamableRequest("pull", {
-        name: request.model,
-        stream: request.stream,
-        insecure: request.insecure,
-      });
-    });
-  }
-  push(request) {
-    return __async(this, null, function* () {
-      return this.processStreamableRequest("push", {
-        name: request.model,
-        stream: request.stream,
-        insecure: request.insecure,
-      });
-    });
-  }
-  delete(request) {
-    return __async(this, null, function* () {
-      yield del(this.fetch, `${this.config.host}/api/delete`, {
-        name: request.model,
-      });
-      return { status: "success" };
-    });
-  }
-  copy(request) {
-    return __async(this, null, function* () {
-      yield post(
-        this.fetch,
-        `${this.config.host}/api/copy`,
-        __spreadValues({}, request),
-      );
-      return { status: "success" };
-    });
-  }
-  list() {
-    return __async(this, null, function* () {
-      const response = yield get(this.fetch, `${this.config.host}/api/tags`);
-      const listResponse = yield response.json();
-      return listResponse;
-    });
-  }
-  show(request) {
-    return __async(this, null, function* () {
-      const response = yield post(
-        this.fetch,
-        `${this.config.host}/api/show`,
-        __spreadValues({}, request),
-      );
-      const showResponse = yield response.json();
-      return showResponse;
-    });
-  }
-  embeddings(request) {
-    return __async(this, null, function* () {
-      const response = yield post(
-        this.fetch,
-        `${this.config.host}/api/embeddings`,
-        __spreadValues({}, request),
-      );
-      const embeddingsResponse = yield response.json();
-      return embeddingsResponse;
-    });
-  }
-};
-var browser_default = new Ollama();
-export { Ollama, browser_default as default };
diff --git a/packages/llamaindex/src/internal/deps/ollama.license b/packages/llamaindex/src/internal/deps/ollama.license
deleted file mode 100644
index 49bd8b185208cba029f60faf87388482ab647775..0000000000000000000000000000000000000000
--- a/packages/llamaindex/src/internal/deps/ollama.license
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2023 Saul
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/packages/llamaindex/src/llm/index.ts b/packages/llamaindex/src/llm/index.ts
index 61698abcb587e633ab194121d0f94d2d7b85e676..5596440a0ce24099adeb6d692f4299e67c5855e8 100644
--- a/packages/llamaindex/src/llm/index.ts
+++ b/packages/llamaindex/src/llm/index.ts
@@ -23,7 +23,7 @@ export { Portkey } from "./portkey.js";
 export * from "./replicate_ai.js";
 // Note: The type aliases for replicate are to simplify usage for Llama 2 (we're using replicate for Llama 2 support)
 export { DeepInfra } from "./deepinfra.js";
-export { Ollama, type OllamaParams } from "./ollama.js";
+export * from "./ollama.js";
 export {
   ALL_AVAILABLE_REPLICATE_MODELS,
   DeuceChatStrategy,
diff --git a/packages/llamaindex/src/llm/ollama.ts b/packages/llamaindex/src/llm/ollama.ts
index 1fea1b7d0467dcb86e7dcc97d532d3ab0e8edd27..0047c5bc9ee5ba43ce94887c74e81791c0600f24 100644
--- a/packages/llamaindex/src/llm/ollama.ts
+++ b/packages/llamaindex/src/llm/ollama.ts
@@ -1,253 +1 @@
-import { BaseEmbedding } from "@llamaindex/core/embeddings";
-import type {
-  ChatResponse,
-  ChatResponseChunk,
-  CompletionResponse,
-  LLM,
-  LLMChatParamsNonStreaming,
-  LLMChatParamsStreaming,
-  LLMCompletionParamsNonStreaming,
-  LLMCompletionParamsStreaming,
-  LLMMetadata,
-} from "@llamaindex/core/llms";
-import { extractText, streamConverter } from "@llamaindex/core/utils";
-import {
-  Ollama as OllamaBase,
-  type Config,
-  type CopyRequest,
-  type CreateRequest,
-  type DeleteRequest,
-  type EmbeddingsRequest,
-  type EmbeddingsResponse,
-  type GenerateRequest,
-  type ListResponse,
-  type ChatResponse as OllamaChatResponse,
-  type GenerateResponse as OllamaGenerateResponse,
-  type Options,
-  type ProgressResponse,
-  type PullRequest,
-  type PushRequest,
-  type ShowRequest,
-  type ShowResponse,
-  type StatusResponse,
-} from "../internal/deps/ollama.js";
-
-const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => {
-  return {
-    raw: part,
-    delta: part.message.content,
-  };
-};
-
-const completionAccessor = (
-  part: OllamaGenerateResponse,
-): CompletionResponse => {
-  return { text: part.response, raw: part };
-};
-
-export type OllamaParams = {
-  model: string;
-  config?: Partial<Config>;
-  options?: Partial<Options>;
-};
-
-/**
- * This class both implements the LLM and Embedding interfaces.
- */
-export class Ollama
-  extends BaseEmbedding
-  implements LLM, Omit<OllamaBase, "chat">
-{
-  readonly hasStreaming = true;
-
-  ollama: OllamaBase;
-
-  // https://ollama.ai/library
-  model: string;
-
-  options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> &
-    Pick<Options, "num_ctx" | "top_p" | "temperature"> = {
-    num_ctx: 4096,
-    top_p: 0.9,
-    temperature: 0.7,
-  };
-
-  constructor(params: OllamaParams) {
-    super();
-    this.model = params.model;
-    this.ollama = new OllamaBase(params.config);
-    if (params.options) {
-      this.options = {
-        ...this.options,
-        ...params.options,
-      };
-    }
-  }
-
-  get metadata(): LLMMetadata {
-    const { temperature, top_p, num_ctx } = this.options;
-    return {
-      model: this.model,
-      temperature: temperature,
-      topP: top_p,
-      maxTokens: undefined,
-      contextWindow: num_ctx,
-      tokenizer: undefined,
-    };
-  }
-
-  chat(
-    params: LLMChatParamsStreaming,
-  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  async chat(
-    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
-  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, stream } = params;
-    const payload = {
-      model: this.model,
-      messages: messages.map((message) => ({
-        role: message.role,
-        content: extractText(message.content),
-      })),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const chatResponse = await this.ollama.chat({
-        ...payload,
-        stream: false,
-      });
-
-      return {
-        message: {
-          role: "assistant",
-          content: chatResponse.message.content,
-        },
-        raw: chatResponse,
-      };
-    } else {
-      const stream = await this.ollama.chat({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, messageAccessor);
-    }
-  }
-
-  complete(
-    params: LLMCompletionParamsStreaming,
-  ): Promise<AsyncIterable<CompletionResponse>>;
-  complete(
-    params: LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse>;
-  async complete(
-    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
-    const { prompt, stream } = params;
-    const payload = {
-      model: this.model,
-      prompt: extractText(prompt),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const response = await this.ollama.generate({
-        ...payload,
-        stream: false,
-      });
-      return {
-        text: response.response,
-        raw: response,
-      };
-    } else {
-      const stream = await this.ollama.generate({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, completionAccessor);
-    }
-  }
-
-  private async getEmbedding(prompt: string): Promise<number[]> {
-    const payload = {
-      model: this.model,
-      prompt,
-      options: {
-        ...this.options,
-      },
-    };
-    const response = await this.ollama.embeddings({
-      ...payload,
-    });
-    return response.embedding;
-  }
-
-  async getTextEmbedding(text: string): Promise<number[]> {
-    return this.getEmbedding(text);
-  }
-
-  // Inherited from OllamaBase
-
-  push(
-    request: PushRequest & { stream: true },
-  ): Promise<AsyncGenerator<ProgressResponse, any, unknown>>;
-  push(
-    request: PushRequest & { stream?: false | undefined },
-  ): Promise<ProgressResponse>;
-  push(request: any): any {
-    return this.ollama.push(request);
-  }
-  abort(): void {
-    return this.ollama.abort();
-  }
-  encodeImage(image: string | Uint8Array): Promise<string> {
-    return this.ollama.encodeImage(image);
-  }
-  generate(
-    request: GenerateRequest & { stream: true },
-  ): Promise<AsyncGenerator<OllamaGenerateResponse>>;
-  generate(
-    request: GenerateRequest & { stream?: false | undefined },
-  ): Promise<OllamaGenerateResponse>;
-  generate(request: any): any {
-    return this.ollama.generate(request);
-  }
-  create(
-    request: CreateRequest & { stream: true },
-  ): Promise<AsyncGenerator<ProgressResponse>>;
-  create(
-    request: CreateRequest & { stream?: false | undefined },
-  ): Promise<ProgressResponse>;
-  create(request: any): any {
-    return this.ollama.create(request);
-  }
-  pull(
-    request: PullRequest & { stream: true },
-  ): Promise<AsyncGenerator<ProgressResponse>>;
-  pull(
-    request: PullRequest & { stream?: false | undefined },
-  ): Promise<ProgressResponse>;
-  pull(request: any): any {
-    return this.ollama.pull(request);
-  }
-  delete(request: DeleteRequest): Promise<StatusResponse> {
-    return this.ollama.delete(request);
-  }
-  copy(request: CopyRequest): Promise<StatusResponse> {
-    return this.ollama.copy(request);
-  }
-  list(): Promise<ListResponse> {
-    return this.ollama.list();
-  }
-  show(request: ShowRequest): Promise<ShowResponse> {
-    return this.ollama.show(request);
-  }
-  embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse> {
-    return this.ollama.embeddings(request);
-  }
-}
+export { Ollama, type OllamaParams } from "@llamaindex/ollama";
diff --git a/packages/llm/ollama/package.json b/packages/llm/ollama/package.json
new file mode 100644
index 0000000000000000000000000000000000000000..40bf722b93dc3afdbf507c652bbe46b1966d3bb5
--- /dev/null
+++ b/packages/llm/ollama/package.json
@@ -0,0 +1,41 @@
+{
+  "name": "@llamaindex/ollama",
+  "description": "Ollama Adapter for LlamaIndex",
+  "version": "0.0.1",
+  "type": "module",
+  "main": "./dist/index.cjs",
+  "module": "./dist/index.js",
+  "exports": {
+    ".": {
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      },
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      }
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/run-llama/LlamaIndexTS.git",
+    "directory": "packages/llm/openai"
+  },
+  "scripts": {
+    "build": "bunchee",
+    "dev": "bunchee --watch"
+  },
+  "devDependencies": {
+    "bunchee": "5.3.2"
+  },
+  "dependencies": {
+    "@llamaindex/core": "workspace:*",
+    "@llamaindex/env": "workspace:*",
+    "ollama": "^0.5.9",
+    "remeda": "^2.12.0"
+  }
+}
diff --git a/packages/llm/ollama/src/index.ts b/packages/llm/ollama/src/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..1bdcd4b81af422e56ac4fe09ce13a1fc1faead57
--- /dev/null
+++ b/packages/llm/ollama/src/index.ts
@@ -0,0 +1,175 @@
+import { BaseEmbedding } from "@llamaindex/core/embeddings";
+import type {
+  ChatResponse,
+  ChatResponseChunk,
+  CompletionResponse,
+  LLM,
+  LLMChatParamsNonStreaming,
+  LLMChatParamsStreaming,
+  LLMCompletionParamsNonStreaming,
+  LLMCompletionParamsStreaming,
+  LLMMetadata,
+} from "@llamaindex/core/llms";
+import { extractText, streamConverter } from "@llamaindex/core/utils";
+import {
+  Ollama as OllamaBase,
+  type Config,
+  type ChatResponse as OllamaChatResponse,
+  type GenerateResponse as OllamaGenerateResponse,
+  type Options,
+} from "ollama/browser";
+
+const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => {
+  return {
+    raw: part,
+    delta: part.message.content,
+  };
+};
+
+const completionAccessor = (
+  part: OllamaGenerateResponse,
+): CompletionResponse => {
+  return { text: part.response, raw: part };
+};
+
+export type OllamaParams = {
+  model: string;
+  config?: Partial<Config>;
+  options?: Partial<Options>;
+};
+
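+/**
+ * This class both implements the LLM and Embedding interfaces.
+ */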
+export class Ollama extends BaseEmbedding implements LLM {
+  public readonly ollama: OllamaBase;
+
+  // https://ollama.ai/library
+  model: string;
+
+  options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> &
+    Pick<Options, "num_ctx" | "top_p" | "temperature"> = {
+    num_ctx: 4096,
+    top_p: 0.9,
+    temperature: 0.7,
+  };
+
+  constructor(params: OllamaParams) {
+    super();
+    this.model = params.model;
+    this.ollama = new OllamaBase(params.config);
+    if (params.options) {
+      this.options = {
+        ...this.options,
+        ...params.options,
+      };
+    }
+  }
+
+  get metadata(): LLMMetadata {
+    const { temperature, top_p, num_ctx } = this.options;
+    return {
+      model: this.model,
+      temperature: temperature,
+      topP: top_p,
+      maxTokens: this.options.num_ctx,
+      contextWindow: num_ctx,
+      tokenizer: undefined,
+    };
+  }
+
+  chat(
+    params: LLMChatParamsStreaming,
+  ): Promise<AsyncIterable<ChatResponseChunk>>;
+  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
+  async chat(
+    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
+  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
+    const { messages, stream } = params;
+    const payload = {
+      model: this.model,
+      messages: messages.map((message) => ({
+        role: message.role,
+        content: extractText(message.content),
+      })),
+      stream: !!stream,
+      options: {
+        ...this.options,
+      },
+    };
+    if (!stream) {
+      const chatResponse = await this.ollama.chat({
+        ...payload,
+        stream: false,
+      });
+
+      return {
+        message: {
+          role: "assistant",
+          content: chatResponse.message.content,
+        },
+        raw: chatResponse,
+      };
+    } else {
+      const stream = await this.ollama.chat({
+        ...payload,
+        stream: true,
+      });
+      return streamConverter(stream, messageAccessor);
+    }
+  }
+
+  complete(
+    params: LLMCompletionParamsStreaming,
+  ): Promise<AsyncIterable<CompletionResponse>>;
+  complete(
+    params: LLMCompletionParamsNonStreaming,
+  ): Promise<CompletionResponse>;
+  async complete(
+    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
+  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
+    const { prompt, stream } = params;
+    const payload = {
+      model: this.model,
+      prompt: extractText(prompt),
+      stream: !!stream,
+      options: {
+        ...this.options,
+      },
+    };
+    if (!stream) {
+      const response = await this.ollama.generate({
+        ...payload,
+        stream: false,
+      });
+      return {
+        text: response.response,
+        raw: response,
+      };
+    } else {
+      const stream = await this.ollama.generate({
+        ...payload,
+        stream: true,
+      });
+      return streamConverter(stream, completionAccessor);
+    }
+  }
+
+  private async getEmbedding(prompt: string): Promise<number[]> {
+    const payload = {
+      model: this.model,
+      prompt,
+      options: {
+        ...this.options,
+      },
+    };
+    const response = await this.ollama.embeddings({
+      ...payload,
+    });
+    return response.embedding;
+  }
+
+  async getTextEmbedding(text: string): Promise<number[]> {
+    return this.getEmbedding(text);
+  }
+}
diff --git a/packages/llm/ollama/tsconfig.json b/packages/llm/ollama/tsconfig.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a94aa033116766429d5c2d42f337c5b1393ba86
--- /dev/null
+++ b/packages/llm/ollama/tsconfig.json
@@ -0,0 +1,18 @@
+{
+  "extends": "../../../tsconfig.json",
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "outDir": "./lib"
+  },
+  "include": ["./src"],
+  "references": [
+    {
+      "path": "../../llamaindex/tsconfig.json"
+    },
+    {
+      "path": "../../env/tsconfig.json"
+    }
+  ]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 056bf0720ccad02eee799760773cef272908e50b..e3d2abce4fadcdf35abc4a0ee9443708cbec7165 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -541,6 +541,9 @@ importers:
       '@llamaindex/groq':
         specifier: workspace:*
         version: link:../llm/groq
+      '@llamaindex/ollama':
+        specifier: workspace:*
+        version: link:../llm/ollama
       '@llamaindex/openai':
         specifier: workspace:*
         version: link:../llm/openai
@@ -922,6 +925,25 @@ importers:
         specifier: 5.3.2
         version: 5.3.2(typescript@5.6.2)
 
+  packages/llm/ollama:
+    dependencies:
+      '@llamaindex/core':
+        specifier: workspace:*
+        version: link:../../core
+      '@llamaindex/env':
+        specifier: workspace:*
+        version: link:../../env
+      ollama:
+        specifier: ^0.5.9
+        version: 0.5.9
+      remeda:
+        specifier: ^2.12.0
+        version: 2.12.0
+    devDependencies:
+      bunchee:
+        specifier: 5.3.2
+        version: 5.3.2(typescript@5.6.2)
+
   packages/llm/openai:
     dependencies:
       '@llamaindex/core':
@@ -8885,6 +8907,9 @@ packages:
   ohash@1.1.3:
     resolution: {integrity: sha512-zuHHiGTYTA1sYJ/wZN+t5HKZaH23i4yI1HMwbuXm24Nid7Dv0KcuRlKoNKS9UNfAVSBlnGLcuQrnOKWOZoEGaw==}
 
+  ollama@0.5.9:
+    resolution: {integrity: sha512-F/KZuDRC+ZsVCuMvcOYuQ6zj42/idzCkkuknGyyGVmNStMZ/sU3jQpvhnl4SyC0+zBzLiKNZJnJeuPFuieWZvQ==}
+
   on-finished@2.4.1:
     resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==}
     engines: {node: '>= 0.8'}
@@ -22795,6 +22820,10 @@ snapshots:
 
   ohash@1.1.3: {}
 
+  ollama@0.5.9:
+    dependencies:
+      whatwg-fetch: 3.6.20
+
   on-finished@2.4.1:
     dependencies:
       ee-first: 1.1.1
diff --git a/tsconfig.json b/tsconfig.json
index c6db3c526a2195073d43276818c7d56c911c39b3..a03509a020cd58c7ac7585d02def554ff69a2444 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -32,6 +32,9 @@
     {
       "path": "./packages/llm/groq/tsconfig.json"
     },
+    {
+      "path": "./packages/llm/ollama/tsconfig.json"
+    },
     {
       "path": "./packages/cloud/tsconfig.json"
     },