diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a0cb7271f2c8e5d83af19d9937b87c967aee2e3..a13b0d6f26a0f9f792ed7e79a2a8a75bff5fd0f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -151,6 +151,9 @@ jobs: - name: Pack @llamaindex/groq run: pnpm pack --pack-destination ${{ runner.temp }} working-directory: packages/llm/groq + - name: Pack @llamaindex/ollama + run: pnpm pack --pack-destination ${{ runner.temp }} + working-directory: packages/llm/ollama - name: Pack @llamaindex/core run: pnpm pack --pack-destination ${{ runner.temp }} working-directory: packages/core diff --git a/packages/llamaindex/package.json b/packages/llamaindex/package.json index 201d6c6327991397c77145e8a1844e1cd2210887..d90b61321249c52f0b6bacf353edf2c0528585e3 100644 --- a/packages/llamaindex/package.json +++ b/packages/llamaindex/package.json @@ -34,6 +34,7 @@ "@llamaindex/core": "workspace:*", "@llamaindex/env": "workspace:*", "@llamaindex/groq": "workspace:*", + "@llamaindex/ollama": "workspace:*", "@llamaindex/openai": "workspace:*", "@mistralai/mistralai": "^1.0.4", "@mixedbread-ai/sdk": "^2.2.11", diff --git a/packages/llamaindex/src/embeddings/OllamaEmbedding.ts b/packages/llamaindex/src/embeddings/OllamaEmbedding.ts index f6323c1495566453a43a23df661aef76d5a50773..2bd40a48eeaa17652ef2d5a46e15fc3e0aa9b2b1 100644 --- a/packages/llamaindex/src/embeddings/OllamaEmbedding.ts +++ b/packages/llamaindex/src/embeddings/OllamaEmbedding.ts @@ -1,5 +1,5 @@ import type { BaseEmbedding } from "@llamaindex/core/embeddings"; -import { Ollama } from "../llm/ollama.js"; +import { Ollama } from "@llamaindex/ollama"; /** * OllamaEmbedding is an alias for Ollama that implements the BaseEmbedding interface. diff --git a/packages/llamaindex/src/internal/deps/ollama.d.ts b/packages/llamaindex/src/internal/deps/ollama.d.ts deleted file mode 100644 index 39e6218f7c9c8e33073b2ceae6f00fe7eb484882..0000000000000000000000000000000000000000 --- a/packages/llamaindex/src/internal/deps/ollama.d.ts +++ /dev/null @@ -1,264 +0,0 @@ -type Fetch = typeof fetch; -interface Config { - host: string; - fetch?: Fetch; - proxy?: boolean; -} -interface Options { - numa: boolean; - num_ctx: number; - num_batch: number; - main_gpu: number; - low_vram: boolean; - f16_kv: boolean; - logits_all: boolean; - vocab_only: boolean; - use_mmap: boolean; - use_mlock: boolean; - embedding_only: boolean; - num_thread: number; - num_keep: number; - seed: number; - num_predict: number; - top_k: number; - top_p: number; - tfs_z: number; - typical_p: number; - repeat_last_n: number; - temperature: number; - repeat_penalty: number; - presence_penalty: number; - frequency_penalty: number; - mirostat: number; - mirostat_tau: number; - mirostat_eta: number; - penalize_newline: boolean; - stop: string[]; -} -interface GenerateRequest { - model: string; - prompt: string; - system?: string; - template?: string; - context?: number[]; - stream?: boolean; - raw?: boolean; - format?: string; - images?: Uint8Array[] | string[]; - keep_alive?: string | number; - options?: Partial<Options>; -} -interface Message { - role: string; - content: string; - images?: Uint8Array[] | string[]; -} -interface ChatRequest { - model: string; - messages?: Message[]; - stream?: boolean; - format?: string; - keep_alive?: string | number; - options?: Partial<Options>; -} -interface PullRequest { - model: string; - insecure?: boolean; - stream?: boolean; -} -interface PushRequest { - model: string; - insecure?: boolean; - stream?: boolean; -} -interface 
CreateRequest { - model: string; - path?: string; - modelfile?: string; - stream?: boolean; -} -interface DeleteRequest { - model: string; -} -interface CopyRequest { - source: string; - destination: string; -} -interface ShowRequest { - model: string; - system?: string; - template?: string; - options?: Partial<Options>; -} -interface EmbeddingsRequest { - model: string; - prompt: string; - keep_alive?: string | number; - options?: Partial<Options>; -} -interface GenerateResponse { - model: string; - created_at: Date; - response: string; - done: boolean; - context: number[]; - total_duration: number; - load_duration: number; - prompt_eval_count: number; - prompt_eval_duration: number; - eval_count: number; - eval_duration: number; -} -interface ChatResponse { - model: string; - created_at: Date; - message: Message; - done: boolean; - total_duration: number; - load_duration: number; - prompt_eval_count: number; - prompt_eval_duration: number; - eval_count: number; - eval_duration: number; -} -interface EmbeddingsResponse { - embedding: number[]; -} -interface ProgressResponse { - status: string; - digest: string; - total: number; - completed: number; -} -interface ModelResponse { - name: string; - modified_at: Date; - size: number; - digest: string; - details: ModelDetails; -} -interface ModelDetails { - parent_model: string; - format: string; - family: string; - families: string[]; - parameter_size: string; - quantization_level: string; -} -interface ShowResponse { - license: string; - modelfile: string; - parameters: string; - template: string; - system: string; - details: ModelDetails; - messages: Message[]; -} -interface ListResponse { - models: ModelResponse[]; -} -interface ErrorResponse { - error: string; -} -interface StatusResponse { - status: string; -} - -declare class Ollama { - protected readonly config: Config; - protected readonly fetch: Fetch; - private abortController; - constructor(config?: Partial<Config>); - abort(): void; - protected processStreamableRequest<T extends object>( - endpoint: string, - request: { - stream?: boolean; - } & Record<string, any>, - ): Promise<T | AsyncGenerator<T>>; - encodeImage(image: Uint8Array | string): Promise<string>; - generate( - request: GenerateRequest & { - stream: true; - }, - ): Promise<AsyncGenerator<GenerateResponse>>; - generate( - request: GenerateRequest & { - stream?: false; - }, - ): Promise<GenerateResponse>; - chat( - request: ChatRequest & { - stream: true; - }, - ): Promise<AsyncGenerator<ChatResponse>>; - chat( - request: ChatRequest & { - stream?: false; - }, - ): Promise<ChatResponse>; - create( - request: CreateRequest & { - stream: true; - }, - ): Promise<AsyncGenerator<ProgressResponse>>; - create( - request: CreateRequest & { - stream?: false; - }, - ): Promise<ProgressResponse>; - pull( - request: PullRequest & { - stream: true; - }, - ): Promise<AsyncGenerator<ProgressResponse>>; - pull( - request: PullRequest & { - stream?: false; - }, - ): Promise<ProgressResponse>; - push( - request: PushRequest & { - stream: true; - }, - ): Promise<AsyncGenerator<ProgressResponse>>; - push( - request: PushRequest & { - stream?: false; - }, - ): Promise<ProgressResponse>; - delete(request: DeleteRequest): Promise<StatusResponse>; - copy(request: CopyRequest): Promise<StatusResponse>; - list(): Promise<ListResponse>; - show(request: ShowRequest): Promise<ShowResponse>; - embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse>; -} -declare const _default: Ollama; - -export { - Ollama, - _default as default, - type 
ChatRequest, - type ChatResponse, - type Config, - type CopyRequest, - type CreateRequest, - type DeleteRequest, - type EmbeddingsRequest, - type EmbeddingsResponse, - type ErrorResponse, - type Fetch, - type GenerateRequest, - type GenerateResponse, - type ListResponse, - type Message, - type ModelDetails, - type ModelResponse, - type Options, - type ProgressResponse, - type PullRequest, - type PushRequest, - type ShowRequest, - type ShowResponse, - type StatusResponse, -}; diff --git a/packages/llamaindex/src/internal/deps/ollama.js b/packages/llamaindex/src/internal/deps/ollama.js deleted file mode 100644 index db189e10a1cc3196352378efe1f2b7f753aa6ab7..0000000000000000000000000000000000000000 --- a/packages/llamaindex/src/internal/deps/ollama.js +++ /dev/null @@ -1,462 +0,0 @@ -// generate from "tsup ./src/browser.js --format esm --dts" -var __defProp = Object.defineProperty; -var __getOwnPropSymbols = Object.getOwnPropertySymbols; -var __hasOwnProp = Object.prototype.hasOwnProperty; -var __propIsEnum = Object.prototype.propertyIsEnumerable; -var __knownSymbol = (name, symbol) => { - return (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name); -}; -var __defNormalProp = (obj, key, value) => - key in obj - ? __defProp(obj, key, { - enumerable: true, - configurable: true, - writable: true, - value, - }) - : (obj[key] = value); -var __spreadValues = (a, b) => { - for (var prop in b || (b = {})) - if (__hasOwnProp.call(b, prop)) __defNormalProp(a, prop, b[prop]); - if (__getOwnPropSymbols) - for (var prop of __getOwnPropSymbols(b)) { - if (__propIsEnum.call(b, prop)) __defNormalProp(a, prop, b[prop]); - } - return a; -}; -var __async = (__this, __arguments, generator) => { - return new Promise((resolve, reject) => { - var fulfilled = (value) => { - try { - step(generator.next(value)); - } catch (e) { - reject(e); - } - }; - var rejected = (value) => { - try { - step(generator.throw(value)); - } catch (e) { - reject(e); - } - }; - var step = (x) => - x.done - ? resolve(x.value) - : Promise.resolve(x.value).then(fulfilled, rejected); - step((generator = generator.apply(__this, __arguments)).next()); - }); -}; -var __await = function (promise, isYieldStar) { - this[0] = promise; - this[1] = isYieldStar; -}; -var __asyncGenerator = (__this, __arguments, generator) => { - var resume = (k, v, yes, no) => { - try { - var x = generator[k](v), - isAwait = (v = x.value) instanceof __await, - done = x.done; - Promise.resolve(isAwait ? v[0] : v) - .then((y) => - isAwait - ? resume( - k === "return" ? k : "next", - v[1] ? { done: y.done, value: y.value } : y, - yes, - no, - ) - : yes({ value: y, done }), - ) - .catch((e) => resume("throw", e, yes, no)); - } catch (e) { - no(e); - } - }; - var method = (k) => - (it[k] = (x) => new Promise((yes, no) => resume(k, x, yes, no))); - var it = {}; - return ( - (generator = generator.apply(__this, __arguments)), - (it[__knownSymbol("asyncIterator")] = () => it), - method("next"), - method("throw"), - method("return"), - it - ); -}; -var __forAwait = (obj, it, method) => - (it = obj[__knownSymbol("asyncIterator")]) - ? 
it.call(obj) - : ((obj = obj[__knownSymbol("iterator")]()), - (it = {}), - (method = (key, fn) => - (fn = obj[key]) && - (it[key] = (arg) => - new Promise( - (yes, no, done) => ( - (arg = fn.call(obj, arg)), - (done = arg.done), - Promise.resolve(arg.value).then( - (value) => yes({ value, done }), - no, - ) - ), - ))), - method("next"), - method("return"), - it); - -// src/version.ts -var version = "0.0.0"; - -// src/utils.ts -var ResponseError = class _ResponseError extends Error { - constructor(error, status_code) { - super(error); - this.error = error; - this.status_code = status_code; - this.name = "ResponseError"; - if (Error.captureStackTrace) { - Error.captureStackTrace(this, _ResponseError); - } - } -}; -var checkOk = (response) => - __async(void 0, null, function* () { - var _a; - if (!response.ok) { - let message = `Error ${response.status}: ${response.statusText}`; - let errorData = null; - if ( - (_a = response.headers.get("content-type")) == null - ? void 0 - : _a.includes("application/json") - ) { - try { - errorData = yield response.json(); - message = errorData.error || message; - } catch (error) { - console.log("Failed to parse error response as JSON"); - } - } else { - try { - console.log("Getting text from response"); - const textResponse = yield response.text(); - message = textResponse || message; - } catch (error) { - console.log("Failed to get text from error response"); - } - } - throw new ResponseError(message, response.status); - } - }); -function getPlatform() { - if (typeof window !== "undefined" && window.navigator) { - return `${window.navigator.platform.toLowerCase()} Browser/${navigator.userAgent};`; - } else if (typeof process !== "undefined") { - return `${process.arch} ${process.platform} Node.js/${process.version}`; - } - return ""; -} -var fetchWithHeaders = (_0, _1, ..._2) => - __async(void 0, [_0, _1, ..._2], function* (fetch2, url, options = {}) { - const defaultHeaders = { - "Content-Type": "application/json", - Accept: "application/json", - "User-Agent": `ollama-js/${version} (${getPlatform()})`, - }; - if (!options.headers) { - options.headers = {}; - } - options.headers = __spreadValues( - __spreadValues({}, defaultHeaders), - options.headers, - ); - return fetch2(url, options); - }); -var get = (fetch2, host) => - __async(void 0, null, function* () { - const response = yield fetchWithHeaders(fetch2, host); - yield checkOk(response); - return response; - }); -var post = (fetch2, host, data, options) => - __async(void 0, null, function* () { - const isRecord = (input) => { - return ( - input !== null && typeof input === "object" && !Array.isArray(input) - ); - }; - const formattedData = isRecord(data) ? JSON.stringify(data) : data; - const response = yield fetchWithHeaders(fetch2, host, { - method: "POST", - body: formattedData, - signal: options == null ? 
void 0 : options.signal, - }); - yield checkOk(response); - return response; - }); -var del = (fetch2, host, data) => - __async(void 0, null, function* () { - const response = yield fetchWithHeaders(fetch2, host, { - method: "DELETE", - body: JSON.stringify(data), - }); - yield checkOk(response); - return response; - }); -var parseJSON = function (itr) { - return __asyncGenerator(this, null, function* () { - var _a; - const decoder = new TextDecoder("utf-8"); - let buffer = ""; - const reader = itr.getReader(); - while (true) { - const { done, value: chunk } = yield new __await(reader.read()); - if (done) { - break; - } - buffer += decoder.decode(chunk); - const parts = buffer.split("\n"); - buffer = (_a = parts.pop()) != null ? _a : ""; - for (const part of parts) { - try { - yield JSON.parse(part); - } catch (error) { - console.warn("invalid json: ", part); - } - } - } - for (const part of buffer.split("\n").filter((p) => p !== "")) { - try { - yield JSON.parse(part); - } catch (error) { - console.warn("invalid json: ", part); - } - } - }); -}; -var formatHost = (host) => { - if (!host) { - return "http://127.0.0.1:11434"; - } - let isExplicitProtocol = host.includes("://"); - if (host.startsWith(":")) { - host = `http://127.0.0.1${host}`; - isExplicitProtocol = false; - } - if (!isExplicitProtocol) { - host = `http://${host}`; - } - const url = new URL(host); - let port = url.port; - if (!port) { - if (!isExplicitProtocol) { - port = "11434"; - } else { - port = url.protocol === "https:" ? "443" : "80"; - } - } - let formattedHost = `${url.protocol}//${url.hostname}:${port}${url.pathname}`; - if (formattedHost.endsWith("/")) { - formattedHost = formattedHost.slice(0, -1); - } - return formattedHost; -}; - -// src/browser.ts -// import "whatwg-fetch"; -var Ollama = class { - constructor(config) { - var _a; - this.config = { - host: "", - }; - if (!(config == null ? void 0 : config.proxy)) { - this.config.host = formatHost( - (_a = config == null ? void 0 : config.host) != null - ? _a - : "http://127.0.0.1:11434", - ); - } - this.fetch = fetch; - if ((config == null ? void 0 : config.fetch) != null) { - this.fetch = config.fetch; - } - this.abortController = new AbortController(); - } - // Abort any ongoing requests to Ollama - abort() { - this.abortController.abort(); - this.abortController = new AbortController(); - } - processStreamableRequest(endpoint, request) { - return __async(this, null, function* () { - var _a; - request.stream = (_a = request.stream) != null ? 
_a : false; - const response = yield post( - this.fetch, - `${this.config.host}/api/${endpoint}`, - __spreadValues({}, request), - { signal: this.abortController.signal }, - ); - if (!response.body) { - throw new Error("Missing body"); - } - const itr = parseJSON(response.body); - if (request.stream) { - return (function () { - return __asyncGenerator(this, null, function* () { - try { - for ( - var iter = __forAwait(itr), more, temp, error; - (more = !(temp = yield new __await(iter.next())).done); - more = false - ) { - const message = temp.value; - if ("error" in message) { - throw new Error(message.error); - } - yield message; - if (message.done || message.status === "success") { - return; - } - } - } catch (temp) { - error = [temp]; - } finally { - try { - more && - (temp = iter.return) && - (yield new __await(temp.call(iter))); - } finally { - if (error) throw error[0]; - } - } - throw new Error( - "Did not receive done or success response in stream.", - ); - }); - })(); - } else { - const message = yield itr.next(); - if (!message.value.done && message.value.status !== "success") { - throw new Error("Expected a completed response."); - } - return message.value; - } - }); - } - encodeImage(image) { - return __async(this, null, function* () { - if (typeof image !== "string") { - const uint8Array = new Uint8Array(image); - const numberArray = Array.from(uint8Array); - const base64String = btoa(String.fromCharCode.apply(null, numberArray)); - return base64String; - } - return image; - }); - } - generate(request) { - return __async(this, null, function* () { - if (request.images) { - request.images = yield Promise.all( - request.images.map(this.encodeImage.bind(this)), - ); - } - return this.processStreamableRequest("generate", request); - }); - } - chat(request) { - return __async(this, null, function* () { - if (request.messages) { - for (const message of request.messages) { - if (message.images) { - message.images = yield Promise.all( - message.images.map(this.encodeImage.bind(this)), - ); - } - } - } - return this.processStreamableRequest("chat", request); - }); - } - create(request) { - return __async(this, null, function* () { - return this.processStreamableRequest("create", { - name: request.model, - stream: request.stream, - modelfile: request.modelfile, - }); - }); - } - pull(request) { - return __async(this, null, function* () { - return this.processStreamableRequest("pull", { - name: request.model, - stream: request.stream, - insecure: request.insecure, - }); - }); - } - push(request) { - return __async(this, null, function* () { - return this.processStreamableRequest("push", { - name: request.model, - stream: request.stream, - insecure: request.insecure, - }); - }); - } - delete(request) { - return __async(this, null, function* () { - yield del(this.fetch, `${this.config.host}/api/delete`, { - name: request.model, - }); - return { status: "success" }; - }); - } - copy(request) { - return __async(this, null, function* () { - yield post( - this.fetch, - `${this.config.host}/api/copy`, - __spreadValues({}, request), - ); - return { status: "success" }; - }); - } - list() { - return __async(this, null, function* () { - const response = yield get(this.fetch, `${this.config.host}/api/tags`); - const listResponse = yield response.json(); - return listResponse; - }); - } - show(request) { - return __async(this, null, function* () { - const response = yield post( - this.fetch, - `${this.config.host}/api/show`, - __spreadValues({}, request), - ); - const showResponse = yield 
response.json(); - return showResponse; - }); - } - embeddings(request) { - return __async(this, null, function* () { - const response = yield post( - this.fetch, - `${this.config.host}/api/embeddings`, - __spreadValues({}, request), - ); - const embeddingsResponse = yield response.json(); - return embeddingsResponse; - }); - } -}; -var browser_default = new Ollama(); -export { Ollama, browser_default as default }; diff --git a/packages/llamaindex/src/internal/deps/ollama.license b/packages/llamaindex/src/internal/deps/ollama.license deleted file mode 100644 index 49bd8b185208cba029f60faf87388482ab647775..0000000000000000000000000000000000000000 --- a/packages/llamaindex/src/internal/deps/ollama.license +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 Saul - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/packages/llamaindex/src/llm/index.ts b/packages/llamaindex/src/llm/index.ts index 61698abcb587e633ab194121d0f94d2d7b85e676..5596440a0ce24099adeb6d692f4299e67c5855e8 100644 --- a/packages/llamaindex/src/llm/index.ts +++ b/packages/llamaindex/src/llm/index.ts @@ -23,7 +23,7 @@ export { Portkey } from "./portkey.js"; export * from "./replicate_ai.js"; // Note: The type aliases for replicate are to simplify usage for Llama 2 (we're using replicate for Llama 2 support) export { DeepInfra } from "./deepinfra.js"; -export { Ollama, type OllamaParams } from "./ollama.js"; +export * from "./ollama.js"; export { ALL_AVAILABLE_REPLICATE_MODELS, DeuceChatStrategy, diff --git a/packages/llamaindex/src/llm/ollama.ts b/packages/llamaindex/src/llm/ollama.ts index 1fea1b7d0467dcb86e7dcc97d532d3ab0e8edd27..0047c5bc9ee5ba43ce94887c74e81791c0600f24 100644 --- a/packages/llamaindex/src/llm/ollama.ts +++ b/packages/llamaindex/src/llm/ollama.ts @@ -1,253 +1 @@ -import { BaseEmbedding } from "@llamaindex/core/embeddings"; -import type { - ChatResponse, - ChatResponseChunk, - CompletionResponse, - LLM, - LLMChatParamsNonStreaming, - LLMChatParamsStreaming, - LLMCompletionParamsNonStreaming, - LLMCompletionParamsStreaming, - LLMMetadata, -} from "@llamaindex/core/llms"; -import { extractText, streamConverter } from "@llamaindex/core/utils"; -import { - Ollama as OllamaBase, - type Config, - type CopyRequest, - type CreateRequest, - type DeleteRequest, - type EmbeddingsRequest, - type EmbeddingsResponse, - type GenerateRequest, - type ListResponse, - type ChatResponse as OllamaChatResponse, - type GenerateResponse as OllamaGenerateResponse, - type Options, - type ProgressResponse, - type PullRequest, - type PushRequest, - type ShowRequest, - type ShowResponse, - type StatusResponse, -} from "../internal/deps/ollama.js"; - -const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => { - return { - raw: part, - delta: part.message.content, - }; -}; - -const completionAccessor = ( - part: OllamaGenerateResponse, -): CompletionResponse => { - return { text: part.response, raw: part }; -}; - -export type OllamaParams = { - model: string; - config?: Partial<Config>; - options?: Partial<Options>; -}; - -/** - * This class both implements the LLM and Embedding interfaces. 
- */ -export class Ollama - extends BaseEmbedding - implements LLM, Omit<OllamaBase, "chat"> -{ - readonly hasStreaming = true; - - ollama: OllamaBase; - - // https://ollama.ai/library - model: string; - - options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> & - Pick<Options, "num_ctx" | "top_p" | "temperature"> = { - num_ctx: 4096, - top_p: 0.9, - temperature: 0.7, - }; - - constructor(params: OllamaParams) { - super(); - this.model = params.model; - this.ollama = new OllamaBase(params.config); - if (params.options) { - this.options = { - ...this.options, - ...params.options, - }; - } - } - - get metadata(): LLMMetadata { - const { temperature, top_p, num_ctx } = this.options; - return { - model: this.model, - temperature: temperature, - topP: top_p, - maxTokens: undefined, - contextWindow: num_ctx, - tokenizer: undefined, - }; - } - - chat( - params: LLMChatParamsStreaming, - ): Promise<AsyncIterable<ChatResponseChunk>>; - chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>; - async chat( - params: LLMChatParamsNonStreaming | LLMChatParamsStreaming, - ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> { - const { messages, stream } = params; - const payload = { - model: this.model, - messages: messages.map((message) => ({ - role: message.role, - content: extractText(message.content), - })), - stream: !!stream, - options: { - ...this.options, - }, - }; - if (!stream) { - const chatResponse = await this.ollama.chat({ - ...payload, - stream: false, - }); - - return { - message: { - role: "assistant", - content: chatResponse.message.content, - }, - raw: chatResponse, - }; - } else { - const stream = await this.ollama.chat({ - ...payload, - stream: true, - }); - return streamConverter(stream, messageAccessor); - } - } - - complete( - params: LLMCompletionParamsStreaming, - ): Promise<AsyncIterable<CompletionResponse>>; - complete( - params: LLMCompletionParamsNonStreaming, - ): Promise<CompletionResponse>; - async complete( - params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming, - ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> { - const { prompt, stream } = params; - const payload = { - model: this.model, - prompt: extractText(prompt), - stream: !!stream, - options: { - ...this.options, - }, - }; - if (!stream) { - const response = await this.ollama.generate({ - ...payload, - stream: false, - }); - return { - text: response.response, - raw: response, - }; - } else { - const stream = await this.ollama.generate({ - ...payload, - stream: true, - }); - return streamConverter(stream, completionAccessor); - } - } - - private async getEmbedding(prompt: string): Promise<number[]> { - const payload = { - model: this.model, - prompt, - options: { - ...this.options, - }, - }; - const response = await this.ollama.embeddings({ - ...payload, - }); - return response.embedding; - } - - async getTextEmbedding(text: string): Promise<number[]> { - return this.getEmbedding(text); - } - - // Inherited from OllamaBase - - push( - request: PushRequest & { stream: true }, - ): Promise<AsyncGenerator<ProgressResponse, any, unknown>>; - push( - request: PushRequest & { stream?: false | undefined }, - ): Promise<ProgressResponse>; - push(request: any): any { - return this.ollama.push(request); - } - abort(): void { - return this.ollama.abort(); - } - encodeImage(image: string | Uint8Array): Promise<string> { - return this.ollama.encodeImage(image); - } - generate( - request: GenerateRequest & { stream: true }, - ): 
Promise<AsyncGenerator<OllamaGenerateResponse>>; - generate( - request: GenerateRequest & { stream?: false | undefined }, - ): Promise<OllamaGenerateResponse>; - generate(request: any): any { - return this.ollama.generate(request); - } - create( - request: CreateRequest & { stream: true }, - ): Promise<AsyncGenerator<ProgressResponse>>; - create( - request: CreateRequest & { stream?: false | undefined }, - ): Promise<ProgressResponse>; - create(request: any): any { - return this.ollama.create(request); - } - pull( - request: PullRequest & { stream: true }, - ): Promise<AsyncGenerator<ProgressResponse>>; - pull( - request: PullRequest & { stream?: false | undefined }, - ): Promise<ProgressResponse>; - pull(request: any): any { - return this.ollama.pull(request); - } - delete(request: DeleteRequest): Promise<StatusResponse> { - return this.ollama.delete(request); - } - copy(request: CopyRequest): Promise<StatusResponse> { - return this.ollama.copy(request); - } - list(): Promise<ListResponse> { - return this.ollama.list(); - } - show(request: ShowRequest): Promise<ShowResponse> { - return this.ollama.show(request); - } - embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse> { - return this.ollama.embeddings(request); - } -} +export { Ollama, type OllamaParams } from "@llamaindex/ollama"; diff --git a/packages/llm/ollama/package.json b/packages/llm/ollama/package.json new file mode 100644 index 0000000000000000000000000000000000000000..40bf722b93dc3afdbf507c652bbe46b1966d3bb5 --- /dev/null +++ b/packages/llm/ollama/package.json @@ -0,0 +1,41 @@ +{ + "name": "@llamaindex/ollama", + "description": "Ollama Adapter for LlamaIndex", + "version": "0.0.1", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/llm/ollama" + }, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "5.3.2" + }, + "dependencies": { + "@llamaindex/core": "workspace:*", + "@llamaindex/env": "workspace:*", + "ollama": "^0.5.9", + "remeda": "^2.12.0" + } +} diff --git a/packages/llm/ollama/src/index.ts b/packages/llm/ollama/src/index.ts new file mode 100644 index 0000000000000000000000000000000000000000..1bdcd4b81af422e56ac4fe09ce13a1fc1faead57 --- /dev/null +++ b/packages/llm/ollama/src/index.ts @@ -0,0 +1,172 @@ +import { BaseEmbedding } from "@llamaindex/core/embeddings"; +import type { + ChatResponse, + ChatResponseChunk, + CompletionResponse, + LLM, + LLMChatParamsNonStreaming, + LLMChatParamsStreaming, + LLMCompletionParamsNonStreaming, + LLMCompletionParamsStreaming, + LLMMetadata, +} from "@llamaindex/core/llms"; +import { extractText, streamConverter } from "@llamaindex/core/utils"; +import { + Ollama as OllamaBase, + type Config, + type ChatResponse as OllamaChatResponse, + type GenerateResponse as OllamaGenerateResponse, + type Options, +} from "ollama/browser"; + +const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => { + return { + raw: part, + delta: part.message.content, + }; +}; + +const completionAccessor = ( + part: OllamaGenerateResponse, +): CompletionResponse => { + return { text: part.response, raw: part }; +}; + +export type OllamaParams = { + model: 
string; + config?: Partial<Config>; + options?: Partial<Options>; +}; + +export class Ollama extends BaseEmbedding implements LLM { + public readonly ollama: OllamaBase; + + // https://ollama.ai/library + model: string; + + options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> & + Pick<Options, "num_ctx" | "top_p" | "temperature"> = { + num_ctx: 4096, + top_p: 0.9, + temperature: 0.7, + }; + + constructor(params: OllamaParams) { + super(); + this.model = params.model; + this.ollama = new OllamaBase(params.config); + if (params.options) { + this.options = { + ...this.options, + ...params.options, + }; + } + } + + get metadata(): LLMMetadata { + const { temperature, top_p, num_ctx } = this.options; + return { + model: this.model, + temperature: temperature, + topP: top_p, + maxTokens: this.options.num_ctx, + contextWindow: num_ctx, + tokenizer: undefined, + }; + } + + chat( + params: LLMChatParamsStreaming, + ): Promise<AsyncIterable<ChatResponseChunk>>; + chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>; + async chat( + params: LLMChatParamsNonStreaming | LLMChatParamsStreaming, + ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> { + const { messages, stream } = params; + const payload = { + model: this.model, + messages: messages.map((message) => ({ + role: message.role, + content: extractText(message.content), + })), + stream: !!stream, + options: { + ...this.options, + }, + }; + if (!stream) { + const chatResponse = await this.ollama.chat({ + ...payload, + stream: false, + }); + + return { + message: { + role: "assistant", + content: chatResponse.message.content, + }, + raw: chatResponse, + }; + } else { + const stream = await this.ollama.chat({ + ...payload, + stream: true, + }); + return streamConverter(stream, messageAccessor); + } + } + + complete( + params: LLMCompletionParamsStreaming, + ): Promise<AsyncIterable<CompletionResponse>>; + complete( + params: LLMCompletionParamsNonStreaming, + ): Promise<CompletionResponse>; + async complete( + params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming, + ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> { + const { prompt, stream } = params; + const payload = { + model: this.model, + prompt: extractText(prompt), + stream: !!stream, + options: { + ...this.options, + }, + }; + if (!stream) { + const response = await this.ollama.generate({ + ...payload, + stream: false, + }); + return { + text: response.response, + raw: response, + }; + } else { + const stream = await this.ollama.generate({ + ...payload, + stream: true, + }); + return streamConverter(stream, completionAccessor); + } + } + + private async getEmbedding(prompt: string): Promise<number[]> { + const payload = { + model: this.model, + prompt, + options: { + ...this.options, + }, + }; + const response = await this.ollama.embeddings({ + ...payload, + }); + return response.embedding; + } + + async getTextEmbedding(text: string): Promise<number[]> { + return this.getEmbedding(text); + } +} diff --git a/packages/llm/ollama/tsconfig.json b/packages/llm/ollama/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..5a94aa033116766429d5c2d42f337c5b1393ba86 --- /dev/null +++ b/packages/llm/ollama/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib" + }, + "include": ["./src"], + "references": [ + { + "path": "../../llamaindex/tsconfig.json" + }, + { + 
"path": "../../env/tsconfig.json" + } + ] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 056bf0720ccad02eee799760773cef272908e50b..e3d2abce4fadcdf35abc4a0ee9443708cbec7165 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -541,6 +541,9 @@ importers: '@llamaindex/groq': specifier: workspace:* version: link:../llm/groq + '@llamaindex/ollama': + specifier: workspace:* + version: link:../llm/ollama '@llamaindex/openai': specifier: workspace:* version: link:../llm/openai @@ -922,6 +925,25 @@ importers: specifier: 5.3.2 version: 5.3.2(typescript@5.6.2) + packages/llm/ollama: + dependencies: + '@llamaindex/core': + specifier: workspace:* + version: link:../../core + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + ollama: + specifier: ^0.5.9 + version: 0.5.9 + remeda: + specifier: ^2.12.0 + version: 2.12.0 + devDependencies: + bunchee: + specifier: 5.3.2 + version: 5.3.2(typescript@5.6.2) + packages/llm/openai: dependencies: '@llamaindex/core': @@ -8885,6 +8907,9 @@ packages: ohash@1.1.3: resolution: {integrity: sha512-zuHHiGTYTA1sYJ/wZN+t5HKZaH23i4yI1HMwbuXm24Nid7Dv0KcuRlKoNKS9UNfAVSBlnGLcuQrnOKWOZoEGaw==} + ollama@0.5.9: + resolution: {integrity: sha512-F/KZuDRC+ZsVCuMvcOYuQ6zj42/idzCkkuknGyyGVmNStMZ/sU3jQpvhnl4SyC0+zBzLiKNZJnJeuPFuieWZvQ==} + on-finished@2.4.1: resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} engines: {node: '>= 0.8'} @@ -22795,6 +22820,10 @@ snapshots: ohash@1.1.3: {} + ollama@0.5.9: + dependencies: + whatwg-fetch: 3.6.20 + on-finished@2.4.1: dependencies: ee-first: 1.1.1 diff --git a/tsconfig.json b/tsconfig.json index c6db3c526a2195073d43276818c7d56c911c39b3..a03509a020cd58c7ac7585d02def554ff69a2444 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -32,6 +32,9 @@ { "path": "./packages/llm/groq/tsconfig.json" }, + { + "path": "./packages/llm/ollama/tsconfig.json" + }, { "path": "./packages/cloud/tsconfig.json" },