diff --git a/.changeset/early-crabs-nail.md b/.changeset/early-crabs-nail.md
new file mode 100644
index 0000000000000000000000000000000000000000..7978fc549d5a1c52d89b9537c470bf1f5620d113
--- /dev/null
+++ b/.changeset/early-crabs-nail.md
@@ -0,0 +1,8 @@
+---
+"llamaindex": patch
+"@llamaindex/openai": patch
+---
+
+feat: decouple openai from `llamaindex` module
+
+This should be a non-breaking change; you can now install just `@llamaindex/openai` to reduce the bundle size in the future.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6325a1a6017ed6c05a3cf8b6e8a9e29e1849bb96..2e48f5e4b8e98042ebadf8c5b9c982d2cae4435c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -142,6 +142,9 @@ jobs:
       - name: Pack @llamaindex/cloud
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/cloud
+      - name: Pack @llamaindex/openai
+        run: pnpm pack --pack-destination ${{ runner.temp }}
+        working-directory: packages/llm/openai
       - name: Pack @llamaindex/core
         run: pnpm pack --pack-destination ${{ runner.temp }}
         working-directory: packages/core
diff --git a/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts b/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts
deleted file mode 100644
index 2ff111e2e05b4532a924c5184f558557df6c33f7..0000000000000000000000000000000000000000
--- a/packages/llamaindex/e2e/fixtures/embeddings/OpenAIEmbedding.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { TransformComponent } from "@llamaindex/core/schema";
-import {
-  BaseEmbedding,
-  BaseNode,
-  SimilarityType,
-  type EmbeddingInfo,
-  type MessageContentDetail,
-} from "llamaindex";
-
-export class OpenAIEmbedding
-  extends TransformComponent
-  implements BaseEmbedding
-{
-  embedInfo?: EmbeddingInfo;
-  embedBatchSize = 512;
-
-  constructor() {
-    super(async (nodes: BaseNode[], _options?: any): Promise<BaseNode[]> => {
-      nodes.forEach((node) => (node.embedding = [0]));
-      return nodes;
-    });
-  }
-
-  async getQueryEmbedding(query: MessageContentDetail) {
-    return [0];
-  }
-
-  async getTextEmbedding(text: string) {
-    return [0];
-  }
-
-  async getTextEmbeddings(texts: string[]) {
-    return [[0]];
-  }
-
-  async getTextEmbeddingsBatch(texts: string[]) {
-    return [[0]];
-  }
-
-  similarity(
-    embedding1: number[],
-    embedding2: number[],
-    mode?: SimilarityType,
-  ) {
-    return 1;
-  }
-
-  truncateMaxTokens(input: string[]): string[] {
-    return input;
-  }
-}
diff --git a/packages/llamaindex/e2e/fixtures/llm/openai.ts b/packages/llamaindex/e2e/fixtures/llm/openai.ts
index 2b54919f5e5ad7f70762770886a3f4d7ead2fd66..10fd90c6453dc9ccd5dbe84ba932923bf2e4ddaf 100644
--- a/packages/llamaindex/e2e/fixtures/llm/openai.ts
+++ b/packages/llamaindex/e2e/fixtures/llm/openai.ts
@@ -12,6 +12,15 @@ import type {
 import { deepStrictEqual, strictEqual } from "node:assert";
 import { llmCompleteMockStorage } from "../../node/utils.js";
 
+import { TransformComponent } from "@llamaindex/core/schema";
+import {
+  BaseEmbedding,
+  BaseNode,
+  SimilarityType,
+  type EmbeddingInfo,
+  type MessageContentDetail,
+} from "llamaindex";
+
 export function getOpenAISession() {
   return {};
 }
@@ -22,6 +31,7 @@ export function isFunctionCallingModel() {
 
 export class OpenAI implements LLM {
   supportToolCall = true;
+
   get metadata() {
     return {
       model: "mock-model",
@@ -32,6 +42,7 @@ export class OpenAI implements LLM {
       isFunctionCallingModel: true,
     };
   }
+
   chat(
     params: LLMChatParamsStreaming<Record<string, unknown>>,
   ): Promise<AsyncIterable<ChatResponseChunk>>;
@@ -77,6 +88,7 @@
export class OpenAI implements LLM { } throw new Error("Method not implemented."); } + complete( params: LLMCompletionParamsStreaming, ): Promise<AsyncIterable<CompletionResponse>>; @@ -103,3 +115,46 @@ export class OpenAI implements LLM { throw new Error("Method not implemented."); } } + +export class OpenAIEmbedding + extends TransformComponent + implements BaseEmbedding +{ + embedInfo?: EmbeddingInfo; + embedBatchSize = 512; + + constructor() { + super(async (nodes: BaseNode[], _options?: any): Promise<BaseNode[]> => { + nodes.forEach((node) => (node.embedding = [0])); + return nodes; + }); + } + + async getQueryEmbedding(query: MessageContentDetail) { + return [0]; + } + + async getTextEmbedding(text: string) { + return [0]; + } + + async getTextEmbeddings(texts: string[]) { + return [[0]]; + } + + async getTextEmbeddingsBatch(texts: string[]) { + return [[0]]; + } + + similarity( + embedding1: number[], + embedding2: number[], + mode?: SimilarityType, + ) { + return 1; + } + + truncateMaxTokens(input: string[]): string[] { + return input; + } +} diff --git a/packages/llamaindex/e2e/mock-module.js b/packages/llamaindex/e2e/mock-module.js index d32e8f1878f9aca65f8ca8d3171a36ddbc3d4f0a..fe6cd62629c8c21d1f601df8d0ec295db16a44f2 100644 --- a/packages/llamaindex/e2e/mock-module.js +++ b/packages/llamaindex/e2e/mock-module.js @@ -13,8 +13,14 @@ export async function resolve(specifier, context, nextResolve) { return result; } const targetUrl = fileURLToPath(result.url).replace(/\.js$/, ".ts"); - const relativePath = relative(packageDistDir, targetUrl); - if (relativePath.startsWith(".") || relativePath.startsWith("/")) { + let relativePath = relative(packageDistDir, targetUrl); + // todo: make it more generic if we have more sub modules fixtures in the future + if (relativePath.startsWith("../../llm/openai")) { + relativePath = relativePath.replace( + "../../llm/openai/dist/index.ts", + "llm/openai.ts", + ); + } else if (relativePath.startsWith(".") || relativePath.startsWith("/")) { return result; } const url = pathToFileURL(join(fixturesDir, relativePath)).toString(); diff --git a/packages/llamaindex/e2e/package.json b/packages/llamaindex/e2e/package.json index 971b369f1c85b46b3bc41022f63ddd05d467d9e9..0f3f588ea45521bd8e684ae50c9be139beca83df 100644 --- a/packages/llamaindex/e2e/package.json +++ b/packages/llamaindex/e2e/package.json @@ -10,7 +10,6 @@ }, "devDependencies": { "@faker-js/faker": "^8.4.1", - "@llamaindex/core": "workspace:*", "@types/node": "^22.5.1", "consola": "^3.2.3", "llamaindex": "workspace:*", diff --git a/packages/llamaindex/package.json b/packages/llamaindex/package.json index 711907fd19b03e5491c4a89dc14460bdf3ffb3ea..436ded68465bc0af64d34de78de7f5a7a2b89ffb 100644 --- a/packages/llamaindex/package.json +++ b/packages/llamaindex/package.json @@ -33,6 +33,7 @@ "@llamaindex/cloud": "workspace:*", "@llamaindex/core": "workspace:*", "@llamaindex/env": "workspace:*", + "@llamaindex/openai": "workspace:*", "@mistralai/mistralai": "^1.0.4", "@mixedbread-ai/sdk": "^2.2.11", "@pinecone-database/pinecone": "^3.0.2", diff --git a/packages/llamaindex/src/ChatHistory.ts b/packages/llamaindex/src/ChatHistory.ts index f681e5261eb88762305a8ec78c9c65d1c26d86f7..4bbd44e4b491a85da240b6c91bcb1876cdaa0d40 100644 --- a/packages/llamaindex/src/ChatHistory.ts +++ b/packages/llamaindex/src/ChatHistory.ts @@ -5,7 +5,7 @@ import { } from "@llamaindex/core/prompts"; import { extractText, messagesToHistory } from "@llamaindex/core/utils"; import { tokenizers, type Tokenizer } from 
"@llamaindex/env"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI } from "@llamaindex/openai"; /** * A ChatHistory is used to keep the state of back and forth chat messages diff --git a/packages/llamaindex/src/QuestionGenerator.ts b/packages/llamaindex/src/QuestionGenerator.ts index 47ea6001eea4dc4dc1ae0582845723c2fe3b49d7..0f69fdfb843a562f346c9ddd40a5dd938efbec0c 100644 --- a/packages/llamaindex/src/QuestionGenerator.ts +++ b/packages/llamaindex/src/QuestionGenerator.ts @@ -8,12 +8,12 @@ import { import type { QueryType } from "@llamaindex/core/query-engine"; import type { BaseOutputParser } from "@llamaindex/core/schema"; import { extractText, toToolDescriptions } from "@llamaindex/core/utils"; +import { OpenAI } from "@llamaindex/openai"; import { SubQuestionOutputParser } from "./OutputParser.js"; import type { BaseQuestionGenerator, SubQuestion, } from "./engines/query/types.js"; -import { OpenAI } from "./llm/openai.js"; import type { StructuredOutput } from "./types.js"; /** diff --git a/packages/llamaindex/src/ServiceContext.ts b/packages/llamaindex/src/ServiceContext.ts index c6c1885f295dc3af95b3e4046b5a1b7319c4a4a9..a185423fbdcb68759c9049dbea9514322f82fd89 100644 --- a/packages/llamaindex/src/ServiceContext.ts +++ b/packages/llamaindex/src/ServiceContext.ts @@ -5,8 +5,7 @@ import { type NodeParser, SentenceSplitter, } from "@llamaindex/core/node-parser"; -import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; /** * The ServiceContext is a collection of components that are used in different parts of the application. diff --git a/packages/llamaindex/src/Settings.ts b/packages/llamaindex/src/Settings.ts index a4acce25d429fbb24a5af24d3b1daa2e4bb1f946..a39fc3b1ab05c427b6e264df68b793556eef8e1d 100644 --- a/packages/llamaindex/src/Settings.ts +++ b/packages/llamaindex/src/Settings.ts @@ -2,7 +2,7 @@ import { type CallbackManager, Settings as CoreSettings, } from "@llamaindex/core/global"; -import { OpenAI } from "./llm/openai.js"; +import { OpenAI } from "@llamaindex/openai"; import { PromptHelper } from "@llamaindex/core/indices"; diff --git a/packages/llamaindex/src/agent/openai.ts b/packages/llamaindex/src/agent/openai.ts index a85fb4c5a1719984dddaaca8cdf0d66c55897c5b..8cd84bc675ff3972df36eec3b91d6fa158686f31 100644 --- a/packages/llamaindex/src/agent/openai.ts +++ b/packages/llamaindex/src/agent/openai.ts @@ -1,5 +1,5 @@ +import { OpenAI } from "@llamaindex/openai"; import { Settings } from "../Settings.js"; -import { OpenAI } from "../llm/openai.js"; import { LLMAgent, LLMAgentWorker, type LLMAgentParams } from "./llm.js"; // This is likely not necessary anymore but leaving it here just incase it's in use elsewhere diff --git a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts index 7e83ab1360501cf170eea56aa2143cfd7cb6ecd3..14ddb6f8de3884807b08d50f7d48fb4c1f2b8f95 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts @@ -13,8 +13,8 @@ import { getAppBaseUrl, getProjectId, initService } from "./utils.js"; import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api"; import { SentenceSplitter } from "@llamaindex/core/node-parser"; import { getEnv } from "@llamaindex/env"; +import { OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "../Settings.js"; -import { OpenAIEmbedding } from 
"../embeddings/OpenAIEmbedding.js"; export class LlamaCloudIndex { params: CloudConstructorParams; diff --git a/packages/llamaindex/src/cloud/config.ts b/packages/llamaindex/src/cloud/config.ts index 0a562fd58662c94c584d4f3c6fd56a2c6d2ef2c8..5b3b7a48e38f101943fbc22a98d8c0e037b7d575 100644 --- a/packages/llamaindex/src/cloud/config.ts +++ b/packages/llamaindex/src/cloud/config.ts @@ -5,7 +5,7 @@ import type { } from "@llamaindex/cloud/api"; import { SentenceSplitter } from "@llamaindex/core/node-parser"; import { BaseNode, type TransformComponent } from "@llamaindex/core/schema"; -import { OpenAIEmbedding } from "../embeddings/OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export type GetPipelineCreateParams = { pipelineName: string; diff --git a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts b/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts index 72386410e1dfd3a97568bf31c6f709ffcaaa0e55..02781efcb42bde6e5598eb279af193c85ec9158f 100644 --- a/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts +++ b/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts @@ -1,152 +1 @@ -import { BaseEmbedding } from "@llamaindex/core/embeddings"; -import { Tokenizers } from "@llamaindex/env"; -import type { ClientOptions as OpenAIClientOptions } from "openai"; -import type { AzureOpenAIConfig } from "../llm/azure.js"; -import { - getAzureConfigFromEnv, - getAzureModel, - shouldUseAzure, -} from "../llm/azure.js"; -import type { OpenAISession } from "../llm/openai.js"; -import { getOpenAISession } from "../llm/openai.js"; - -export const ALL_OPENAI_EMBEDDING_MODELS = { - "text-embedding-ada-002": { - dimensions: 1536, - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, - "text-embedding-3-small": { - dimensions: 1536, - dimensionOptions: [512, 1536], - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, - "text-embedding-3-large": { - dimensions: 3072, - dimensionOptions: [256, 1024, 3072], - maxTokens: 8192, - tokenizer: Tokenizers.CL100K_BASE, - }, -}; - -type ModelKeys = keyof typeof ALL_OPENAI_EMBEDDING_MODELS; - -export class OpenAIEmbedding extends BaseEmbedding { - /** embeddding model. defaults to "text-embedding-ada-002" */ - model: string; - /** number of dimensions of the resulting vector, for models that support choosing fewer dimensions. undefined will default to model default */ - dimensions?: number | undefined; - - // OpenAI session params - - /** api key */ - apiKey?: string | undefined = undefined; - /** maximum number of retries, default 10 */ - maxRetries: number; - /** timeout in ms, default 60 seconds */ - timeout?: number | undefined; - /** other session options for OpenAI */ - additionalSessionOptions?: - | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout"> - | undefined; - - /** session object */ - session: OpenAISession; - - /** - * OpenAI Embedding - * @param init - initial parameters - */ - constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) { - super(); - - this.model = init?.model ?? "text-embedding-ada-002"; - this.dimensions = init?.dimensions; // if no dimensions provided, will be undefined/not sent to OpenAI - - this.embedBatchSize = init?.embedBatchSize ?? 10; - this.maxRetries = init?.maxRetries ?? 10; - - this.timeout = init?.timeout ?? 
60 * 1000; // Default is 60 seconds - this.additionalSessionOptions = init?.additionalSessionOptions; - - // find metadata for model - const key = Object.keys(ALL_OPENAI_EMBEDDING_MODELS).find( - (key) => key === this.model, - ) as ModelKeys | undefined; - if (key) { - this.embedInfo = ALL_OPENAI_EMBEDDING_MODELS[key]; - } - - if (init?.azure || shouldUseAzure()) { - const azureConfig = { - ...getAzureConfigFromEnv({ - model: getAzureModel(this.model), - }), - ...init?.azure, - }; - - this.apiKey = azureConfig.apiKey; - this.session = - init?.session ?? - getOpenAISession({ - azure: true, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - ...azureConfig, - }); - } else { - this.apiKey = init?.apiKey ?? undefined; - this.session = - init?.session ?? - getOpenAISession({ - apiKey: this.apiKey, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - }); - } - } - - /** - * Get embeddings for a batch of texts - * @param texts - * @param options - */ - private async getOpenAIEmbedding(input: string[]): Promise<number[][]> { - // TODO: ensure this for every sub class by calling it in the base class - input = this.truncateMaxTokens(input); - - const { data } = await this.session.openai.embeddings.create( - this.dimensions - ? { - model: this.model, - dimensions: this.dimensions, // only sent to OpenAI if set by user - input, - } - : { - model: this.model, - input, - }, - ); - - return data.map((d) => d.embedding); - } - - /** - * Get embeddings for a batch of texts - * @param texts - */ - getTextEmbeddings = async (texts: string[]): Promise<number[][]> => { - return this.getOpenAIEmbedding(texts); - }; - - /** - * Get embeddings for a single text - * @param texts - */ - async getTextEmbedding(text: string): Promise<number[]> { - return (await this.getOpenAIEmbedding([text]))[0]!; - } -} +export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/src/embeddings/fireworks.ts b/packages/llamaindex/src/embeddings/fireworks.ts index a48039e1022e91c06561e08937a02b39bc3f5e7e..8338884c12955937b009a28c1d482d6799a2bfd7 100644 --- a/packages/llamaindex/src/embeddings/fireworks.ts +++ b/packages/llamaindex/src/embeddings/fireworks.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAIEmbedding } from "./OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export class FireworksEmbedding extends OpenAIEmbedding { constructor(init?: Partial<OpenAIEmbedding>) { diff --git a/packages/llamaindex/src/embeddings/together.ts b/packages/llamaindex/src/embeddings/together.ts index b284daf6a701fe42000b3184dfc68c5f62d7e127..1ed43fef7f582bb557c9afe961fec41eb1c182b0 100644 --- a/packages/llamaindex/src/embeddings/together.ts +++ b/packages/llamaindex/src/embeddings/together.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAIEmbedding } from "./OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; export class TogetherEmbedding extends OpenAIEmbedding { constructor(init?: Partial<OpenAIEmbedding>) { diff --git a/packages/llamaindex/src/extractors/MetadataExtractors.ts b/packages/llamaindex/src/extractors/MetadataExtractors.ts index b7ca6c49f9c8fef157ef172b2c22ddaa6134d828..b1d0ce5a47412627745c0375ef86b1a0f30e27b9 100644 --- a/packages/llamaindex/src/extractors/MetadataExtractors.ts +++ b/packages/llamaindex/src/extractors/MetadataExtractors.ts @@ -1,7 +1,7 @@ import type { LLM } from "@llamaindex/core/llms"; import type { BaseNode } from 
"@llamaindex/core/schema"; import { MetadataMode, TextNode } from "@llamaindex/core/schema"; -import { OpenAI } from "../llm/index.js"; +import { OpenAI } from "@llamaindex/openai"; import { defaultKeywordExtractorPromptTemplate, defaultQuestionAnswerPromptTemplate, diff --git a/packages/llamaindex/src/internal/settings/EmbedModel.ts b/packages/llamaindex/src/internal/settings/EmbedModel.ts index fab2331a8702e0d077330258a5c18d404e2f743e..d912d20b9c4121891751f1317d347b4ca9ca9dfc 100644 --- a/packages/llamaindex/src/internal/settings/EmbedModel.ts +++ b/packages/llamaindex/src/internal/settings/EmbedModel.ts @@ -1,6 +1,6 @@ import type { BaseEmbedding } from "@llamaindex/core/embeddings"; import { AsyncLocalStorage } from "@llamaindex/env"; -import { OpenAIEmbedding } from "../../embeddings/OpenAIEmbedding.js"; +import { OpenAIEmbedding } from "@llamaindex/openai"; const embeddedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>(); let globalEmbeddedModel: BaseEmbedding | null = null; diff --git a/packages/llamaindex/src/llm/deepinfra.ts b/packages/llamaindex/src/llm/deepinfra.ts index 9e2d9e2f8b20958552c07b23d907cc256f112d41..c2c8bde814bd8ef1be6ae0a39abb6d2e53c3edca 100644 --- a/packages/llamaindex/src/llm/deepinfra.ts +++ b/packages/llamaindex/src/llm/deepinfra.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; const ENV_VARIABLE_NAME = "DEEPINFRA_API_TOKEN"; const DEFAULT_MODEL = "mistralai/Mixtral-8x22B-Instruct-v0.1"; diff --git a/packages/llamaindex/src/llm/deepseek.ts b/packages/llamaindex/src/llm/deepseek.ts index 7c4f15466df2d81192197a61a96b33dcdc1d10b2..d8a2586838ab763e1a0f98a07f9746f274b407d7 100644 --- a/packages/llamaindex/src/llm/deepseek.ts +++ b/packages/llamaindex/src/llm/deepseek.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export const DEEPSEEK_MODELS = { "deepseek-coder": { contextWindow: 128000 }, diff --git a/packages/llamaindex/src/llm/fireworks.ts b/packages/llamaindex/src/llm/fireworks.ts index 0385b62f25206b8eb2fa3d2cdcdef0115abbfd8d..3e5979f4d921708c01016ef4a24f190e8b7e5a84 100644 --- a/packages/llamaindex/src/llm/fireworks.ts +++ b/packages/llamaindex/src/llm/fireworks.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export class FireworksLLM extends OpenAI { constructor(init?: Partial<OpenAI>) { diff --git a/packages/llamaindex/src/llm/groq.ts b/packages/llamaindex/src/llm/groq.ts index 8f77a9389cebcc24c04c2e996f779b73fb6c88b0..5c058f923ed7847cadbf02654fc4a3c3ebea3a7b 100644 --- a/packages/llamaindex/src/llm/groq.ts +++ b/packages/llamaindex/src/llm/groq.ts @@ -1,6 +1,6 @@ import { getEnv } from "@llamaindex/env"; +import { OpenAI } from "@llamaindex/openai"; import GroqSDK, { type ClientOptions } from "groq-sdk"; -import { OpenAI } from "./openai.js"; export class Groq extends OpenAI { constructor( diff --git a/packages/llamaindex/src/llm/index.ts b/packages/llamaindex/src/llm/index.ts index de4dbad211b0b58c4979935c6f76cb6bca3c76a9..722a006657d0c8672a79aebd01e4b56de9dd7014 100644 --- a/packages/llamaindex/src/llm/index.ts +++ b/packages/llamaindex/src/llm/index.ts @@ -11,7 +11,6 @@ export { GEMINI_MODEL, type GoogleGeminiSessionOptions, } from "./gemini/types.js"; - export { Groq } from "./groq.js"; export { HuggingFaceInferenceAPI, HuggingFaceLLM } from 
"./huggingface.js"; export { diff --git a/packages/llamaindex/src/llm/openai.ts b/packages/llamaindex/src/llm/openai.ts index 1a5b237a160d346465a05402611e79381e92cd2f..02781efcb42bde6e5598eb279af193c85ec9158f 100644 --- a/packages/llamaindex/src/llm/openai.ts +++ b/packages/llamaindex/src/llm/openai.ts @@ -1,502 +1 @@ -import { getEnv } from "@llamaindex/env"; -import _ from "lodash"; -import type OpenAILLM from "openai"; -import type { - ClientOptions, - ClientOptions as OpenAIClientOptions, -} from "openai"; -import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai"; -import type { ChatModel } from "openai/resources/chat/chat"; - -import { - type BaseTool, - type ChatMessage, - type ChatResponse, - type ChatResponseChunk, - type LLM, - type LLMChatParamsNonStreaming, - type LLMChatParamsStreaming, - type LLMMetadata, - type MessageType, - type PartialToolCall, - ToolCallLLM, - type ToolCallLLMMessageOptions, -} from "@llamaindex/core/llms"; -import { - extractText, - wrapEventCaller, - wrapLLMEvent, -} from "@llamaindex/core/utils"; -import { Tokenizers } from "@llamaindex/env"; -import type { - ChatCompletionAssistantMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionRole, - ChatCompletionSystemMessageParam, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, -} from "openai/resources/chat/completions"; -import type { ChatCompletionMessageParam } from "openai/resources/index.js"; -import type { AzureOpenAIConfig } from "./azure.js"; -import { - getAzureConfigFromEnv, - getAzureModel, - shouldUseAzure, -} from "./azure.js"; - -export class OpenAISession { - openai: Pick<OrigOpenAI, "chat" | "embeddings">; - - constructor(options: ClientOptions & { azure?: boolean } = {}) { - if (options.azure) { - this.openai = new AzureOpenAI(options as AzureOpenAIConfig); - } else { - if (!options.apiKey) { - options.apiKey = getEnv("OPENAI_API_KEY"); - } - - if (!options.apiKey) { - throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable"); // Overriding OpenAI package's error message - } - - this.openai = new OrigOpenAI({ - ...options, - }); - } - } -} - -// I'm not 100% sure this is necessary vs. just starting a new session -// every time we make a call. They say they try to reuse connections -// so in theory this is more efficient, but we should test it in the future. -const defaultOpenAISession: { - session: OpenAISession; - options: ClientOptions; -}[] = []; - -/** - * Get a session for the OpenAI API. If one already exists with the same options, - * it will be returned. Otherwise, a new session will be created. 
- * @param options - * @returns - */ -export function getOpenAISession( - options: ClientOptions & { azure?: boolean } = {}, -) { - let session = defaultOpenAISession.find((session) => { - return _.isEqual(session.options, options); - })?.session; - - if (!session) { - session = new OpenAISession(options); - defaultOpenAISession.push({ session, options }); - } - - return session; -} - -export const GPT4_MODELS = { - "chatgpt-4o-latest": { - contextWindow: 128000, - }, - "gpt-4": { contextWindow: 8192 }, - "gpt-4-32k": { contextWindow: 32768 }, - "gpt-4-32k-0613": { contextWindow: 32768 }, - "gpt-4-turbo": { contextWindow: 128000 }, - "gpt-4-turbo-preview": { contextWindow: 128000 }, - "gpt-4-1106-preview": { contextWindow: 128000 }, - "gpt-4-0125-preview": { contextWindow: 128000 }, - "gpt-4-vision-preview": { contextWindow: 128000 }, - "gpt-4o": { contextWindow: 128000 }, - "gpt-4o-2024-05-13": { contextWindow: 128000 }, - "gpt-4o-mini": { contextWindow: 128000 }, - "gpt-4o-mini-2024-07-18": { contextWindow: 128000 }, - "gpt-4o-2024-08-06": { contextWindow: 128000 }, - "gpt-4o-2024-09-14": { contextWindow: 128000 }, - "gpt-4o-2024-10-14": { contextWindow: 128000 }, - "gpt-4-0613": { contextWindow: 128000 }, - "gpt-4-turbo-2024-04-09": { contextWindow: 128000 }, - "gpt-4-0314": { contextWindow: 128000 }, - "gpt-4-32k-0314": { contextWindow: 32768 }, -}; - -// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future -export const GPT35_MODELS = { - "gpt-3.5-turbo": { contextWindow: 16385 }, - "gpt-3.5-turbo-0613": { contextWindow: 4096 }, - "gpt-3.5-turbo-16k": { contextWindow: 16385 }, - "gpt-3.5-turbo-16k-0613": { contextWindow: 16385 }, - "gpt-3.5-turbo-1106": { contextWindow: 16385 }, - "gpt-3.5-turbo-0125": { contextWindow: 16385 }, - "gpt-3.5-turbo-0301": { contextWindow: 16385 }, -}; - -export const O1_MODELS = { - "o1-preview": { - contextWindow: 128000, - }, - "o1-preview-2024-09-12": { - contextWindow: 128000, - }, - "o1-mini": { - contextWindow: 128000, - }, - "o1-mini-2024-09-12": { - contextWindow: 128000, - }, -}; - -/** - * We currently support GPT-3.5 and GPT-4 models - */ -export const ALL_AVAILABLE_OPENAI_MODELS = { - ...GPT4_MODELS, - ...GPT35_MODELS, - ...O1_MODELS, -} satisfies Record<ChatModel, { contextWindow: number }>; - -export function isFunctionCallingModel(llm: LLM): llm is OpenAI { - let model: string; - if (llm instanceof OpenAI) { - model = llm.model; - } else if ("model" in llm && typeof llm.model === "string") { - model = llm.model; - } else { - return false; - } - const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model); - const isOld = model.includes("0314") || model.includes("0301"); - const isO1 = model.startsWith("o1"); - return isChatModel && !isOld && !isO1; -} - -export type OpenAIAdditionalMetadata = {}; - -export type OpenAIAdditionalChatOptions = Omit< - Partial<OpenAILLM.Chat.ChatCompletionCreateParams>, - | "max_tokens" - | "messages" - | "model" - | "temperature" - | "top_p" - | "stream" - | "tools" - | "toolChoice" ->; - -export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> { - model: - | ChatModel - // string & {} is a hack to allow any string, but still give autocomplete - | (string & {}); - temperature: number; - topP: number; - maxTokens?: number | undefined; - additionalChatOptions?: OpenAIAdditionalChatOptions | undefined; - - // OpenAI session params - apiKey?: string | undefined = undefined; - maxRetries: number; - timeout?: number; - session: OpenAISession; - 
additionalSessionOptions?: - | undefined - | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">; - - constructor( - init?: Partial<OpenAI> & { - azure?: AzureOpenAIConfig; - }, - ) { - super(); - this.model = init?.model ?? "gpt-4o"; - this.temperature = init?.temperature ?? 0.1; - this.topP = init?.topP ?? 1; - this.maxTokens = init?.maxTokens ?? undefined; - - this.maxRetries = init?.maxRetries ?? 10; - this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds - this.additionalChatOptions = init?.additionalChatOptions; - this.additionalSessionOptions = init?.additionalSessionOptions; - - if (init?.azure || shouldUseAzure()) { - const azureConfig = { - ...getAzureConfigFromEnv({ - model: getAzureModel(this.model), - }), - ...init?.azure, - }; - - this.apiKey = azureConfig.apiKey; - this.session = - init?.session ?? - getOpenAISession({ - azure: true, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - ...azureConfig, - }); - } else { - this.apiKey = init?.apiKey ?? undefined; - this.session = - init?.session ?? - getOpenAISession({ - apiKey: this.apiKey, - maxRetries: this.maxRetries, - timeout: this.timeout, - ...this.additionalSessionOptions, - }); - } - } - - get supportToolCall() { - return isFunctionCallingModel(this); - } - - get metadata(): LLMMetadata & OpenAIAdditionalMetadata { - const contextWindow = - ALL_AVAILABLE_OPENAI_MODELS[ - this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS - ]?.contextWindow ?? 1024; - return { - model: this.model, - temperature: this.temperature, - topP: this.topP, - maxTokens: this.maxTokens, - contextWindow, - tokenizer: Tokenizers.CL100K_BASE, - }; - } - - static toOpenAIRole(messageType: MessageType): ChatCompletionRole { - switch (messageType) { - case "user": - return "user"; - case "assistant": - return "assistant"; - case "system": - return "system"; - default: - return "user"; - } - } - - static toOpenAIMessage( - messages: ChatMessage<ToolCallLLMMessageOptions>[], - ): ChatCompletionMessageParam[] { - return messages.map((message) => { - const options = message.options ?? {}; - if ("toolResult" in options) { - return { - tool_call_id: options.toolResult.id, - role: "tool", - content: extractText(message.content), - } satisfies ChatCompletionToolMessageParam; - } else if ("toolCall" in options) { - return { - role: "assistant", - content: extractText(message.content), - tool_calls: options.toolCall.map((toolCall) => { - return { - id: toolCall.id, - type: "function", - function: { - name: toolCall.name, - arguments: - typeof toolCall.input === "string" - ? 
toolCall.input - : JSON.stringify(toolCall.input), - }, - }; - }), - } satisfies ChatCompletionAssistantMessageParam; - } else if (message.role === "user") { - return { - role: "user", - content: message.content, - } satisfies ChatCompletionUserMessageParam; - } - - const response: - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionMessageToolCall = { - // fixme(alex): type assertion - role: OpenAI.toOpenAIRole(message.role) as never, - // fixme: should not extract text, but assert content is string - content: extractText(message.content), - }; - return response; - }); - } - - chat( - params: LLMChatParamsStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>; - chat( - params: LLMChatParamsNonStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise<ChatResponse<ToolCallLLMMessageOptions>>; - @wrapEventCaller - @wrapLLMEvent - async chat( - params: - | LLMChatParamsNonStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - > - | LLMChatParamsStreaming< - OpenAIAdditionalChatOptions, - ToolCallLLMMessageOptions - >, - ): Promise< - | ChatResponse<ToolCallLLMMessageOptions> - | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> - > { - const { messages, stream, tools, additionalChatOptions } = params; - const baseRequestParams = <OpenAILLM.Chat.ChatCompletionCreateParams>{ - model: this.model, - temperature: this.temperature, - max_tokens: this.maxTokens, - tools: tools?.map(OpenAI.toTool), - messages: OpenAI.toOpenAIMessage(messages), - top_p: this.topP, - ...Object.assign({}, this.additionalChatOptions, additionalChatOptions), - }; - - if ( - Array.isArray(baseRequestParams.tools) && - baseRequestParams.tools.length === 0 - ) { - // remove empty tools array to avoid OpenAI error - delete baseRequestParams.tools; - } - - // Streaming - if (stream) { - return this.streamChat(baseRequestParams); - } - - // Non-streaming - const response = await this.session.openai.chat.completions.create({ - ...baseRequestParams, - stream: false, - }); - - const content = response.choices[0]!.message?.content ?? ""; - - return { - raw: response, - message: { - content, - role: response.choices[0]!.message.role, - options: response.choices[0]!.message?.tool_calls - ? { - toolCall: response.choices[0]!.message.tool_calls.map( - (toolCall) => ({ - id: toolCall.id, - name: toolCall.function.name, - input: toolCall.function.arguments, - }), - ), - } - : {}, - }, - }; - } - - // todo: this wrapper is ugly, refactor it - @wrapEventCaller - protected async *streamChat( - baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams, - ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> { - const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> = - await this.session.openai.chat.completions.create({ - ...baseRequestParams, - stream: true, - }); - - // TODO: add callback to streamConverter and use streamConverter here - // this will be used to keep track of the current tool call, make sure input are valid json object. 
- let currentToolCall: PartialToolCall | null = null; - const toolCallMap = new Map<string, PartialToolCall>(); - for await (const part of stream) { - if (part.choices.length === 0) continue; - const choice = part.choices[0]!; - // skip parts that don't have any content - if (!(choice.delta.content || choice.delta.tool_calls)) continue; - - let shouldEmitToolCall: PartialToolCall | null = null; - if ( - choice.delta.tool_calls?.[0]!.id && - currentToolCall && - choice.delta.tool_calls?.[0].id !== currentToolCall.id - ) { - shouldEmitToolCall = { - ...currentToolCall, - input: JSON.parse(currentToolCall.input), - }; - } - if (choice.delta.tool_calls?.[0]!.id) { - currentToolCall = { - name: choice.delta.tool_calls[0].function!.name!, - id: choice.delta.tool_calls[0].id, - input: choice.delta.tool_calls[0].function!.arguments!, - }; - toolCallMap.set(choice.delta.tool_calls[0].id, currentToolCall); - } else { - if (choice.delta.tool_calls?.[0]!.function?.arguments) { - currentToolCall!.input += - choice.delta.tool_calls[0].function.arguments; - } - } - - const isDone: boolean = choice.finish_reason !== null; - - if (isDone && currentToolCall) { - // for the last one, we need to emit the tool call - shouldEmitToolCall = { - ...currentToolCall, - input: JSON.parse(currentToolCall.input), - }; - } - - yield { - raw: part, - options: shouldEmitToolCall - ? { toolCall: [shouldEmitToolCall] } - : currentToolCall - ? { - toolCall: [currentToolCall], - } - : {}, - delta: choice.delta.content ?? "", - }; - } - toolCallMap.clear(); - return; - } - - static toTool(tool: BaseTool): ChatCompletionTool { - return { - type: "function", - function: tool.metadata.parameters - ? { - name: tool.metadata.name, - description: tool.metadata.description, - parameters: tool.metadata.parameters, - } - : { - name: tool.metadata.name, - description: tool.metadata.description, - }, - }; - } -} +export * from "@llamaindex/openai"; diff --git a/packages/llamaindex/src/llm/together.ts b/packages/llamaindex/src/llm/together.ts index 3772b2be2bf08cd2b7735a1419d9e09684c1873c..4d314bcc0a84261580dac5502651dffd7d0f48d6 100644 --- a/packages/llamaindex/src/llm/together.ts +++ b/packages/llamaindex/src/llm/together.ts @@ -1,5 +1,5 @@ import { getEnv } from "@llamaindex/env"; -import { OpenAI } from "./openai.js"; +import { OpenAI } from "@llamaindex/openai"; export class TogetherLLM extends OpenAI { constructor(init?: Partial<OpenAI>) { diff --git a/packages/llm/openai/package.json b/packages/llm/openai/package.json new file mode 100644 index 0000000000000000000000000000000000000000..5fb001d0066a61176b754398848f0169fad628df --- /dev/null +++ b/packages/llm/openai/package.json @@ -0,0 +1,42 @@ +{ + "name": "@llamaindex/openai", + "description": "OpenAI Adapter for LlamaIndex", + "version": "0.1.0", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "exports": { + ".": { + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + }, + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "https://github.com/run-llama/LlamaIndexTS.git", + "directory": "packages/llm/openai" + }, + "private": true, + "scripts": { + "build": "bunchee", + "dev": "bunchee --watch" + }, + "devDependencies": { + "bunchee": "5.3.2" + }, + "dependencies": { + "@llamaindex/core": "workspace:*", + "@llamaindex/env": "workspace:*", + "openai": "^4.60.0", + "remeda": "^2.12.0" + } +} diff --git 
a/packages/llamaindex/src/llm/azure.ts b/packages/llm/openai/src/azure.ts similarity index 100% rename from packages/llamaindex/src/llm/azure.ts rename to packages/llm/openai/src/azure.ts diff --git a/packages/llm/openai/src/embedding.ts b/packages/llm/openai/src/embedding.ts new file mode 100644 index 0000000000000000000000000000000000000000..e4b5024488e5cf1ec6a7a554a38211b6432f6d2c --- /dev/null +++ b/packages/llm/openai/src/embedding.ts @@ -0,0 +1,152 @@ +import { BaseEmbedding } from "@llamaindex/core/embeddings"; +import { Tokenizers } from "@llamaindex/env"; +import type { ClientOptions as OpenAIClientOptions } from "openai"; +import type { AzureOpenAIConfig } from "./azure.js"; +import { + getAzureConfigFromEnv, + getAzureModel, + shouldUseAzure, +} from "./azure.js"; +import type { OpenAISession } from "./llm.js"; +import { getOpenAISession } from "./llm.js"; + +export const ALL_OPENAI_EMBEDDING_MODELS = { + "text-embedding-ada-002": { + dimensions: 1536, + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, + "text-embedding-3-small": { + dimensions: 1536, + dimensionOptions: [512, 1536], + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, + "text-embedding-3-large": { + dimensions: 3072, + dimensionOptions: [256, 1024, 3072], + maxTokens: 8192, + tokenizer: Tokenizers.CL100K_BASE, + }, +}; + +type ModelKeys = keyof typeof ALL_OPENAI_EMBEDDING_MODELS; + +export class OpenAIEmbedding extends BaseEmbedding { + /** embeddding model. defaults to "text-embedding-ada-002" */ + model: string; + /** number of dimensions of the resulting vector, for models that support choosing fewer dimensions. undefined will default to model default */ + dimensions?: number | undefined; + + // OpenAI session params + + /** api key */ + apiKey?: string | undefined = undefined; + /** maximum number of retries, default 10 */ + maxRetries: number; + /** timeout in ms, default 60 seconds */ + timeout?: number | undefined; + /** other session options for OpenAI */ + additionalSessionOptions?: + | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout"> + | undefined; + + /** session object */ + session: OpenAISession; + + /** + * OpenAI Embedding + * @param init - initial parameters + */ + constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) { + super(); + + this.model = init?.model ?? "text-embedding-ada-002"; + this.dimensions = init?.dimensions; // if no dimensions provided, will be undefined/not sent to OpenAI + + this.embedBatchSize = init?.embedBatchSize ?? 10; + this.maxRetries = init?.maxRetries ?? 10; + + this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds + this.additionalSessionOptions = init?.additionalSessionOptions; + + // find metadata for model + const key = Object.keys(ALL_OPENAI_EMBEDDING_MODELS).find( + (key) => key === this.model, + ) as ModelKeys | undefined; + if (key) { + this.embedInfo = ALL_OPENAI_EMBEDDING_MODELS[key]; + } + + if (init?.azure || shouldUseAzure()) { + const azureConfig = { + ...getAzureConfigFromEnv({ + model: getAzureModel(this.model), + }), + ...init?.azure, + }; + + this.apiKey = azureConfig.apiKey; + this.session = + init?.session ?? + getOpenAISession({ + azure: true, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + ...azureConfig, + }); + } else { + this.apiKey = init?.apiKey ?? undefined; + this.session = + init?.session ?? 
+ getOpenAISession({ + apiKey: this.apiKey, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + }); + } + } + + /** + * Get embeddings for a batch of texts + * @param texts + * @param options + */ + private async getOpenAIEmbedding(input: string[]): Promise<number[][]> { + // TODO: ensure this for every sub class by calling it in the base class + input = this.truncateMaxTokens(input); + + const { data } = await this.session.openai.embeddings.create( + this.dimensions + ? { + model: this.model, + dimensions: this.dimensions, // only sent to OpenAI if set by user + input, + } + : { + model: this.model, + input, + }, + ); + + return data.map((d) => d.embedding); + } + + /** + * Get embeddings for a batch of texts + * @param texts + */ + getTextEmbeddings = async (texts: string[]): Promise<number[][]> => { + return this.getOpenAIEmbedding(texts); + }; + + /** + * Get embeddings for a single text + * @param texts + */ + async getTextEmbedding(text: string): Promise<number[]> { + return (await this.getOpenAIEmbedding([text]))[0]!; + } +} diff --git a/packages/llm/openai/src/index.ts b/packages/llm/openai/src/index.ts new file mode 100644 index 0000000000000000000000000000000000000000..f1e663639c19225d50f1142545b3ac92f75488bf --- /dev/null +++ b/packages/llm/openai/src/index.ts @@ -0,0 +1,13 @@ +export { ALL_OPENAI_EMBEDDING_MODELS, OpenAIEmbedding } from "./embedding"; +export { + ALL_AVAILABLE_OPENAI_MODELS, + GPT35_MODELS, + GPT4_MODELS, + O1_MODELS, + OpenAI, + OpenAISession, + type OpenAIAdditionalChatOptions, + type OpenAIAdditionalMetadata, +} from "./llm"; + +export { type AzureOpenAIConfig } from "./azure"; diff --git a/packages/llm/openai/src/llm.ts b/packages/llm/openai/src/llm.ts new file mode 100644 index 0000000000000000000000000000000000000000..58d7aa1d22d710450ef3b282c94fd959bc883a9a --- /dev/null +++ b/packages/llm/openai/src/llm.ts @@ -0,0 +1,502 @@ +import { getEnv } from "@llamaindex/env"; +import type OpenAILLM from "openai"; +import type { + ClientOptions, + ClientOptions as OpenAIClientOptions, +} from "openai"; +import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai"; +import type { ChatModel } from "openai/resources/chat/chat"; +import { isDeepEqual } from "remeda"; + +import { + type BaseTool, + type ChatMessage, + type ChatResponse, + type ChatResponseChunk, + type LLM, + type LLMChatParamsNonStreaming, + type LLMChatParamsStreaming, + type LLMMetadata, + type MessageType, + type PartialToolCall, + ToolCallLLM, + type ToolCallLLMMessageOptions, +} from "@llamaindex/core/llms"; +import { + extractText, + wrapEventCaller, + wrapLLMEvent, +} from "@llamaindex/core/utils"; +import { Tokenizers } from "@llamaindex/env"; +import type { + ChatCompletionAssistantMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionRole, + ChatCompletionSystemMessageParam, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from "openai/resources/chat/completions"; +import type { ChatCompletionMessageParam } from "openai/resources/index.js"; +import type { AzureOpenAIConfig } from "./azure.js"; +import { + getAzureConfigFromEnv, + getAzureModel, + shouldUseAzure, +} from "./azure.js"; + +export class OpenAISession { + openai: Pick<OrigOpenAI, "chat" | "embeddings">; + + constructor(options: ClientOptions & { azure?: boolean } = {}) { + if (options.azure) { + this.openai = new AzureOpenAI(options as AzureOpenAIConfig); + } else { + if (!options.apiKey) { + options.apiKey = getEnv("OPENAI_API_KEY"); + } 
+ + if (!options.apiKey) { + throw new Error("Set OpenAI Key in OPENAI_API_KEY env variable"); // Overriding OpenAI package's error message + } + + this.openai = new OrigOpenAI({ + ...options, + }); + } + } +} + +// I'm not 100% sure this is necessary vs. just starting a new session +// every time we make a call. They say they try to reuse connections +// so in theory this is more efficient, but we should test it in the future. +const defaultOpenAISession: { + session: OpenAISession; + options: ClientOptions; +}[] = []; + +/** + * Get a session for the OpenAI API. If one already exists with the same options, + * it will be returned. Otherwise, a new session will be created. + * @param options + * @returns + */ +export function getOpenAISession( + options: ClientOptions & { azure?: boolean } = {}, +) { + let session = defaultOpenAISession.find((session) => { + return isDeepEqual(session.options, options); + })?.session; + + if (!session) { + session = new OpenAISession(options); + defaultOpenAISession.push({ session, options }); + } + + return session; +} + +export const GPT4_MODELS = { + "chatgpt-4o-latest": { + contextWindow: 128000, + }, + "gpt-4": { contextWindow: 8192 }, + "gpt-4-32k": { contextWindow: 32768 }, + "gpt-4-32k-0613": { contextWindow: 32768 }, + "gpt-4-turbo": { contextWindow: 128000 }, + "gpt-4-turbo-preview": { contextWindow: 128000 }, + "gpt-4-1106-preview": { contextWindow: 128000 }, + "gpt-4-0125-preview": { contextWindow: 128000 }, + "gpt-4-vision-preview": { contextWindow: 128000 }, + "gpt-4o": { contextWindow: 128000 }, + "gpt-4o-2024-05-13": { contextWindow: 128000 }, + "gpt-4o-mini": { contextWindow: 128000 }, + "gpt-4o-mini-2024-07-18": { contextWindow: 128000 }, + "gpt-4o-2024-08-06": { contextWindow: 128000 }, + "gpt-4o-2024-09-14": { contextWindow: 128000 }, + "gpt-4o-2024-10-14": { contextWindow: 128000 }, + "gpt-4-0613": { contextWindow: 128000 }, + "gpt-4-turbo-2024-04-09": { contextWindow: 128000 }, + "gpt-4-0314": { contextWindow: 128000 }, + "gpt-4-32k-0314": { contextWindow: 32768 }, +}; + +// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future +export const GPT35_MODELS = { + "gpt-3.5-turbo": { contextWindow: 16385 }, + "gpt-3.5-turbo-0613": { contextWindow: 4096 }, + "gpt-3.5-turbo-16k": { contextWindow: 16385 }, + "gpt-3.5-turbo-16k-0613": { contextWindow: 16385 }, + "gpt-3.5-turbo-1106": { contextWindow: 16385 }, + "gpt-3.5-turbo-0125": { contextWindow: 16385 }, + "gpt-3.5-turbo-0301": { contextWindow: 16385 }, +}; + +export const O1_MODELS = { + "o1-preview": { + contextWindow: 128000, + }, + "o1-preview-2024-09-12": { + contextWindow: 128000, + }, + "o1-mini": { + contextWindow: 128000, + }, + "o1-mini-2024-09-12": { + contextWindow: 128000, + }, +}; + +/** + * We currently support GPT-3.5 and GPT-4 models + */ +export const ALL_AVAILABLE_OPENAI_MODELS = { + ...GPT4_MODELS, + ...GPT35_MODELS, + ...O1_MODELS, +} satisfies Record<ChatModel, { contextWindow: number }>; + +export function isFunctionCallingModel(llm: LLM): llm is OpenAI { + let model: string; + if (llm instanceof OpenAI) { + model = llm.model; + } else if ("model" in llm && typeof llm.model === "string") { + model = llm.model; + } else { + return false; + } + const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model); + const isOld = model.includes("0314") || model.includes("0301"); + const isO1 = model.startsWith("o1"); + return isChatModel && !isOld && !isO1; +} + +export type OpenAIAdditionalMetadata = {}; + +export type 
OpenAIAdditionalChatOptions = Omit< + Partial<OpenAILLM.Chat.ChatCompletionCreateParams>, + | "max_tokens" + | "messages" + | "model" + | "temperature" + | "top_p" + | "stream" + | "tools" + | "toolChoice" +>; + +export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> { + model: + | ChatModel + // string & {} is a hack to allow any string, but still give autocomplete + | (string & {}); + temperature: number; + topP: number; + maxTokens?: number | undefined; + additionalChatOptions?: OpenAIAdditionalChatOptions | undefined; + + // OpenAI session params + apiKey?: string | undefined = undefined; + maxRetries: number; + timeout?: number; + session: OpenAISession; + additionalSessionOptions?: + | undefined + | Omit<Partial<OpenAIClientOptions>, "apiKey" | "maxRetries" | "timeout">; + + constructor( + init?: Partial<OpenAI> & { + azure?: AzureOpenAIConfig; + }, + ) { + super(); + this.model = init?.model ?? "gpt-4o"; + this.temperature = init?.temperature ?? 0.1; + this.topP = init?.topP ?? 1; + this.maxTokens = init?.maxTokens ?? undefined; + + this.maxRetries = init?.maxRetries ?? 10; + this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds + this.additionalChatOptions = init?.additionalChatOptions; + this.additionalSessionOptions = init?.additionalSessionOptions; + + if (init?.azure || shouldUseAzure()) { + const azureConfig = { + ...getAzureConfigFromEnv({ + model: getAzureModel(this.model), + }), + ...init?.azure, + }; + + this.apiKey = azureConfig.apiKey; + this.session = + init?.session ?? + getOpenAISession({ + azure: true, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + ...azureConfig, + }); + } else { + this.apiKey = init?.apiKey ?? undefined; + this.session = + init?.session ?? + getOpenAISession({ + apiKey: this.apiKey, + maxRetries: this.maxRetries, + timeout: this.timeout, + ...this.additionalSessionOptions, + }); + } + } + + get supportToolCall() { + return isFunctionCallingModel(this); + } + + get metadata(): LLMMetadata & OpenAIAdditionalMetadata { + const contextWindow = + ALL_AVAILABLE_OPENAI_MODELS[ + this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS + ]?.contextWindow ?? 1024; + return { + model: this.model, + temperature: this.temperature, + topP: this.topP, + maxTokens: this.maxTokens, + contextWindow, + tokenizer: Tokenizers.CL100K_BASE, + }; + } + + static toOpenAIRole(messageType: MessageType): ChatCompletionRole { + switch (messageType) { + case "user": + return "user"; + case "assistant": + return "assistant"; + case "system": + return "system"; + default: + return "user"; + } + } + + static toOpenAIMessage( + messages: ChatMessage<ToolCallLLMMessageOptions>[], + ): ChatCompletionMessageParam[] { + return messages.map((message) => { + const options = message.options ?? {}; + if ("toolResult" in options) { + return { + tool_call_id: options.toolResult.id, + role: "tool", + content: extractText(message.content), + } satisfies ChatCompletionToolMessageParam; + } else if ("toolCall" in options) { + return { + role: "assistant", + content: extractText(message.content), + tool_calls: options.toolCall.map((toolCall) => { + return { + id: toolCall.id, + type: "function", + function: { + name: toolCall.name, + arguments: + typeof toolCall.input === "string" + ? 
toolCall.input + : JSON.stringify(toolCall.input), + }, + }; + }), + } satisfies ChatCompletionAssistantMessageParam; + } else if (message.role === "user") { + return { + role: "user", + content: message.content, + } satisfies ChatCompletionUserMessageParam; + } + + const response: + | ChatCompletionSystemMessageParam + | ChatCompletionUserMessageParam + | ChatCompletionMessageToolCall = { + // fixme(alex): type assertion + role: OpenAI.toOpenAIRole(message.role) as never, + // fixme: should not extract text, but assert content is string + content: extractText(message.content), + }; + return response; + }); + } + + chat( + params: LLMChatParamsStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>; + chat( + params: LLMChatParamsNonStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise<ChatResponse<ToolCallLLMMessageOptions>>; + @wrapEventCaller + @wrapLLMEvent + async chat( + params: + | LLMChatParamsNonStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + > + | LLMChatParamsStreaming< + OpenAIAdditionalChatOptions, + ToolCallLLMMessageOptions + >, + ): Promise< + | ChatResponse<ToolCallLLMMessageOptions> + | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> + > { + const { messages, stream, tools, additionalChatOptions } = params; + const baseRequestParams = <OpenAILLM.Chat.ChatCompletionCreateParams>{ + model: this.model, + temperature: this.temperature, + max_tokens: this.maxTokens, + tools: tools?.map(OpenAI.toTool), + messages: OpenAI.toOpenAIMessage(messages), + top_p: this.topP, + ...Object.assign({}, this.additionalChatOptions, additionalChatOptions), + }; + + if ( + Array.isArray(baseRequestParams.tools) && + baseRequestParams.tools.length === 0 + ) { + // remove empty tools array to avoid OpenAI error + delete baseRequestParams.tools; + } + + // Streaming + if (stream) { + return this.streamChat(baseRequestParams); + } + + // Non-streaming + const response = await this.session.openai.chat.completions.create({ + ...baseRequestParams, + stream: false, + }); + + const content = response.choices[0]!.message?.content ?? ""; + + return { + raw: response, + message: { + content, + role: response.choices[0]!.message.role, + options: response.choices[0]!.message?.tool_calls + ? { + toolCall: response.choices[0]!.message.tool_calls.map( + (toolCall) => ({ + id: toolCall.id, + name: toolCall.function.name, + input: toolCall.function.arguments, + }), + ), + } + : {}, + }, + }; + } + + // todo: this wrapper is ugly, refactor it + @wrapEventCaller + protected async *streamChat( + baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams, + ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> { + const stream: AsyncIterable<OpenAILLM.Chat.ChatCompletionChunk> = + await this.session.openai.chat.completions.create({ + ...baseRequestParams, + stream: true, + }); + + // TODO: add callback to streamConverter and use streamConverter here + // this will be used to keep track of the current tool call, make sure input are valid json object. 
+ let currentToolCall: PartialToolCall | null = null; + const toolCallMap = new Map<string, PartialToolCall>(); + for await (const part of stream) { + if (part.choices.length === 0) continue; + const choice = part.choices[0]!; + // skip parts that don't have any content + if (!(choice.delta.content || choice.delta.tool_calls)) continue; + + let shouldEmitToolCall: PartialToolCall | null = null; + if ( + choice.delta.tool_calls?.[0]!.id && + currentToolCall && + choice.delta.tool_calls?.[0].id !== currentToolCall.id + ) { + shouldEmitToolCall = { + ...currentToolCall, + input: JSON.parse(currentToolCall.input), + }; + } + if (choice.delta.tool_calls?.[0]!.id) { + currentToolCall = { + name: choice.delta.tool_calls[0].function!.name!, + id: choice.delta.tool_calls[0].id, + input: choice.delta.tool_calls[0].function!.arguments!, + }; + toolCallMap.set(choice.delta.tool_calls[0].id, currentToolCall); + } else { + if (choice.delta.tool_calls?.[0]!.function?.arguments) { + currentToolCall!.input += + choice.delta.tool_calls[0].function.arguments; + } + } + + const isDone: boolean = choice.finish_reason !== null; + + if (isDone && currentToolCall) { + // for the last one, we need to emit the tool call + shouldEmitToolCall = { + ...currentToolCall, + input: JSON.parse(currentToolCall.input), + }; + } + + yield { + raw: part, + options: shouldEmitToolCall + ? { toolCall: [shouldEmitToolCall] } + : currentToolCall + ? { + toolCall: [currentToolCall], + } + : {}, + delta: choice.delta.content ?? "", + }; + } + toolCallMap.clear(); + return; + } + + static toTool(tool: BaseTool): ChatCompletionTool { + return { + type: "function", + function: tool.metadata.parameters + ? { + name: tool.metadata.name, + description: tool.metadata.description, + parameters: tool.metadata.parameters, + } + : { + name: tool.metadata.name, + description: tool.metadata.description, + }, + }; + } +} diff --git a/packages/llm/openai/tsconfig.json b/packages/llm/openai/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..5a94aa033116766429d5c2d42f337c5b1393ba86 --- /dev/null +++ b/packages/llm/openai/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "bundler", + "outDir": "./lib" + }, + "include": ["./src"], + "references": [ + { + "path": "../../llamaindex/tsconfig.json" + }, + { + "path": "../../env/tsconfig.json" + } + ] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c7ed08d7392b6c652f19cfdfa4996fb516f08a10..8186577fe5ea7585961ecaf302f3701a95746da2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -516,6 +516,9 @@ importers: '@llamaindex/env': specifier: workspace:* version: link:../env + '@llamaindex/openai': + specifier: workspace:* + version: link:../llm/openai '@mistralai/mistralai': specifier: ^1.0.4 version: 1.0.4(zod@3.23.8) @@ -652,9 +655,6 @@ importers: '@faker-js/faker': specifier: ^8.4.1 version: 8.4.1 - '@llamaindex/core': - specifier: workspace:* - version: link:../../core '@types/node': specifier: ^22.5.1 version: 22.5.1 @@ -853,6 +853,25 @@ importers: specifier: ^2.0.5 version: 2.0.5(@types/node@22.5.1)(terser@5.31.6) + packages/llm/openai: + dependencies: + '@llamaindex/core': + specifier: workspace:* + version: link:../../core + '@llamaindex/env': + specifier: workspace:* + version: link:../../env + openai: + specifier: ^4.60.0 + version: 4.60.0(encoding@0.1.13)(zod@3.23.8) + remeda: + specifier: ^2.12.0 + version: 2.12.0 + devDependencies: + bunchee: + 
specifier: 5.3.2 + version: 5.3.2(typescript@5.5.4) + packages/wasm-tools: dependencies: '@assemblyscript/loader': @@ -9472,6 +9491,9 @@ packages: remark-stringify@11.0.0: resolution: {integrity: sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==} + remeda@2.12.0: + resolution: {integrity: sha512-VAlyhh1os8boCA9/7yN9sXzo0tfCeOwScGXztwBspS0DXQmbIN8xTBfEABvbAW8rMJMPzqxQ1UymHquuESh/pg==} + renderkid@3.0.0: resolution: {integrity: sha512-q/7VIQA8lmM1hF+jn+sFSPWGlMkSAeNYcPLmDQx2zzuiDfaLrOmumR8iaUKlenFgh0XRPIUeSPlH3A+AW3Z5pg==} @@ -10456,6 +10478,10 @@ packages: resolution: {integrity: sha512-RAH822pAdBgcNMAfWnCBU3CFZcfZ/i1eZjwFU/dsLKumyuuP3niueg2UAukXYF0E2AAoc82ZSSf9J0WQBinzHA==} engines: {node: '>=12.20'} + type-fest@4.26.1: + resolution: {integrity: sha512-yOGpmOAL7CkKe/91I5O3gPICmJNLJ1G4zFYVAsRHg7M64biSnPtRj0WNQt++bRkjYOqjWXrhnUw1utzmVErAdg==} + engines: {node: '>=16'} + type-is@1.6.18: resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==} engines: {node: '>= 0.6'} @@ -14763,7 +14789,7 @@ snapshots: '@smithy/is-array-buffer@2.2.0': dependencies: - tslib: 2.6.3 + tslib: 2.7.0 '@smithy/is-array-buffer@3.0.0': dependencies: @@ -22166,6 +22192,10 @@ snapshots: mdast-util-to-markdown: 2.1.0 unified: 11.0.5 + remeda@2.12.0: + dependencies: + type-fest: 4.26.1 + renderkid@3.0.0: dependencies: css-select: 4.3.0 @@ -23263,6 +23293,8 @@ snapshots: type-fest@2.19.0: {} + type-fest@4.26.1: {} + type-is@1.6.18: dependencies: media-typer: 0.3.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 79bfed33e0bbc99608bb4c778da032e07c8b5b4d..78e2bf350152e5bf4ec6356c5c525bd84fd26f79 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,6 +1,7 @@ packages: - "apps/*" - "packages/*" + - "packages/llm/*" - "packages/core/tests" - "packages/llamaindex/tests" - "packages/llamaindex/e2e" diff --git a/tsconfig.json b/tsconfig.json index 3a11d68c083c49212e2dd620109e5ac9f4e8b0a6..11288b4933d01f94ba7bc12cf5766135c0079814 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -26,6 +26,9 @@ { "path": "./packages/community/tsconfig.json" }, + { + "path": "./packages/llm/openai/tsconfig.json" + }, { "path": "./packages/cloud/tsconfig.json" },