diff --git a/.changeset/four-beers-kick.md b/.changeset/four-beers-kick.md
index 4e591e3c3512d5863249b94531dde56a557bb089..97250234b051ba76e629019fe72bcb2bc3f1f6a4 100644
--- a/.changeset/four-beers-kick.md
+++ b/.changeset/four-beers-kick.md
@@ -1,12 +1,9 @@
 ---
-"@llamaindex/core": minor
-"llamaindex": minor
+"@llamaindex/core": patch
+"llamaindex": patch
 ---
 
 refactor: move chat engine & retriever into core.
 
-This is a breaking change since `stream` option has moved to second parameter.
-
-- `chat` API in BaseChatEngine has changed, Move `stream` option into second parameter.
 - `chatHistory` in BaseChatEngine now returns `ChatMessage[] | Promise<ChatMessage[]>`, instead of `BaseMemory`
 - update `retrieve-end` type
diff --git a/examples/agent/azure_dynamic_session.ts b/examples/agent/azure_dynamic_session.ts
index 72dbb77e639c96a696998e22d5ed0618eabf056b..31d5375c7e223d53c85d529ceb8095a50938779a 100644
--- a/examples/agent/azure_dynamic_session.ts
+++ b/examples/agent/azure_dynamic_session.ts
@@ -42,6 +42,7 @@ async function main() {
   const response = await agent.chat({
     message:
       "plot a chart of 5 random numbers and save it to /mnt/data/chart.png",
+    stream: false,
   });
 
   // Print the response
diff --git a/examples/agent/stream_openai_agent.ts b/examples/agent/stream_openai_agent.ts
index 19dfcca177cf2339f9111d2e9e9a68b74e73f34c..4d8d6e8fcbd4cf3bc52f6923b630e2b17cdece09 100644
--- a/examples/agent/stream_openai_agent.ts
+++ b/examples/agent/stream_openai_agent.ts
@@ -61,12 +61,10 @@ async function main() {
     tools: [functionTool, functionTool2],
   });
 
-  const stream = await agent.chat(
-    {
-      message: "Divide 16 by 2 then add 20",
-    },
-    true,
-  );
+  const stream = await agent.chat({
+    message: "Divide 16 by 2 then add 20",
+    stream: true,
+  });
 
   console.log("Response:");
 
diff --git a/examples/agent/wiki.ts b/examples/agent/wiki.ts
index 1ec98652b15bc748b6465f6d9b0ed7d22fb0f6bc..e4100e9909a97f0b79c5e4d73eef8b0d77379f87 100644
--- a/examples/agent/wiki.ts
+++ b/examples/agent/wiki.ts
@@ -11,12 +11,10 @@ async function main() {
   });
 
   // Chat with the agent
-  const response = await agent.chat(
-    {
-      message: "Who was Goethe?",
-    },
-    true,
-  );
+  const response = await agent.chat({
+    message: "Who was Goethe?",
+    stream: true,
+  });
 
   for await (const { delta } of response) {
     process.stdout.write(delta);
diff --git a/examples/anthropic/chat_interactive.ts b/examples/anthropic/chat_interactive.ts
index 9579cd7a4aa2137626fb1f7227d30c743a90ca60..3f33b268fb7f46ea971273ffa6ba9e392461809b 100644
--- a/examples/anthropic/chat_interactive.ts
+++ b/examples/anthropic/chat_interactive.ts
@@ -25,7 +25,7 @@ import readline from "node:readline/promises";
   while (true) {
     const query = await rl.question("User: ");
     process.stdout.write("Assistant: ");
-    const stream = await chatEngine.chat({ message: query }, true);
+    const stream = await chatEngine.chat({ message: query, stream: true });
     for await (const chunk of stream) {
       process.stdout.write(chunk.response);
     }
diff --git a/examples/chatEngine.ts b/examples/chatEngine.ts
index da24e225156e5974150d61b5bc8ffa5b4ce7ce47..addd025bb83cc23c870b99752b3c1d85711b8108 100644
--- a/examples/chatEngine.ts
+++ b/examples/chatEngine.ts
@@ -24,7 +24,7 @@ async function main() {
 
   while (true) {
     const query = await rl.question("Query: ");
-    const stream = await chatEngine.chat({ message: query }, true);
+    const stream = await chatEngine.chat({ message: query, stream: true });
     console.log();
     for await (const chunk of stream) {
       process.stdout.write(chunk.response);
diff --git a/examples/chatHistory.ts b/examples/chatHistory.ts
index 14e12a31b02766e9a8d94121102b5cf42927162a..c55c618d69ac5eb64ee20812ef11bb10e67bc68f 100644
--- a/examples/chatHistory.ts
+++ b/examples/chatHistory.ts
@@ -24,13 +24,11 @@ async function main() {
 
   while (true) {
     const query = await rl.question("Query: ");
-    const stream = await chatEngine.chat(
-      {
-        message: query,
-        chatHistory,
-      },
-      true,
-    );
+    const stream = await chatEngine.chat({
+      message: query,
+      chatHistory,
+      stream: true,
+    });
     if (chatHistory.getLastSummary()) {
       // Print the summary of the conversation so far that is produced by the SummaryChatHistory
       console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
diff --git a/examples/cloud/chat.ts b/examples/cloud/chat.ts
index c39224ae08bc8feffdd24cbc9e084c1f8a83735f..d6fdce572140601cb757dc577c799f2d00dabf43 100644
--- a/examples/cloud/chat.ts
+++ b/examples/cloud/chat.ts
@@ -18,7 +18,7 @@ async function main() {
 
   while (true) {
     const query = await rl.question("User: ");
-    const stream = await chatEngine.chat({ message: query }, true);
+    const stream = await chatEngine.chat({ message: query, stream: true });
     for await (const chunk of stream) {
       process.stdout.write(chunk.response);
     }
diff --git a/examples/huggingface/embedding.ts b/examples/huggingface/embedding.ts
index 01aca316a98ba26c6a2f4e173659db395195486c..8297b75366a47944bb56de0550e8df14b91694b0 100644
--- a/examples/huggingface/embedding.ts
+++ b/examples/huggingface/embedding.ts
@@ -27,12 +27,10 @@ async function main() {
   // Query the index
   const queryEngine = index.asQueryEngine();
 
-  const stream = await queryEngine.query(
-    {
-      query: "What did the author do in college?",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "What did the author do in college?",
+    stream: true,
+  });
 
   // Output response
   for await (const chunk of stream) {
diff --git a/examples/huggingface/embeddingApi.ts b/examples/huggingface/embeddingApi.ts
index a0bc861cb743653a6340472f442cc5bf2371fcfb..a89df27036ad8d2c05092b5e3f65c531f03703f5 100644
--- a/examples/huggingface/embeddingApi.ts
+++ b/examples/huggingface/embeddingApi.ts
@@ -37,12 +37,10 @@ async function main() {
   // Query the index
   const queryEngine = index.asQueryEngine();
 
-  const stream = await queryEngine.query(
-    {
-      query: "What did the author do in college?",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "What did the author do in college?",
+    stream: true,
+  });
 
   // Output response
   for await (const chunk of stream) {
diff --git a/examples/multimodal/rag.ts b/examples/multimodal/rag.ts
index bbe8a6296768aae7e22af47ba17011e84686c7c9..14d3a1c7488e8365edfe5d746e81de2d920156d9 100644
--- a/examples/multimodal/rag.ts
+++ b/examples/multimodal/rag.ts
@@ -32,12 +32,10 @@ async function main() {
     responseSynthesizer: getResponseSynthesizer("multi_modal"),
     retriever: index.asRetriever({ topK: { TEXT: 3, IMAGE: 1 } }),
   });
-  const stream = await queryEngine.query(
-    {
-      query: "Tell me more about Vincent van Gogh's famous paintings",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "Tell me more about Vincent van Gogh's famous paintings",
+    stream: true,
+  });
   for await (const chunk of stream) {
     process.stdout.write(chunk.response);
   }
diff --git a/packages/autotool/examples/02_nextjs/actions.ts b/packages/autotool/examples/02_nextjs/actions.ts
index b6738a2300b917ab2a491d5e6110c815c421ce6f..3c5d12a48f49f7363987b2f3d13c6a4fc4e86930 100644
--- a/packages/autotool/examples/02_nextjs/actions.ts
+++ b/packages/autotool/examples/02_nextjs/actions.ts
@@ -14,12 +14,10 @@ export async function chatWithAI(message: string): Promise<ReactNode> {
   const uiStream = createStreamableUI();
   runWithStreamableUI(uiStream, () =>
     agent
-      .chat(
-        {
-          message,
-        },
-        true,
-      )
+      .chat({
+        stream: true,
+        message,
+      })
       .then(async (responseStream) => {
         return responseStream.pipeTo(
           new WritableStream({
diff --git a/packages/core/src/chat-engine/index.ts b/packages/core/src/chat-engine/index.ts
index 07d7b1223eb6ee53bd0659eac9e79d8efd3e48d2..b4bd4cf3b1a4cba3ddccd7f33417222bcce15d7b 100644
--- a/packages/core/src/chat-engine/index.ts
+++ b/packages/core/src/chat-engine/index.ts
@@ -2,7 +2,7 @@ import type { ChatMessage, MessageContent } from "../llms";
 import type { BaseMemory } from "../memory";
 import { EngineResponse } from "../schema";
 
-export interface ChatEngineParams<
+export interface BaseChatEngineParams<
   AdditionalMessageOptions extends object = object,
 > {
   message: MessageContent;
@@ -14,14 +14,22 @@ export interface ChatEngineParams<
     | BaseMemory<AdditionalMessageOptions>;
 }
 
+export interface StreamingChatEngineParams<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatEngineParams<AdditionalMessageOptions> {
+  stream: true;
+}
+
+export interface NonStreamingChatEngineParams<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatEngineParams<AdditionalMessageOptions> {
+  stream?: false;
+}
+
 export abstract class BaseChatEngine {
+  abstract chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
   abstract chat(
-    params: ChatEngineParams,
-    stream?: false,
-  ): Promise<EngineResponse>;
-  abstract chat(
-    params: ChatEngineParams,
-    stream: true,
+    params: StreamingChatEngineParams,
   ): Promise<AsyncIterable<EngineResponse>>;
 
   abstract chatHistory: ChatMessage[] | Promise<ChatMessage[]>;
diff --git a/packages/core/src/query-engine/base.ts b/packages/core/src/query-engine/base.ts
index db079b28429ecbe664fbaded13d0fb3c3c6c4e3e..464c4ff2b2138db8ddb8ce70d6530452a726bf75 100644
--- a/packages/core/src/query-engine/base.ts
+++ b/packages/core/src/query-engine/base.ts
@@ -18,6 +18,18 @@ export type QueryBundle = {
 
 export type QueryType = string | QueryBundle;
 
+export type BaseQueryParams = {
+  query: QueryType;
+};
+
+export interface StreamingQueryParams extends BaseQueryParams {
+  stream: true;
+}
+
+export interface NonStreamingQueryParams extends BaseQueryParams {
+  stream?: false;
+}
+
 export type QueryFn = (
   strOrQueryBundle: QueryType,
   stream?: boolean,
@@ -34,23 +46,20 @@ export abstract class BaseQueryEngine extends PromptMixin {
     );
   }
 
-  query(
-    strOrQueryBundle: QueryType,
-    stream: true,
-  ): Promise<AsyncIterable<EngineResponse>>;
-  query(strOrQueryBundle: QueryType, stream?: false): Promise<EngineResponse>;
+  query(params: StreamingQueryParams): Promise<AsyncIterable<EngineResponse>>;
+  query(params: NonStreamingQueryParams): Promise<EngineResponse>;
   @wrapEventCaller
   async query(
-    strOrQueryBundle: QueryType,
-    stream = false,
+    params: StreamingQueryParams | NonStreamingQueryParams,
   ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    const { stream, query } = params;
     const id = randomUUID();
     const callbackManager = Settings.callbackManager;
     callbackManager.dispatchEvent("query-start", {
       id,
-      query: strOrQueryBundle,
+      query,
     });
-    const response = await this._query(strOrQueryBundle, stream);
+    const response = await this._query(query, stream);
     callbackManager.dispatchEvent("query-end", {
       id,
       response,
diff --git a/packages/llamaindex/e2e/examples/cloudflare-worker-agent/src/index.ts b/packages/llamaindex/e2e/examples/cloudflare-worker-agent/src/index.ts
index ce80b00b2b830c84ff0bcd96b7aec11e9cd7a88b..8a283cc6bb1d734470426ba84d9c39cdc79c618d 100644
--- a/packages/llamaindex/e2e/examples/cloudflare-worker-agent/src/index.ts
+++ b/packages/llamaindex/e2e/examples/cloudflare-worker-agent/src/index.ts
@@ -11,12 +11,10 @@ export default {
       tools: [],
     });
     console.log(1);
-    const responseStream = await agent.chat(
-      {
-        message: "Hello? What is the weather today?",
-      },
-      true,
-    );
+    const responseStream = await agent.chat({
+      stream: true,
+      message: "Hello? What is the weather today?",
+    });
     console.log(2);
     const textEncoder = new TextEncoder();
     const response = responseStream.pipeThrough<Uint8Array>(
diff --git a/packages/llamaindex/e2e/examples/nextjs-agent/src/actions/index.tsx b/packages/llamaindex/e2e/examples/nextjs-agent/src/actions/index.tsx
index f648a8d9d473cddbe0387795506509fb3d1e6c19..b71e52a928f4241ee28280fa63bce6e9a612c7d1 100644
--- a/packages/llamaindex/e2e/examples/nextjs-agent/src/actions/index.tsx
+++ b/packages/llamaindex/e2e/examples/nextjs-agent/src/actions/index.tsx
@@ -10,13 +10,11 @@ export async function chatWithAgent(
   const agent = new OpenAIAgent({
     tools: [],
   });
-  const responseStream = await agent.chat(
-    {
-      message: question,
-      chatHistory: prevMessages,
-    },
-    true,
-  );
+  const responseStream = await agent.chat({
+    stream: true,
+    message: question,
+    chatHistory: prevMessages,
+  });
   const uiStream = createStreamableUI(<div>loading...</div>);
   responseStream
     .pipeTo(
diff --git a/packages/llamaindex/e2e/node/openai.e2e.ts b/packages/llamaindex/e2e/node/openai.e2e.ts
index eb9918eef5bc946b60a45d8aaff263754ca411e5..4019390854c1059306639e61b99a542a695695c1 100644
--- a/packages/llamaindex/e2e/node/openai.e2e.ts
+++ b/packages/llamaindex/e2e/node/openai.e2e.ts
@@ -322,12 +322,10 @@ await test("agent stream", async (t) => {
     tools: [sumNumbersTool, divideNumbersTool],
  });
 
-  const stream = await agent.chat(
-    {
-      message: "Divide 16 by 2 then add 20",
-    },
-    true,
-  );
+  const stream = await agent.chat({
+    message: "Divide 16 by 2 then add 20",
+    stream: true,
+  });
 
   let message = "";
 
diff --git a/packages/llamaindex/e2e/node/react.e2e.ts b/packages/llamaindex/e2e/node/react.e2e.ts
index 58170a57f4ecdc03069c7df9fd05bf101dea3155..c0bb8c46c9b399b1bf32749fefcda4a65a9fb24a 100644
--- a/packages/llamaindex/e2e/node/react.e2e.ts
+++ b/packages/llamaindex/e2e/node/react.e2e.ts
@@ -20,6 +20,7 @@ await test("react agent", async (t) => {
     tools: [getWeatherTool],
   });
   const response = await agent.chat({
+    stream: false,
     message: "What is the weather like in San Francisco?",
   });
 
@@ -34,12 +35,10 @@ await test("react agent stream", async (t) => {
     tools: [getWeatherTool],
   });
 
-  const stream = await agent.chat(
-    {
-      message: "What is the weather like in San Francisco?",
-    },
-    true,
-  );
+  const stream = await agent.chat({
+    stream: true,
+    message: "What is the weather like in San Francisco?",
+  });
 
   let content = "";
   for await (const response of stream) {
diff --git a/packages/llamaindex/src/agent/anthropic.ts b/packages/llamaindex/src/agent/anthropic.ts
index e8c827b685962977aca70164c1bd34995b885288..45d7c5ce31e754ad0ea220f09eb345e3d96a7a34 100644
--- a/packages/llamaindex/src/agent/anthropic.ts
+++ b/packages/llamaindex/src/agent/anthropic.ts
@@ -1,4 +1,7 @@
-import type { ChatEngineParams } from "@llamaindex/core/chat-engine";
+import type {
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
 import type { EngineResponse } from "@llamaindex/core/schema";
 import { Settings } from "../Settings.js";
 import { Anthropic } from "../llm/anthropic.js";
@@ -21,9 +24,12 @@ export class AnthropicAgent extends LLMAgent {
     });
   }
 
-  async chat(params: ChatEngineParams, stream?: false): Promise<EngineResponse>;
-  async chat(params: ChatEngineParams, stream: true): Promise<never>;
-  override async chat(params: ChatEngineParams, stream?: boolean) {
+  async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
+  async chat(params: StreamingChatEngineParams): Promise<never>;
+  override async chat(
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
+  ) {
+    const { stream } = params;
     if (stream) {
       // Anthropic does support this, but looks like it's not supported in the LITS LLM
       throw new Error("Anthropic does not support streaming");
diff --git a/packages/llamaindex/src/agent/base.ts b/packages/llamaindex/src/agent/base.ts
index 8141ceac42178414d2472d9334d02172a836664a..2cc2233ca63e897c68c98a54299897a1d7b89854 100644
--- a/packages/llamaindex/src/agent/base.ts
+++ b/packages/llamaindex/src/agent/base.ts
@@ -1,6 +1,7 @@
 import {
   BaseChatEngine,
-  type ChatEngineParams,
+  type NonStreamingChatEngineParams,
+  type StreamingChatEngineParams,
 } from "@llamaindex/core/chat-engine";
 import type {
   BaseToolWithCall,
@@ -344,15 +345,13 @@ export abstract class AgentRunner<
     });
   }
 
-  async chat(params: ChatEngineParams, stream?: false): Promise<EngineResponse>;
+  async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
   async chat(
-    params: ChatEngineParams,
-    stream: true,
+    params: StreamingChatEngineParams,
   ): Promise<ReadableStream<EngineResponse>>;
   @wrapEventCaller
   async chat(
-    params: ChatEngineParams,
-    stream?: boolean,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
   ): Promise<EngineResponse | ReadableStream<EngineResponse>> {
     let chatHistory: ChatMessage<AdditionalMessageOptions>[] = [];
 
@@ -364,7 +363,12 @@ export abstract class AgentRunner<
         params.chatHistory as ChatMessage<AdditionalMessageOptions>[];
     }
 
-    const task = this.createTask(params.message, !!stream, false, chatHistory);
+    const task = this.createTask(
+      params.message,
+      !!params.stream,
+      false,
+      chatHistory,
+    );
     for await (const stepOutput of task) {
       // update chat history for each round
       this.#chatHistory = [...stepOutput.taskStep.context.store.messages];
diff --git a/packages/llamaindex/src/engines/chat/CondenseQuestionChatEngine.ts b/packages/llamaindex/src/engines/chat/CondenseQuestionChatEngine.ts
index 4ebff2f40e16d3160ce7818ad0ade71ef0772baf..b63ca774a341f9d400e6c5869fdb5733b1ff46d7 100644
--- a/packages/llamaindex/src/engines/chat/CondenseQuestionChatEngine.ts
+++ b/packages/llamaindex/src/engines/chat/CondenseQuestionChatEngine.ts
@@ -1,6 +1,7 @@
 import {
   BaseChatEngine,
-  type ChatEngineParams,
+  type NonStreamingChatEngineParams,
+  type StreamingChatEngineParams,
 } from "@llamaindex/core/chat-engine";
 import type { ChatMessage, LLM } from "@llamaindex/core/llms";
 import { BaseMemory, ChatMemoryBuffer } from "@llamaindex/core/memory";
@@ -86,17 +87,15 @@ export class CondenseQuestionChatEngine extends BaseChatEngine {
     });
   }
 
-  chat(params: ChatEngineParams, stream?: false): Promise<EngineResponse>;
+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
   chat(
-    params: ChatEngineParams,
-    stream: true,
+    params: StreamingChatEngineParams,
   ): Promise<AsyncIterable<EngineResponse>>;
   @wrapEventCaller
   async chat(
-    params: ChatEngineParams,
-    stream = false,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
   ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
-    const { message } = params;
+    const { message, stream } = params;
     const chatHistory = params.chatHistory
       ? new ChatMemoryBuffer({
           chatHistory:
@@ -112,12 +111,10 @@ export class CondenseQuestionChatEngine extends BaseChatEngine {
     chatHistory.put({ content: message, role: "user" });
 
     if (stream) {
-      const stream = await this.queryEngine.query(
-        {
-          query: condensedQuestion,
-        },
-        true,
-      );
+      const stream = await this.queryEngine.query({
+        query: condensedQuestion,
+        stream: true,
+      });
       return streamReducer({
         stream,
         initialValue: "",
diff --git a/packages/llamaindex/src/engines/chat/ContextChatEngine.ts b/packages/llamaindex/src/engines/chat/ContextChatEngine.ts
index 26baddf9c9b6ae26633e7a92bdce90a3d252784a..81e6f419f936e7184ec92582c00e0620c25b6192 100644
--- a/packages/llamaindex/src/engines/chat/ContextChatEngine.ts
+++ b/packages/llamaindex/src/engines/chat/ContextChatEngine.ts
@@ -1,6 +1,7 @@
 import type {
   BaseChatEngine,
-  ChatEngineParams,
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
 } from "@llamaindex/core/chat-engine";
 import type {
   ChatMessage,
@@ -82,17 +83,15 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
     };
   }
 
-  chat(params: ChatEngineParams, stream?: false): Promise<EngineResponse>;
+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
   chat(
-    params: ChatEngineParams,
-    stream: true,
+    params: StreamingChatEngineParams,
   ): Promise<AsyncIterable<EngineResponse>>;
   @wrapEventCaller
   async chat(
-    params: ChatEngineParams,
-    stream = false,
+    params: StreamingChatEngineParams | NonStreamingChatEngineParams,
   ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
-    const { message } = params;
+    const { message, stream } = params;
     const chatHistory = params.chatHistory
       ? new ChatMemoryBuffer({
           chatHistory:
diff --git a/packages/llamaindex/src/engines/chat/SimpleChatEngine.ts b/packages/llamaindex/src/engines/chat/SimpleChatEngine.ts
index d3dc9f5d0166041cbd2a6cb5e20691411c82ccad..dee8b517070a384739c69c940b7005d86a8db030 100644
--- a/packages/llamaindex/src/engines/chat/SimpleChatEngine.ts
+++ b/packages/llamaindex/src/engines/chat/SimpleChatEngine.ts
@@ -1,6 +1,7 @@
 import type {
   BaseChatEngine,
-  ChatEngineParams,
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
 } from "@llamaindex/core/chat-engine";
 import type { LLM } from "@llamaindex/core/llms";
 import { BaseMemory, ChatMemoryBuffer } from "@llamaindex/core/memory";
@@ -29,17 +30,15 @@ export class SimpleChatEngine implements BaseChatEngine {
     this.llm = init?.llm ?? Settings.llm;
   }
 
-  chat(params: ChatEngineParams, stream?: false): Promise<EngineResponse>;
+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
   chat(
-    params: ChatEngineParams,
-    stream: true,
+    params: StreamingChatEngineParams,
   ): Promise<AsyncIterable<EngineResponse>>;
   @wrapEventCaller
   async chat(
-    params: ChatEngineParams,
-    stream = false,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
   ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
-    const { message } = params;
+    const { message, stream } = params;
     const chatHistory = params.chatHistory
       ? new ChatMemoryBuffer({
diff --git a/packages/llamaindex/src/engines/query/RouterQueryEngine.ts b/packages/llamaindex/src/engines/query/RouterQueryEngine.ts
index 8d4f6058daddfce0352871d726f4e2a16a18699a..7f0a614f050a6c075a18ad952e9f9de8b7fce272 100644
--- a/packages/llamaindex/src/engines/query/RouterQueryEngine.ts
+++ b/packages/llamaindex/src/engines/query/RouterQueryEngine.ts
@@ -136,7 +136,9 @@ export class RouterQueryEngine extends BaseQueryEngine {
       }
 
       const selectedQueryEngine = this.queryEngines[engineInd.index]!;
-      responses.push(await selectedQueryEngine.query(query));
+      responses.push(
+        await selectedQueryEngine.query({ query, stream: false }),
+      );
     }
 
     if (responses.length > 1) {
diff --git a/packages/llamaindex/src/evaluation/Faithfulness.ts b/packages/llamaindex/src/evaluation/Faithfulness.ts
index b1e68f1f4ce6109476d955d4ded33233daff7f96..590e2e63d089af66320fd30425301af8f1e5701c 100644
--- a/packages/llamaindex/src/evaluation/Faithfulness.ts
+++ b/packages/llamaindex/src/evaluation/Faithfulness.ts
@@ -103,7 +103,8 @@ export class FaithfulnessEvaluator
     });
 
     const responseObj = await queryEngine.query({
-      query: response,
+      query: { query: response },
+      stream: false,
    });
 
     const rawResponseTxt = responseObj.toString();
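
For reference, a minimal sketch of a call site after this change. The `stream` flag, the overload names, and the `chunk.response` access come straight from the diff above; the `SimpleChatEngine` setup and the `main` wrapper are illustrative only, mirroring the style of the `examples/` files:

```ts
import { SimpleChatEngine } from "llamaindex";

async function main() {
  const chatEngine = new SimpleChatEngine();

  // `stream` omitted (or `stream: false`) selects the
  // NonStreamingChatEngineParams overload: resolves to one EngineResponse.
  const response = await chatEngine.chat({ message: "Who was Goethe?" });
  console.log(response.response);

  // `stream: true` selects the StreamingChatEngineParams overload:
  // resolves to an AsyncIterable<EngineResponse>.
  const stream = await chatEngine.chat({
    message: "Who was Goethe?",
    stream: true,
  });
  for await (const chunk of stream) {
    process.stdout.write(chunk.response);
  }
}

main().catch(console.error);
```

Query engines follow the same shape per `packages/core/src/query-engine/base.ts`: `queryEngine.query({ query, stream: true })` yields a stream, while `queryEngine.query({ query })` resolves to a single `EngineResponse`.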