From 866149193a3a4109c1a0b00fe89e92eaec16016a Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Thu, 4 Apr 2024 06:04:35 +0800
Subject: [PATCH] fix: use LLM's context window to specify agent's token limit (#689)

---
 examples/agent/wiki.ts                       | 25 ++++++++++++++++++++
 packages/core/src/agent/openai/base.ts       |  1 +
 packages/core/src/agent/openai/worker.ts     |  4 +++-
 packages/core/src/agent/react/worker.ts      |  4 +++-
 packages/core/src/agent/runner/base.ts       |  1 +
 packages/core/src/memory/ChatMemoryBuffer.ts | 17 ++++++++++---
 6 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 examples/agent/wiki.ts

diff --git a/examples/agent/wiki.ts b/examples/agent/wiki.ts
new file mode 100644
index 000000000..739f834cb
--- /dev/null
+++ b/examples/agent/wiki.ts
@@ -0,0 +1,25 @@
+import { OpenAI, OpenAIAgent, WikipediaTool } from "llamaindex";
+
+async function main() {
+  const llm = new OpenAI({ model: "gpt-4-turbo-preview" });
+  const wikiTool = new WikipediaTool();
+
+  // Create an OpenAIAgent with the Wikipedia tool
+  const agent = new OpenAIAgent({
+    llm,
+    tools: [wikiTool],
+    verbose: true,
+  });
+
+  // Chat with the agent
+  const response = await agent.chat({
+    message: "Who was Goethe?",
+  });
+
+  console.log(response.response);
+}
+
+(async function () {
+  await main();
+  console.log("Done");
+})();
diff --git a/packages/core/src/agent/openai/base.ts b/packages/core/src/agent/openai/base.ts
index cfade4f45..dee13e94f 100644
--- a/packages/core/src/agent/openai/base.ts
+++ b/packages/core/src/agent/openai/base.ts
@@ -64,6 +64,7 @@ export class OpenAIAgent extends AgentRunner {
 
     super({
       agentWorker: stepEngine,
+      llm,
       memory,
       defaultToolChoice,
       chatHistory: prefixMessages,
diff --git a/packages/core/src/agent/openai/worker.ts b/packages/core/src/agent/openai/worker.ts
index 6bb9946c5..b4f80407b 100644
--- a/packages/core/src/agent/openai/worker.ts
+++ b/packages/core/src/agent/openai/worker.ts
@@ -286,7 +286,9 @@ export class OpenAIAgentWorker implements AgentWorker {
 
   initializeStep(task: Task, kwargs?: any): TaskStep {
     const sources: ToolOutput[] = [];
-    const newMemory = new ChatMemoryBuffer();
+    const newMemory = new ChatMemoryBuffer({
+      tokenLimit: task.memory.tokenLimit,
+    });
 
     const taskState = {
       sources,
diff --git a/packages/core/src/agent/react/worker.ts b/packages/core/src/agent/react/worker.ts
index d4d525ca7..36c9ec687 100644
--- a/packages/core/src/agent/react/worker.ts
+++ b/packages/core/src/agent/react/worker.ts
@@ -106,7 +106,9 @@ export class ReActAgentWorker implements AgentWorker {
   initializeStep(task: Task, kwargs?: any): TaskStep {
     const sources: ToolOutput[] = [];
     const currentReasoning: BaseReasoningStep[] = [];
-    const newMemory = new ChatMemoryBuffer();
+    const newMemory = new ChatMemoryBuffer({
+      tokenLimit: task.memory.tokenLimit,
+    });
 
     const taskState = {
       sources,
diff --git a/packages/core/src/agent/runner/base.ts b/packages/core/src/agent/runner/base.ts
index 82d32cbf0..47e5469b2 100644
--- a/packages/core/src/agent/runner/base.ts
+++ b/packages/core/src/agent/runner/base.ts
@@ -58,6 +58,7 @@ export class AgentRunner extends BaseAgentRunner {
     this.memory =
       params.memory ??
       new ChatMemoryBuffer({
+        llm: params.llm,
        chatHistory: params.chatHistory,
       });
     this.initTaskStateKwargs = params.initTaskStateKwargs ?? {};
diff --git a/packages/core/src/memory/ChatMemoryBuffer.ts b/packages/core/src/memory/ChatMemoryBuffer.ts
index 43c216116..fbd8708a8 100644
--- a/packages/core/src/memory/ChatMemoryBuffer.ts
+++ b/packages/core/src/memory/ChatMemoryBuffer.ts
@@ -1,13 +1,17 @@
-import type { ChatMessage } from "../llm/index.js";
+import type { ChatMessage, LLM } from "../llm/index.js";
 import { SimpleChatStore } from "../storage/chatStore/SimpleChatStore.js";
 import type { BaseChatStore } from "../storage/chatStore/types.js";
 import type { BaseMemory } from "./types.js";
 
+const DEFAULT_TOKEN_LIMIT_RATIO = 0.75;
+const DEFAULT_TOKEN_LIMIT = 3000;
+
 type ChatMemoryBufferParams = {
   tokenLimit?: number;
   chatStore?: BaseChatStore;
   chatStoreKey?: string;
   chatHistory?: ChatMessage[];
+  llm?: LLM;
 };
 
 /**
@@ -23,9 +27,16 @@ export class ChatMemoryBuffer implements BaseMemory {
    * Initialize.
    */
   constructor(init?: Partial<ChatMemoryBufferParams>) {
-    this.tokenLimit = init?.tokenLimit ?? 3000;
     this.chatStore = init?.chatStore ?? new SimpleChatStore();
     this.chatStoreKey = init?.chatStoreKey ?? "chat_history";
+    if (init?.llm) {
+      const contextWindow = init.llm.metadata.contextWindow;
+      this.tokenLimit =
+        init?.tokenLimit ??
+        Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO);
+    } else {
+      this.tokenLimit = init?.tokenLimit ?? DEFAULT_TOKEN_LIMIT;
+    }
 
     if (init?.chatHistory) {
       this.chatStore.setMessages(this.chatStoreKey, init.chatHistory);
@@ -49,7 +60,7 @@ export class ChatMemoryBuffer implements BaseMemory {
 
     while (tokenCount > this.tokenLimit && messageCount > 1) {
       messageCount -= 1;
-      if (chatHistory[-messageCount].role === "assistant") {
+      if (chatHistory.at(-messageCount)?.role === "assistant") {
         // we cannot have an assistant message at the start of the chat history
         // if after removal of the first, we have an assistant message,
         // we need to remove the assistant message too
--
GitLab
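
A minimal sketch of the tokenLimit resolution this patch introduces. The
identifiers and the 0.75 ratio come from the diff above; importing
ChatMemoryBuffer from the "llamaindex" package root is an assumption (the
patch only shows its internal path, packages/core/src/memory/ChatMemoryBuffer.ts):

  import { ChatMemoryBuffer, OpenAI } from "llamaindex";

  const llm = new OpenAI({ model: "gpt-4-turbo-preview" });

  // With an llm and no explicit tokenLimit, the buffer derives its limit
  // from the model's metadata: Math.ceil(contextWindow * 0.75).
  const derived = new ChatMemoryBuffer({ llm });

  // An explicit tokenLimit takes precedence over the derived value.
  const pinned = new ChatMemoryBuffer({ llm, tokenLimit: 1024 });

  // With neither llm nor tokenLimit, the old default of 3000 still applies.
  const fallback = new ChatMemoryBuffer();

Because the agent workers copy task.memory.tokenLimit into each step's fresh
ChatMemoryBuffer, the per-step memory now inherits the same limit instead of
silently resetting to the 3000-token default.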