From a7b0ac3cb789c40d4c9f7738e1898ec207c7f573 Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Thu, 14 Nov 2024 17:35:21 -0800
Subject: [PATCH] feat(anthropic): support prompt caching (#1488)

---
 .changeset/honest-moose-roll-2.md         |  5 ++
 .changeset/honest-moose-roll.md           |  5 ++
 examples/anthropic/prompt-caching.ts      | 38 +++++++++
 packages/core/src/llms/base.ts            |  4 +-
 packages/providers/anthropic/package.json |  2 +-
 packages/providers/anthropic/src/llm.ts   | 99 +++++++++++++++++------
 pnpm-lock.yaml                            | 19 ++++-
 7 files changed, 145 insertions(+), 27 deletions(-)
 create mode 100644 .changeset/honest-moose-roll-2.md
 create mode 100644 .changeset/honest-moose-roll.md
 create mode 100644 examples/anthropic/prompt-caching.ts

diff --git a/.changeset/honest-moose-roll-2.md b/.changeset/honest-moose-roll-2.md
new file mode 100644
index 000000000..06fd6434e
--- /dev/null
+++ b/.changeset/honest-moose-roll-2.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/anthropic": patch
+---
+
+feat(anthropic): support prompt caching
diff --git a/.changeset/honest-moose-roll.md b/.changeset/honest-moose-roll.md
new file mode 100644
index 000000000..01cf72c13
--- /dev/null
+++ b/.changeset/honest-moose-roll.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/core": patch
+---
+
+fix: update tool call llm type
diff --git a/examples/anthropic/prompt-caching.ts b/examples/anthropic/prompt-caching.ts
new file mode 100644
index 000000000..2f9182e9a
--- /dev/null
+++ b/examples/anthropic/prompt-caching.ts
@@ -0,0 +1,38 @@
+import { Anthropic } from "llamaindex";
+
+async function main() {
+  const anthropic = new Anthropic({
+    model: "claude-3-5-sonnet-20241022",
+  });
+
+  const entireBook = await fetch(
+    "https://www.gutenberg.org/files/1342/1342-0.txt",
+  ).then((response) => response.text());
+
+  const response = await anthropic.chat({
+    messages: [
+      {
+        content:
+          "You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
+        role: "system",
+      },
+      {
+        content: entireBook,
+        role: "system",
+        options: {
+          cache_control: {
+            type: "ephemeral",
+          },
+        },
+      },
+      {
+        content: "analyze the major themes in Pride and Prejudice.",
+        role: "user",
+      },
+    ],
+  });
+
+  console.log(response.message.content);
+}
+
+main().catch(console.error);
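
Note on the example above: only the second system message (the full book text) carries `cache_control: { type: "ephemeral" }`, so Anthropic can cache that large prefix and reuse it on subsequent calls. To check whether the cache is being written or read, you can inspect the raw usage counters on the response. A minimal sketch, assuming the prompt-caching beta reports `cache_creation_input_tokens` and `cache_read_input_tokens` on `response.raw.usage` (these field names are an assumption here, not something this patch guarantees to expose):

```ts
// Hypothetical helper: log prompt-cache activity from the raw SDK response.
// The usage field names follow Anthropic's prompt-caching beta.
type CacheUsage = {
  cache_creation_input_tokens?: number;
  cache_read_input_tokens?: number;
};

export function logCacheUsage(raw: unknown): void {
  const usage = (raw as { usage?: CacheUsage } | null | undefined)?.usage;
  if (!usage) return;
  // The first call writes the marked prefix to the cache; repeat calls read it.
  console.log("cache write tokens:", usage.cache_creation_input_tokens ?? 0);
  console.log("cache read tokens:", usage.cache_read_input_tokens ?? 0);
}

// e.g. logCacheUsage(response.raw) right after the chat() call above.
```
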
diff --git a/packages/core/src/llms/base.ts b/packages/core/src/llms/base.ts
index 89b9ecd82..46306bfec 100644
--- a/packages/core/src/llms/base.ts
+++ b/packages/core/src/llms/base.ts
@@ -66,6 +66,8 @@ export abstract class BaseLLM<
 
 export abstract class ToolCallLLM<
   AdditionalChatOptions extends object = object,
-> extends BaseLLM<AdditionalChatOptions, ToolCallLLMMessageOptions> {
+  AdditionalMessageOptions extends
+    ToolCallLLMMessageOptions = ToolCallLLMMessageOptions,
+> extends BaseLLM<AdditionalChatOptions, AdditionalMessageOptions> {
   abstract supportToolCall: boolean;
 }
diff --git a/packages/providers/anthropic/package.json b/packages/providers/anthropic/package.json
index 1ae802efa..f64967fca 100644
--- a/packages/providers/anthropic/package.json
+++ b/packages/providers/anthropic/package.json
@@ -33,7 +33,7 @@
     "bunchee": "5.6.1"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "0.27.1",
+    "@anthropic-ai/sdk": "0.32.1",
     "@llamaindex/core": "workspace:*",
     "@llamaindex/env": "workspace:*",
     "remeda": "^2.12.0"
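
The `base.ts` change above widens `ToolCallLLM` with a second type parameter so a provider can thread its own per-message options through `chat()` instead of being pinned to `ToolCallLLMMessageOptions`. A minimal sketch of a hypothetical subclass (the class and option names are illustrative, and the import path assumes the `@llamaindex/core/llms` entry point):

```ts
import {
  ToolCallLLM,
  type ToolCallLLMMessageOptions,
} from "@llamaindex/core/llms";

// Provider-specific options layered on top of the shared tool-call options.
type MyMessageOptions = ToolCallLLMMessageOptions & {
  cache_control?: { type: "ephemeral" } | null;
};

// The new second type parameter lets messages carry MyMessageOptions;
// before this patch, message options were fixed to ToolCallLLMMessageOptions.
abstract class MyProviderLLM extends ToolCallLLM<object, MyMessageOptions> {
  supportToolCall = true;
}
```

This is the same pattern the Anthropic provider applies below with `AnthropicToolCallLLMMessageOptions`.
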
diff --git a/packages/providers/anthropic/src/llm.ts b/packages/providers/anthropic/src/llm.ts
index 485b28186..251f7b295 100644
--- a/packages/providers/anthropic/src/llm.ts
+++ b/packages/providers/anthropic/src/llm.ts
@@ -1,5 +1,9 @@
 import type { ClientOptions } from "@anthropic-ai/sdk";
 import { Anthropic as SDKAnthropic } from "@anthropic-ai/sdk";
+import type {
+  BetaCacheControlEphemeral,
+  BetaTextBlockParam,
+} from "@anthropic-ai/sdk/resources/beta/index";
 import type {
   TextBlock,
   TextBlockParam,
@@ -8,6 +12,7 @@ import type {
   ImageBlockParam,
   MessageCreateParamsNonStreaming,
   MessageParam,
+  Model,
   Tool,
   ToolResultBlockParam,
   ToolUseBlock,
@@ -75,6 +80,9 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
   "claude-2.1": {
     contextWindow: 200000,
   },
+  "claude-2.0": {
+    contextWindow: 100000,
+  },
   "claude-instant-1.2": {
     contextWindow: 100000,
   },
@@ -82,18 +90,30 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
 
 export const ALL_AVAILABLE_V3_MODELS = {
   "claude-3-opus": { contextWindow: 200000 },
+  "claude-3-opus-latest": { contextWindow: 200000 },
+  "claude-3-opus-20240229": { contextWindow: 200000 },
   "claude-3-sonnet": { contextWindow: 200000 },
+  "claude-3-sonnet-20240229": { contextWindow: 200000 },
   "claude-3-haiku": { contextWindow: 200000 },
+  "claude-3-haiku-20240307": { contextWindow: 200000 },
 };
 
 export const ALL_AVAILABLE_V3_5_MODELS = {
   "claude-3-5-sonnet": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20241022": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20240620": { contextWindow: 200000 },
+  "claude-3-5-sonnet-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku": { contextWindow: 200000 },
+  "claude-3-5-haiku-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku-20241022": { contextWindow: 200000 },
 };
 
 export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
   ...ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
   ...ALL_AVAILABLE_V3_MODELS,
   ...ALL_AVAILABLE_V3_5_MODELS,
+} satisfies {
+  [key in Model]: { contextWindow: number };
 };
 
 const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
@@ -104,10 +124,16 @@ const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
 } as { [key in keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS]: string };
 
 export type AnthropicAdditionalChatOptions = object;
+export type AnthropicToolCallLLMMessageOptions = ToolCallLLMMessageOptions & {
+  cache_control?: BetaCacheControlEphemeral | null;
+};
 
-export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
+export class Anthropic extends ToolCallLLM<
+  AnthropicAdditionalChatOptions,
+  AnthropicToolCallLLMMessageOptions
+> {
   // Per completion Anthropic params
-  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
+  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS | ({} & string);
   temperature: number;
   topP: number;
   maxTokens?: number | undefined;
@@ -147,7 +173,12 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
       temperature: this.temperature,
       topP: this.topP,
       maxTokens: this.maxTokens,
-      contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
+      contextWindow:
+        this.model in ALL_AVAILABLE_ANTHROPIC_MODELS
+          ? ALL_AVAILABLE_ANTHROPIC_MODELS[
+              this.model as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS
+            ].contextWindow
+          : 200000,
       tokenizer: undefined,
     };
   }
@@ -291,56 +322,74 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
   chat(
     params: LLMChatParamsStreaming<
       AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
     >,
-  ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>;
+  ): Promise<
+    AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
+  >;
   chat(
     params: LLMChatParamsNonStreaming<
       AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
     >,
-  ): Promise<ChatResponse<ToolCallLLMMessageOptions>>;
+  ): Promise<ChatResponse<AnthropicToolCallLLMMessageOptions>>;
   @wrapLLMEvent
   async chat(
     params:
       | LLMChatParamsNonStreaming<
           AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
         >
       | LLMChatParamsStreaming<
           AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
        >,
   ): Promise<
-    | ChatResponse<ToolCallLLMMessageOptions>
-    | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>
+    | ChatResponse<AnthropicToolCallLLMMessageOptions>
+    | AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
   > {
     let { messages } = params;
 
     const { stream, tools } = params;
 
-    let systemPrompt: string | null = null;
+    let systemPrompt: string | Array<BetaTextBlockParam> | null = null;
 
     const systemMessages = messages.filter(
       (message) => message.role === "system",
     );
 
     if (systemMessages.length > 0) {
-      systemPrompt = systemMessages
-        .map((message) => message.content)
-        .join("\n");
+      systemPrompt = systemMessages.map((message) =>
+        message.options && "cache_control" in message.options
+          ? {
+              type: "text",
+              text: extractText(message.content),
+              cache_control: message.options.cache_control,
+            }
+          : {
+              type: "text",
+              text: extractText(message.content),
+            },
+      );
       messages = messages.filter((message) => message.role !== "system");
     }
 
+    const beta =
+      systemPrompt?.find((message) => "cache_control" in message) !==
+      undefined;
+
+    // case: Non-streaming
+    let anthropic = this.session.anthropic;
+    if (beta) {
+      // @ts-expect-error type casting
+      anthropic = anthropic.beta.promptCaching;
+    }
     // case: Streaming
     if (stream) {
       if (tools) {
         console.error("Tools are not supported in streaming mode");
       }
-      return this.streamChat(messages, systemPrompt);
+      return this.streamChat(messages, systemPrompt, anthropic);
     }
 
-    // case: Non-streaming
-    const anthropic = this.session.anthropic;
-
     if (tools) {
       const params: MessageCreateParamsNonStreaming = {
@@ -378,7 +427,10 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
             toolCall: toolUseBlock.map((block) => ({
               id: block.id,
               name: block.name,
-              input: block.input,
+              input:
+                typeof block.input === "object"
+                  ? JSON.stringify(block.input)
+                  : `${block.input}`,
             })),
           }
         : {},
@@ -411,10 +463,11 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
   }
 
   protected async *streamChat(
-    messages: ChatMessage<ToolCallLLMMessageOptions>[],
-    systemPrompt?: string | null,
-  ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> {
-    const stream = await this.session.anthropic.messages.create({
+    messages: ChatMessage<AnthropicToolCallLLMMessageOptions>[],
+    systemPrompt: string | Array<BetaTextBlockParam> | null,
+    anthropic: SDKAnthropic,
+  ): AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>> {
+    const stream = await anthropic.messages.create({
       model: this.getModelName(this.model),
       messages: this.formatMessages(messages),
       max_tokens: this.maxTokens ?? 4096,
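
To make the control flow above concrete: each system message becomes a text block, a message whose `options` carry `cache_control` keeps it, and the presence of any cache-marked block flips `beta` to true and routes the request through `anthropic.beta.promptCaching`. A hand-written sketch of the resulting `system` payload for the Pride and Prejudice example (values are illustrative, not captured SDK output):

```ts
// Shape follows BetaTextBlockParam from @anthropic-ai/sdk.
const system = [
  {
    type: "text",
    text: "You are an AI assistant tasked with analyzing literary works. ...",
  },
  {
    // This block carried options.cache_control, so it is preserved and
    // causes the request to go through the prompt-caching beta client.
    type: "text",
    text: "<entire book text>",
    cache_control: { type: "ephemeral" },
  },
];
```
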
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 870e492d6..69ff1fe88 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1191,8 +1191,8 @@ importers:
   packages/providers/anthropic:
     dependencies:
       '@anthropic-ai/sdk':
-        specifier: 0.27.1
-        version: 0.27.1(encoding@0.1.13)
+        specifier: 0.32.1
+        version: 0.32.1(encoding@0.1.13)
       '@llamaindex/core':
         specifier: workspace:*
         version: link:../../core
@@ -1701,6 +1701,9 @@ packages:
   '@anthropic-ai/sdk@0.27.1':
     resolution: {integrity: sha512-AKFd/E8HO26+DOVPiZpEked3Pm2feA5d4gcX2FcJXr9veDkXbKO90hr2C7N2TL7mPIMwm040ldXlsIZQ416dHg==}
 
+  '@anthropic-ai/sdk@0.32.1':
+    resolution: {integrity: sha512-U9JwTrDvdQ9iWuABVsMLj8nJVwAyQz6QXvgLsVhryhCEPkLsbcP/MXxm+jYcAwLoV8ESbaTTjnD4kuAFa+Hyjg==}
+
   '@apidevtools/json-schema-ref-parser@11.7.2':
     resolution: {integrity: sha512-4gY54eEGEstClvEkGnwVkTkrx0sqwemEFG5OSRRn3tD91XH0+Q8XIkYIfo7IwEWPpJZwILb9GUXeShtplRc/eA==}
     engines: {node: '>= 16'}
@@ -13425,6 +13428,18 @@ snapshots:
     transitivePeerDependencies:
       - encoding
 
+  '@anthropic-ai/sdk@0.32.1(encoding@0.1.13)':
+    dependencies:
+      '@types/node': 18.19.64
+      '@types/node-fetch': 2.6.11
+      abort-controller: 3.0.0
+      agentkeepalive: 4.5.0
+      form-data-encoder: 1.7.2
+      formdata-node: 4.4.1
+      node-fetch: 2.7.0(encoding@0.1.13)
+    transitivePeerDependencies:
+      - encoding
+
   '@apidevtools/json-schema-ref-parser@11.7.2':
     dependencies:
       '@jsdevtools/ono': 7.1.3
--
GitLab
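
One behavioral change worth calling out from the `llm.ts` hunks above: tool-call `input` is now always delivered as a string (objects are `JSON.stringify`-ed), so callers should parse it before use. A minimal consumer sketch under that assumption (tool definitions are omitted for brevity; `toolCall` lives on the response message options in this package):

```ts
import { Anthropic } from "llamaindex";

async function runToolExample() {
  const llm = new Anthropic({ model: "claude-3-5-sonnet-20241022" });

  const res = await llm.chat({
    messages: [{ role: "user", content: "What is the weather in Paris?" }],
    // tools: [...] (pass real tool definitions here to get actual tool calls)
  });

  // After this patch, `input` arrives as a JSON string rather than an object.
  for (const call of res.message.options?.toolCall ?? []) {
    const args = JSON.parse(call.input as string);
    console.log(call.name, args);
  }
}

runToolExample().catch(console.error);
```
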