From a7b0ac3cb789c40d4c9f7738e1898ec207c7f573 Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Thu, 14 Nov 2024 17:35:21 -0800
Subject: [PATCH] feat(anthropic): support prompt caching (#1488)

---
 .changeset/honest-moose-roll-2.md         |  5 ++
 .changeset/honest-moose-roll.md           |  5 ++
 examples/anthropic/prompt-caching.ts      | 38 +++++++++
 packages/core/src/llms/base.ts            |  4 +-
 packages/providers/anthropic/package.json |  2 +-
 packages/providers/anthropic/src/llm.ts   | 99 +++++++++++++++++------
 pnpm-lock.yaml                            | 19 ++++-
 7 files changed, 145 insertions(+), 27 deletions(-)
 create mode 100644 .changeset/honest-moose-roll-2.md
 create mode 100644 .changeset/honest-moose-roll.md
 create mode 100644 examples/anthropic/prompt-caching.ts

diff --git a/.changeset/honest-moose-roll-2.md b/.changeset/honest-moose-roll-2.md
new file mode 100644
index 000000000..06fd6434e
--- /dev/null
+++ b/.changeset/honest-moose-roll-2.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/anthropic": patch
+---
+
+feat(anthropic): support prompt caching
diff --git a/.changeset/honest-moose-roll.md b/.changeset/honest-moose-roll.md
new file mode 100644
index 000000000..01cf72c13
--- /dev/null
+++ b/.changeset/honest-moose-roll.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/core": patch
+---
+
+fix: update tool call llm type
diff --git a/examples/anthropic/prompt-caching.ts b/examples/anthropic/prompt-caching.ts
new file mode 100644
index 000000000..2f9182e9a
--- /dev/null
+++ b/examples/anthropic/prompt-caching.ts
@@ -0,0 +1,38 @@
+import { Anthropic } from "llamaindex";
+
+// Sends a whole book as a system message tagged with `cache_control` so the
+// Anthropic provider issues the request with prompt caching enabled.
+async function main() {
+  const anthropic = new Anthropic({ model: "claude-3-5-sonnet-20241022" });
+
+  // Fail fast on a bad download rather than sending an error page to the model.
+  const bookUrl = "https://www.gutenberg.org/files/1342/1342-0.txt";
+  const bookResponse = await fetch(bookUrl);
+  if (!bookResponse.ok) {
+    throw new Error(`Failed to fetch ${bookUrl}: HTTP ${bookResponse.status}`);
+  }
+  const entireBook = await bookResponse.text();
+
+  const response = await anthropic.chat({
+    messages: [
+      {
+        content:
+          "You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
+        role: "system",
+      },
+      {
+        content: entireBook,
+        role: "system",
+        options: { cache_control: { type: "ephemeral" } },
+      },
+      {
+        content: "analyze the major themes in Pride and Prejudice.",
+        role: "user",
+      },
+    ],
+  });
+
+  console.log(response.message.content);
+}
+
+main().catch(console.error);
diff --git a/packages/core/src/llms/base.ts b/packages/core/src/llms/base.ts
index 89b9ecd82..46306bfec 100644
--- a/packages/core/src/llms/base.ts
+++ b/packages/core/src/llms/base.ts
@@ -66,6 +66,8 @@ export abstract class BaseLLM<
 
 export abstract class ToolCallLLM<
   AdditionalChatOptions extends object = object,
-> extends BaseLLM<AdditionalChatOptions, ToolCallLLMMessageOptions> {
+  AdditionalMessageOptions extends
+    ToolCallLLMMessageOptions = ToolCallLLMMessageOptions,
+> extends BaseLLM<AdditionalChatOptions, AdditionalMessageOptions> {
   abstract supportToolCall: boolean;
 }
diff --git a/packages/providers/anthropic/package.json b/packages/providers/anthropic/package.json
index 1ae802efa..f64967fca 100644
--- a/packages/providers/anthropic/package.json
+++ b/packages/providers/anthropic/package.json
@@ -33,7 +33,7 @@
     "bunchee": "5.6.1"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "0.27.1",
+    "@anthropic-ai/sdk": "0.32.1",
     "@llamaindex/core": "workspace:*",
     "@llamaindex/env": "workspace:*",
     "remeda": "^2.12.0"
diff --git a/packages/providers/anthropic/src/llm.ts b/packages/providers/anthropic/src/llm.ts
index 485b28186..251f7b295 100644
--- a/packages/providers/anthropic/src/llm.ts
+++ b/packages/providers/anthropic/src/llm.ts
@@ -1,5 +1,9 @@
 import type { ClientOptions } from "@anthropic-ai/sdk";
 import { Anthropic as SDKAnthropic } from "@anthropic-ai/sdk";
+import type {
+  BetaCacheControlEphemeral,
+  BetaTextBlockParam,
+} from "@anthropic-ai/sdk/resources/beta/index";
 import type {
   TextBlock,
   TextBlockParam,
@@ -8,6 +12,7 @@ import type {
   ImageBlockParam,
   MessageCreateParamsNonStreaming,
   MessageParam,
+  Model,
   Tool,
   ToolResultBlockParam,
   ToolUseBlock,
@@ -75,6 +80,9 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
   "claude-2.1": {
     contextWindow: 200000,
   },
+  "claude-2.0": {
+    contextWindow: 100000,
+  },
   "claude-instant-1.2": {
     contextWindow: 100000,
   },
@@ -82,18 +90,30 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
 
 export const ALL_AVAILABLE_V3_MODELS = {
   "claude-3-opus": { contextWindow: 200000 },
+  "claude-3-opus-latest": { contextWindow: 200000 },
+  "claude-3-opus-20240229": { contextWindow: 200000 },
   "claude-3-sonnet": { contextWindow: 200000 },
+  "claude-3-sonnet-20240229": { contextWindow: 200000 },
   "claude-3-haiku": { contextWindow: 200000 },
+  "claude-3-haiku-20240307": { contextWindow: 200000 },
 };
 
 export const ALL_AVAILABLE_V3_5_MODELS = {
   "claude-3-5-sonnet": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20241022": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20240620": { contextWindow: 200000 },
+  "claude-3-5-sonnet-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku": { contextWindow: 200000 },
+  "claude-3-5-haiku-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku-20241022": { contextWindow: 200000 },
 };
 
 export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
   ...ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
   ...ALL_AVAILABLE_V3_MODELS,
   ...ALL_AVAILABLE_V3_5_MODELS,
+} satisfies {
+  [key in Model]: { contextWindow: number };
 };
 
 const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
@@ -104,10 +124,16 @@ const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
 } as { [key in keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS]: string };
 
 export type AnthropicAdditionalChatOptions = object;
+export type AnthropicToolCallLLMMessageOptions = ToolCallLLMMessageOptions & {
+  cache_control?: BetaCacheControlEphemeral | null;
+};
 
-export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
+export class Anthropic extends ToolCallLLM<
+  AnthropicAdditionalChatOptions,
+  AnthropicToolCallLLMMessageOptions
+> {
   // Per completion Anthropic params
-  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
+  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS | ({} & string);
   temperature: number;
   topP: number;
   maxTokens?: number | undefined;
@@ -147,7 +173,12 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
       temperature: this.temperature,
       topP: this.topP,
       maxTokens: this.maxTokens,
-      contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
+      contextWindow:
+        this.model in ALL_AVAILABLE_ANTHROPIC_MODELS
+          ? ALL_AVAILABLE_ANTHROPIC_MODELS[
+              this.model as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS
+            ].contextWindow
+          : 200000,
       tokenizer: undefined,
     };
   }
@@ -291,56 +322,74 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
   chat(
     params: LLMChatParamsStreaming<
       AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
     >,
-  ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>;
+  ): Promise<
+    AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
+  >;
   chat(
     params: LLMChatParamsNonStreaming<
       AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
     >,
-  ): Promise<ChatResponse<ToolCallLLMMessageOptions>>;
+  ): Promise<ChatResponse<AnthropicToolCallLLMMessageOptions>>;
   @wrapLLMEvent
   async chat(
     params:
       | LLMChatParamsNonStreaming<
           AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
         >
       | LLMChatParamsStreaming<
           AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
         >,
   ): Promise<
-    | ChatResponse<ToolCallLLMMessageOptions>
-    | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>
+    | ChatResponse<AnthropicToolCallLLMMessageOptions>
+    | AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
   > {
     let { messages } = params;
 
     const { stream, tools } = params;
 
-    let systemPrompt: string | null = null;
+    let systemPrompt: string | Array<BetaTextBlockParam> | null = null;
 
     const systemMessages = messages.filter(
       (message) => message.role === "system",
     );
 
     if (systemMessages.length > 0) {
-      systemPrompt = systemMessages
-        .map((message) => message.content)
-        .join("\n");
+      systemPrompt = systemMessages.map((message) =>
+        message.options && "cache_control" in message.options
+          ? {
+              type: "text",
+              text: extractText(message.content),
+              cache_control: message.options.cache_control,
+            }
+          : {
+              type: "text",
+              text: extractText(message.content),
+            },
+      );
       messages = messages.filter((message) => message.role !== "system");
     }
+    const beta =
+      systemPrompt?.find((message) => "cache_control" in message) !== undefined;
+
+    // Client for both modes: prompt-caching beta when cache_control is set
+    let anthropic = this.session.anthropic;
+    if (beta) {
+      // @ts-expect-error type casting
+      anthropic = anthropic.beta.promptCaching;
+    }
 
     // case: Streaming
     if (stream) {
       if (tools) {
         console.error("Tools are not supported in streaming mode");
       }
-      return this.streamChat(messages, systemPrompt);
+      return this.streamChat(messages, systemPrompt, anthropic);
     }
-    // case: Non-streaming
-    const anthropic = this.session.anthropic;
 
     if (tools) {
       const params: MessageCreateParamsNonStreaming = {
@@ -378,7 +427,10 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
                   toolCall: toolUseBlock.map((block) => ({
                     id: block.id,
                     name: block.name,
-                    input: block.input,
+                    input:
+                      typeof block.input === "object"
+                        ? JSON.stringify(block.input)
+                        : `${block.input}`,
                   })),
                 }
               : {},
@@ -411,10 +463,11 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
   }
 
   protected async *streamChat(
-    messages: ChatMessage<ToolCallLLMMessageOptions>[],
-    systemPrompt?: string | null,
-  ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> {
-    const stream = await this.session.anthropic.messages.create({
+    messages: ChatMessage<AnthropicToolCallLLMMessageOptions>[],
+    systemPrompt: string | Array<BetaTextBlockParam> | null,
+    anthropic: SDKAnthropic,
+  ): AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>> {
+    const stream = await anthropic.messages.create({
       model: this.getModelName(this.model),
       messages: this.formatMessages(messages),
       max_tokens: this.maxTokens ?? 4096,
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 870e492d6..69ff1fe88 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1191,8 +1191,8 @@ importers:
   packages/providers/anthropic:
     dependencies:
       '@anthropic-ai/sdk':
-        specifier: 0.27.1
-        version: 0.27.1(encoding@0.1.13)
+        specifier: 0.32.1
+        version: 0.32.1(encoding@0.1.13)
       '@llamaindex/core':
         specifier: workspace:*
         version: link:../../core
@@ -1701,6 +1701,9 @@ packages:
   '@anthropic-ai/sdk@0.27.1':
     resolution: {integrity: sha512-AKFd/E8HO26+DOVPiZpEked3Pm2feA5d4gcX2FcJXr9veDkXbKO90hr2C7N2TL7mPIMwm040ldXlsIZQ416dHg==}
 
+  '@anthropic-ai/sdk@0.32.1':
+    resolution: {integrity: sha512-U9JwTrDvdQ9iWuABVsMLj8nJVwAyQz6QXvgLsVhryhCEPkLsbcP/MXxm+jYcAwLoV8ESbaTTjnD4kuAFa+Hyjg==}
+
   '@apidevtools/json-schema-ref-parser@11.7.2':
     resolution: {integrity: sha512-4gY54eEGEstClvEkGnwVkTkrx0sqwemEFG5OSRRn3tD91XH0+Q8XIkYIfo7IwEWPpJZwILb9GUXeShtplRc/eA==}
     engines: {node: '>= 16'}
@@ -13425,6 +13428,18 @@ snapshots:
     transitivePeerDependencies:
       - encoding
 
+  '@anthropic-ai/sdk@0.32.1(encoding@0.1.13)':
+    dependencies:
+      '@types/node': 18.19.64
+      '@types/node-fetch': 2.6.11
+      abort-controller: 3.0.0
+      agentkeepalive: 4.5.0
+      form-data-encoder: 1.7.2
+      formdata-node: 4.4.1
+      node-fetch: 2.7.0(encoding@0.1.13)
+    transitivePeerDependencies:
+      - encoding
+
   '@apidevtools/json-schema-ref-parser@11.7.2':
     dependencies:
       '@jsdevtools/ono': 7.1.3
-- 
GitLab