From a9c6144eecc127369f65849f8520ef44650f61ee Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Tue, 25 Feb 2025 16:48:12 +0700
Subject: [PATCH] feat: stream thinking tokens for claude 3.7 (#1679)

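Claude 3.7 models can emit "thinking" deltas when extended thinking is
enabled. Previously the Anthropic provider dropped these `thinking_delta`
stream events; this change forwards them on `options.thinking` of each
streamed chunk (answer tokens still arrive on `delta`) and updates the
thinking example to consume a streamed response.

A minimal consumer sketch based on the updated example; the `thinking`
payload inside `additionalChatOptions` is an assumption about Anthropic's
extended-thinking options, and the token budgets are illustrative:

```ts
import { Anthropic } from "@llamaindex/anthropic";

const llm = new Anthropic({
  model: "claude-3-7-sonnet",
  maxTokens: 20000,
  additionalChatOptions: {
    // Assumed shape of Anthropic's extended-thinking option; adjust to
    // whatever AnthropicAdditionalChatOptions actually accepts.
    thinking: { type: "enabled", budget_tokens: 16000 },
  },
});

const stream = await llm.chat({
  messages: [
    {
      role: "user",
      content:
        "Are there an infinite number of prime numbers such that n mod 4 == 3?",
    },
  ],
  stream: true,
});

for await (const chunk of stream) {
  // Thinking tokens arrive on options.thinking; answer tokens on delta.
  if (chunk.options?.thinking) process.stdout.write(chunk.options.thinking);
  if (chunk.delta) process.stdout.write(chunk.delta);
}
```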
---
 .changeset/flat-candles-count.md        |  5 +++++
 examples/anthropic/thinking.ts          | 11 +++++++++--
 packages/providers/anthropic/src/llm.ts | 23 +++++++++++++++--------
 3 files changed, 29 insertions(+), 10 deletions(-)
 create mode 100644 .changeset/flat-candles-count.md

diff --git a/.changeset/flat-candles-count.md b/.changeset/flat-candles-count.md
new file mode 100644
index 000000000..9a6468473
--- /dev/null
+++ b/.changeset/flat-candles-count.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/anthropic": patch
+---
+
+Stream thinking tokens from Claude 3.7 when streaming chat responses
diff --git a/examples/anthropic/thinking.ts b/examples/anthropic/thinking.ts
index 3901a4eaf..cc72ba662 100644
--- a/examples/anthropic/thinking.ts
+++ b/examples/anthropic/thinking.ts
@@ -2,7 +2,6 @@ import { Anthropic } from "@llamaindex/anthropic";
 
 (async () => {
   const anthropic = new Anthropic({
-    apiKey: process.env.ANTHROPIC_API_KEY,
     model: "claude-3-7-sonnet",
     maxTokens: 20000,
     additionalChatOptions: {
@@ -20,6 +19,14 @@ import { Anthropic } from "@llamaindex/anthropic";
           "Are there an infinite number of prime numbers such that n mod 4 == 3?",
       },
     ],
+    stream: true,
   });
-  console.log(result.message);
+  console.log("Thinking...");
+  for await (const chunk of result) {
+    if (chunk.delta) {
+      process.stdout.write(chunk.delta);
+    } else if (chunk.options?.thinking) {
+      process.stdout.write(chunk.options.thinking);
+    }
+  }
 })();
diff --git a/packages/providers/anthropic/src/llm.ts b/packages/providers/anthropic/src/llm.ts
index 0cd80a183..98a468104 100644
--- a/packages/providers/anthropic/src/llm.ts
+++ b/packages/providers/anthropic/src/llm.ts
@@ -133,6 +133,7 @@ export type AnthropicAdditionalChatOptions = Pick<
 >;
 export type AnthropicToolCallLLMMessageOptions = ToolCallLLMMessageOptions & {
   cache_control?: BetaCacheControlEphemeral | null;
+  thinking?: string | undefined;
 };
 
 export class Anthropic extends ToolCallLLM<
@@ -504,20 +505,26 @@ export class Anthropic extends ToolCallLLM<
 
     let idx_counter: number = 0;
     for await (const part of stream) {
-      const content =
-        part.type === "content_block_delta"
-          ? part.delta.type === "text_delta"
-            ? part.delta.text
-            : part.delta
+      const textContent =
+        part.type === "content_block_delta" && part.delta.type === "text_delta"
+          ? part.delta.text
           : undefined;
 
-      if (typeof content !== "string") continue;
+      const thinking =
+        part.type === "content_block_delta" &&
+        part.delta.type === "thinking_delta"
+          ? part.delta.thinking
+          : undefined;
+
+      if (!textContent && !thinking) continue;
 
       idx_counter++;
       yield {
         raw: part,
-        delta: content,
-        options: {},
+        delta: textContent ?? "",
+        options: {
+          thinking: thinking,
+        },
       };
     }
     return;
-- 
GitLab