From 1b13395e655928ee7a864df07a09c8f264efb24a Mon Sep 17 00:00:00 2001
From: Elliot Kang <kkang2097@gmail.com>
Date: Sat, 30 Sep 2023 12:18:17 -0700
Subject: [PATCH] Anthropic streaming support

---
 packages/core/src/llm/LLM.ts | 41 +++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts
index d9a53a578..ac21a6737 100644
--- a/packages/core/src/llm/LLM.ts
+++ b/packages/core/src/llm/LLM.ts
@@ -4,6 +4,7 @@ import {
   Event,
   EventType,
   OpenAIStreamToken,
+  AnthropicStreamToken,
   StreamCallbackResponse,
 } from "../callbacks/CallbackManager";
 
@@ -606,6 +607,15 @@ export class Anthropic implements LLM {
     parentEvent?: Event | undefined,
     streaming?: T,
   ): Promise<R> {
+
+    // Streaming
+    if (streaming) {
+      if (!this.hasStreaming) {
+        throw new Error("No streaming support for this LLM.");
+      }
+      return this.streamChat(messages, parentEvent) as R;
+    }
+    // Non-streaming
     const response = await this.session.anthropic.completions.create({
       model: this.model,
       prompt: this.mapMessagesToPrompt(messages),
@@ -620,6 +630,33 @@ export class Anthropic implements LLM {
       // That space will be re-added when we generate the next prompt.
     } as R;
   }
+
+  protected async *streamChat(
+    messages: ChatMessage[],
+    parentEvent?: Event | undefined,
+  ): AsyncGenerator<string, void, unknown> {
+    // With `stream: true`, the SDK returns an
+    // AsyncIterable<AnthropicStreamToken>.
+    const stream: AsyncIterable<AnthropicStreamToken> =
+      await this.session.anthropic.completions.create({
+        model: this.model,
+        prompt: this.mapMessagesToPrompt(messages),
+        max_tokens_to_sample: this.maxTokens ?? 100000,
+        temperature: this.temperature,
+        top_p: this.topP,
+        stream: true,
+      });
+
+    let idx_counter: number = 0;
+    for await (const part of stream) {
+      // TODO: LLM stream callback, pending re-work.
+      idx_counter++;
+      // Each chunk carries the newly generated text in `completion`.
+      yield part.completion;
+    }
+    return;
+  }
+
   async complete<
     T extends boolean | undefined = undefined,
     R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
@@ -630,4 +667,8 @@ export class Anthropic implements LLM {
   ): Promise<R> {
     return this.chat([{ content: prompt, role: "user" }], parentEvent) as R;
   }
+
+  protected stream_complete(prompt: string, parentEvent?: Event | undefined): AsyncGenerator<string, void, unknown> {
+    return this.streamChat([{ content: prompt, role: "user" }], parentEvent);
+  }
 }
-- 
GitLab
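
A minimal usage sketch for the new streaming path (not part of the patch).
Assumptions: the Anthropic class is exported from the "llamaindex" package,
its constructor accepts a partial-options object, and ANTHROPIC_API_KEY is
set in the environment. Passing `true` as the third argument to chat()
selects the streaming branch added above, so the awaited result is an
AsyncGenerator<string> rather than a ChatResponse.

import { Anthropic } from "llamaindex";

async function main() {
  // Hypothetical setup; option names mirror the fields the patch reads
  // (model, temperature, topP, maxTokens).
  const llm = new Anthropic({ temperature: 0.1 });

  // streaming = true routes chat() into streamChat().
  const stream = await llm.chat(
    [{ content: "Tell me a joke.", role: "user" }],
    undefined,
    true,
  );

  // Tokens arrive incrementally; print each one as it streams in.
  for await (const token of stream) {
    process.stdout.write(token);
  }
}

main().catch(console.error);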