diff --git a/apps/simple/llmStream.ts b/apps/simple/llmStream.ts
index 2c2d75a0e9e1d9cc8d59b6a8aefd817cbb5182b8..a4c3c80e3a52a3aaa7f047eaced5423d0e7d7d6b 100644
--- a/apps/simple/llmStream.ts
+++ b/apps/simple/llmStream.ts
@@ -1,4 +1,5 @@
-import { Anthropic, ChatMessage, SimpleChatEngine } from "llamaindex";
+import {OpenAI, Anthropic, ChatMessage, SimpleChatEngine } from "llamaindex";
+// import {Anthropic} from '@anthropic-ai/sdk';
 import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";
 
@@ -8,11 +9,11 @@ Where is Istanbul?
   `;
 
   // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
-  const llm = new Anthropic();
+  const llm = new OpenAI();
   const message: ChatMessage = { content: query, role: "user" };
 
-  var accumulated_result: string = "";
-  var total_tokens: number = 0;
+  // var accumulated_result: string = "";
+  // var total_tokens: number = 0;
 
   //TODO: Add callbacks later
 
@@ -21,7 +22,7 @@ Where is Istanbul?
   //either an AsyncGenerator or a Response.
   // Omitting the streaming flag automatically sets streaming to false
-  const chatEngine: SimpleChatEngine = new SimpleChatEngine();
+  const chatEngine: SimpleChatEngine = new SimpleChatEngine({chatHistory: undefined, llm: llm});
 
   const rl = readline.createInterface({ input, output });
 
   while (true) {
@@ -35,9 +36,10 @@ Where is Istanbul?
     //Case 2: .chat(query, undefined, false) => Response object
     //Case 3: .chat(query, undefined) => Response object
     const chatStream = await chatEngine.chat(query, undefined, true);
+    var accumulated_result = "";
     for await (const part of chatStream) {
+      accumulated_result += part;
       process.stdout.write(part);
-      // accumulated_result += part;
     }
   }
 }
diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts
index b7febec10cce1b13ef5d5925a35ebc3496b4f21f..6f6c8c4e5d37b34b9ef41bab8b4d90d5c1956817 100644
--- a/packages/core/src/llm/LLM.ts
+++ b/packages/core/src/llm/LLM.ts
@@ -642,22 +642,17 @@ export class Anthropic implements LLM {
         max_tokens_to_sample: this.maxTokens ?? 100000,
         temperature: this.temperature,
         top_p: this.topP,
-        streaming: true,
+        stream: true,
       });
 
     var idx_counter: number = 0;
     for await (const part of stream) {
-      //Increment
-      part.choices[0].index = idx_counter;
-      const is_done: boolean =
-        part.choices[0].finish_reason === "stop" ? true : false;
       //TODO: LLM Stream Callback, pending re-work.
 
       idx_counter++;
-      yield part.choices[0].delta.content ? part.choices[0].delta.content : "";
-
-      return;
+      yield part.completion;
     }
+    return;
   }
 
   async complete<
@@ -668,10 +663,13 @@ export class Anthropic implements LLM {
     parentEvent?: Event | undefined,
     streaming?: T,
   ): Promise<R> {
-    return this.chat([{ content: prompt, role: "user" }], parentEvent) as R;
+    if(streaming){
+      return this.streamComplete(prompt, parentEvent) as R;
+    }
+    return this.chat([{ content: prompt, role: "user" }], parentEvent, streaming) as R;
   }
 
-  protected stream_complete(
+  protected streamComplete(
     prompt: string,
     parentEvent?: Event | undefined,
   ): AsyncGenerator<string, void, unknown> {