diff --git a/examples/llm_stream.ts b/examples/llm_stream.ts
index d0cf8099213d926c89c67657e948161aef605611..09c75b44991fa2575fa96525fc333210c7eff7f7 100644
--- a/examples/llm_stream.ts
+++ b/examples/llm_stream.ts
@@ -7,7 +7,9 @@ import {
 import { ChatMessage, MessageType, OpenAI } from "../packages/core/src/llm/LLM";
 
 async function main() {
-  const query: string = "Where is Istanbul?";
+  const query: string = `
+Where is Istanbul?
+  `;
   const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
   const message: ChatMessage = { content: query, role: "user" };
 
@@ -19,26 +21,14 @@ async function main() {
   //GPT 3.5 Turbo uses CL100K_Base encodings, check your LLM to see which tokenizer it uses.
   const encoding = tiktoken.getEncoding("cl100k_base");
 
-  const callback: CallbackManager = new CallbackManager();
-  callback.onLLMStream = (callback_response) => {
-    //Token text
-    const text = callback_response.token.choices[0].delta.content
-      ? callback_response.token.choices[0].delta.content
-      : "";
-    //Increment total number of tokens
-    total_tokens += encoding.encode(text).length;
-  };
-
-  llm.callbackManager = callback;
+  //Stream Complete
+  //Note: Setting streaming flag to true or false will auto-set your return type to
+  //either an AsyncGenerator or a Response.
+  // Omitting the streaming flag automatically sets streaming to false
 
-  //Create a dummy event to trigger our Stream Callback
-  const dummy_event: Event = {
-    id: "something",
-    type: "intermediate" as EventType,
-  };
+  // const stream2 = await llm.chat([message], undefined);
+  const stream = await llm.complete(query, undefined, true);
 
-  //Stream Complete
-  const stream = llm.stream_complete(query, dummy_event);
   for await (const part of stream) {
     //This only gives you the string part of a stream
@@ -49,13 +39,11 @@ async function main() {
   const correct_total_tokens: number =
     encoding.encode(accumulated_result).length;
 
+  console.log(accumulated_result);
   //Check if our stream token counter works
   console.log(
     `Output token total using tokenizer on accumulated output: ${correct_total_tokens}`,
   );
-  console.log(
-    `Output token total using tokenizer on stream output: ${total_tokens}`,
-  );
 }
 
 main();
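
For reviewers, a minimal consumption sketch of the two call shapes the new comments describe. It assumes the `OpenAI.complete(prompt, parentEvent?, streaming?)` overloads exercised in this example; the function name below is illustrative, and the exact shape of the non-streaming result is not shown in this patch, so it is only logged rather than destructured.

```ts
import { OpenAI } from "../packages/core/src/llm/LLM";

async function demo() {
  const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });

  // streaming: true -> the promise resolves to an AsyncGenerator of string chunks
  const stream = await llm.complete("Where is Istanbul?", undefined, true);
  let accumulated = "";
  for await (const part of stream) {
    accumulated += part; // each part is just the text delta, as in the example above
  }
  console.log(accumulated);

  // streaming omitted (or false) -> the promise resolves to a single response object
  // (its exact shape is not part of this diff, so it is only logged here)
  const response = await llm.complete("Where is Istanbul?");
  console.log(response);
}

demo().catch(console.error);
```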