diff --git a/examples/llm_stream.ts b/examples/llm_stream.ts
index 09c75b44991fa2575fa96525fc333210c7eff7f7..89bc1aac3dd31c899eb80efea09fe89e2b6d76e0 100644
--- a/examples/llm_stream.ts
+++ b/examples/llm_stream.ts
@@ -1,10 +1,5 @@
 import * as tiktoken from "tiktoken-node";
-import {
-  CallbackManager,
-  Event,
-  EventType,
-} from "../packages/core/src/callbacks/CallbackManager";
-import { ChatMessage, MessageType, OpenAI } from "../packages/core/src/llm/LLM";
+import { ChatMessage, OpenAI } from "../packages/core/src/llm/LLM";

 async function main() {
   const query: string = `
@@ -29,7 +24,6 @@ Where is Istanbul?
   // const stream2 = await llm.chat([message], undefined);
   const stream = await llm.complete(query, undefined, true);

-
   for await (const part of stream) {
     //This only gives you the string part of a stream
     console.log(part);
diff --git a/packages/core/src/ChatEngine.ts b/packages/core/src/ChatEngine.ts
index 8e627042781e98d790a64a96bd680b3e68e65857..bc299356e36263a247d047b1b4b807e843162ee1 100644
--- a/packages/core/src/ChatEngine.ts
+++ b/packages/core/src/ChatEngine.ts
@@ -25,8 +25,14 @@ export interface ChatEngine {
    * @param chatHistory optional chat history if you want to customize the chat history
    * @param streaming optional streaming flag, which auto-sets the return value if True.
    */
-  chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : Response>
-(message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R>;
+  chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[],
+    streaming?: T,
+  ): Promise<R>;

   /**
    * Resets the chat history so that it's empty.
@@ -46,11 +52,12 @@ export class SimpleChatEngine implements ChatEngine {
     this.llm = init?.llm ?? new OpenAI();
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : Response>
-  (message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R> {
-
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R> {
     //Streaming option
-    if(streaming){
+    if (streaming) {
       return this.streamChat(message, chatHistory) as R;
     }

@@ -63,13 +70,20 @@ export class SimpleChatEngine implements ChatEngine {
     return new Response(response.message.content) as R;
   }

-  protected async *streamChat(message: string, chatHistory?: ChatMessage[]): AsyncGenerator<string, void, unknown> {
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[],
+  ): AsyncGenerator<string, void, unknown> {
     chatHistory = chatHistory ?? this.chatHistory;
     chatHistory.push({ content: message, role: "user" });
-    const response_generator = await this.llm.chat(chatHistory, undefined, true);
+    const response_generator = await this.llm.chat(
+      chatHistory,
+      undefined,
+      true,
+    );

     var accumulator: string = "";
-    for await(const part of response_generator){
+    for await (const part of response_generator) {
       accumulator += part;
       yield part;
     }
@@ -125,10 +139,13 @@ export class CondenseQuestionChatEngine implements ChatEngine {
     );
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : Response>(
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
     message: string,
     chatHistory?: ChatMessage[] | undefined,
-    streaming?: T
+    streaming?: T,
   ): Promise<R> {
     chatHistory = chatHistory ?? this.chatHistory;

@@ -174,12 +191,18 @@ export class ContextChatEngine implements ChatEngine {
       init?.contextSystemPrompt ?? defaultContextSystemPrompt;
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : Response>
-  (message: string, chatHistory?: ChatMessage[] | undefined, streaming?: T): Promise<R> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
     chatHistory = chatHistory ?? this.chatHistory;

     //Streaming option
-    if(streaming){
+    if (streaming) {
       return this.streamChat(message, chatHistory) as R;
     }

@@ -218,7 +241,10 @@ export class ContextChatEngine implements ChatEngine {
     ) as R;
   }

-  protected async *streamChat(message: string, chatHistory?: ChatMessage[] | undefined): AsyncGenerator<string, void, unknown> {
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+  ): AsyncGenerator<string, void, unknown> {
     chatHistory = chatHistory ?? this.chatHistory;

     const parentEvent: Event = {
@@ -242,19 +268,18 @@ export class ContextChatEngine implements ChatEngine {

     chatHistory.push({ content: message, role: "user" });

-
     const response_stream = await this.chatModel.chat(
       [systemMessage, ...chatHistory],
       parentEvent,
-      true
+      true,
     );
     var accumulator: string = "";
-    for await(const part of response_stream){
+    for await (const part of response_stream) {
       accumulator += part;
       yield part;
     }

-    chatHistory.push({content: accumulator, role: "system"});
+    chatHistory.push({ content: accumulator, role: "system" });

     this.chatHistory = chatHistory;

@@ -279,10 +304,16 @@ export class HistoryChatEngine implements ChatEngine {
     this.llm = init?.llm ?? new OpenAI();
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : Response>
-  (message: string, chatHistory?: ChatMessage[] | undefined, streaming?: T ): Promise<R> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
     //Streaming option
-    if(streaming){
+    if (streaming) {
       return this.streamChat(message, chatHistory) as R;
     }
     this.chatHistory.addMessage({ content: message, role: "user" });
@@ -291,17 +322,23 @@ export class HistoryChatEngine implements ChatEngine {
     return new Response(response.message.content) as R;
   }

-  protected async *streamChat(message: string, chatHistory?: ChatMessage[] | undefined): AsyncGenerator<string, void, unknown> {
-
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+  ): AsyncGenerator<string, void, unknown> {
     this.chatHistory.addMessage({ content: message, role: "user" });
-    const response_stream = await this.llm.chat(this.chatHistory.messages, undefined, true);
+    const response_stream = await this.llm.chat(
+      this.chatHistory.messages,
+      undefined,
+      true,
+    );
     var accumulator = "";
-    for await(const part of response_stream){
+    for await (const part of response_stream) {
       accumulator += part;
       yield part;
     }
-    this.chatHistory.addMessage({content: accumulator, role: "user"});
+    this.chatHistory.addMessage({ content: accumulator, role: "user" });

     return;
   }

diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts
index 85561f6d235173f39ca60a5e6723eb5ee09b43e9..d9a53a578516306fed8067634b7117df6b0457e2 100644
--- a/packages/core/src/llm/LLM.ts
+++ b/packages/core/src/llm/LLM.ts
@@ -53,18 +53,30 @@ export interface LLM {
   /**
    * Get a chat response from the LLM
    * @param messages
-   * 
+   *
    * The return type of chat() and complete() are set by the "streaming" parameter being set to True.
    */
-  chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>
-  (messages: ChatMessage[], parentEvent?: Event, streaming?: T): Promise<R>;
+  chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(
+    messages: ChatMessage[],
+    parentEvent?: Event,
+    streaming?: T,
+  ): Promise<R>;

   /**
    * Get a prompt completion from the LLM
    * @param prompt the prompt to complete
    */
-  complete<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>
-  (prompt: string, parentEvent?: Event, streaming?: T): Promise<R>;
+  complete<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(
+    prompt: string,
+    parentEvent?: Event,
+    streaming?: T,
+  ): Promise<R>;
 }

 export const GPT4_MODELS = {
@@ -184,11 +196,10 @@ export class OpenAI implements LLM {
     }
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
-    messages: ChatMessage[],
-    parentEvent?: Event,
-    streaming?: T,
-  ): Promise<R> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(messages: ChatMessage[], parentEvent?: Event, streaming?: T): Promise<R> {
     const baseRequestParams: OpenAILLM.Chat.CompletionCreateParams = {
       model: this.model,
       temperature: this.temperature,
@@ -201,8 +212,8 @@
       ...this.additionalChatOptions,
     };
     // Streaming
-    if(streaming){
-      if(!this.hasStreaming){
+    if (streaming) {
+      if (!this.hasStreaming) {
         throw Error("No streaming support for this LLM.");
       }
       return this.streamChat(messages, parentEvent) as R;
@@ -214,15 +225,20 @@
     });

     const content = response.choices[0].message?.content ?? "";
-    return { message: { content, role: response.choices[0].message.role } } as R;
+    return {
+      message: { content, role: response.choices[0].message.role },
+    } as R;
   }

-  async complete<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
-    prompt: string,
-    parentEvent?: Event,
-    streaming?: T
-  ): Promise<R> {
-    return this.chat([{ content: prompt, role: "user" }], parentEvent, streaming);
+  async complete<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(prompt: string, parentEvent?: Event, streaming?: T): Promise<R> {
+    return this.chat(
+      [{ content: prompt, role: "user" }],
+      parentEvent,
+      streaming,
+    );
   }

   //We can wrap a stream in a generator to add some additional logging behavior
@@ -477,11 +493,10 @@ If a question does not make any sense, or is not factually coherent, explain why
     };
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
-    messages: ChatMessage[],
-    _parentEvent?: Event,
-    streaming?: T
-  ): Promise<R> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(messages: ChatMessage[], _parentEvent?: Event, streaming?: T): Promise<R> {
     const api = ALL_AVAILABLE_LLAMADEUCE_MODELS[this.model]
       .replicateApi as `${string}/${string}:${string}`;

@@ -518,11 +533,10 @@ If a question does not make any sense, or is not factually coherent, explain why
     } as R;
   }

-  async complete<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
-    prompt: string,
-    parentEvent?: Event,
-    streaming?: T
-  ): Promise<R> {
+  async complete<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(prompt: string, parentEvent?: Event, streaming?: T): Promise<R> {
     return this.chat([{ content: prompt, role: "user" }], parentEvent);
   }
 }
@@ -531,7 +545,6 @@ If a question does not make any sense, or is not factually coherent, explain why
  * Anthropic LLM implementation
  */

-
 //TODO: Add streaming for this
 export class Anthropic implements LLM {
   // Per completion Anthropic params
@@ -585,10 +598,13 @@ export class Anthropic implements LLM {
     );
   }

-  async chat<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(
     messages: ChatMessage[],
     parentEvent?: Event | undefined,
-    streaming?: T
+    streaming?: T,
   ): Promise<R> {
     const response = await this.session.anthropic.completions.create({
       model: this.model,
@@ -604,10 +620,13 @@
       // That space will be re-added when we generate the next prompt.
     } as R;
   }
-  async complete<T extends boolean | undefined = undefined, R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse>(
+  async complete<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : ChatResponse,
+  >(
     prompt: string,
     parentEvent?: Event | undefined,
-    streaming?: T
+    streaming?: T,
   ): Promise<R> {
     return this.chat([{ content: prompt, role: "user" }], parentEvent) as R;
   }
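Usage sketch of the conditionally typed streaming API formatted by this patch (not part of the diff). It follows the call patterns visible in examples/llm_stream.ts and SimpleChatEngine; the demo function, prompt strings, and the assumption that an OpenAI API key is configured in the environment are illustrative only.

// Sketch only: mirrors the patterns in examples/llm_stream.ts.
// Assumes OPENAI_API_KEY is set and that SimpleChatEngine accepts an llm in its init.
import { OpenAI } from "../packages/core/src/llm/LLM";
import { SimpleChatEngine } from "../packages/core/src/ChatEngine";

async function demo() {
  const llm = new OpenAI();

  // streaming = true narrows the return type to AsyncGenerator<string, void, unknown>
  const stream = await llm.complete("Where is Istanbul?", undefined, true);
  for await (const part of stream) {
    process.stdout.write(part);
  }

  // streaming omitted narrows the return type to ChatResponse
  const response = await llm.chat([{ content: "Where is Istanbul?", role: "user" }]);
  console.log(response.message.content);

  // The chat engines follow the same pattern: with streaming = true,
  // SimpleChatEngine.chat returns the generator produced by streamChat.
  const engine = new SimpleChatEngine({ llm });
  const chatStream = await engine.chat("Tell me more.", undefined, true);
  for await (const part of chatStream) {
    process.stdout.write(part);
  }
}

demo();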