import { stdin as input, stdout as output } from "node:process";
import readline from "node:readline/promises";

import { OpenAI } from "@llamaindex/openai";
import {
  ChatSummaryMemoryBuffer,
  Settings,
  SimpleChatEngine,
} from "llamaindex";

if (process.env.NODE_ENV === "development") {
  Settings.callbackManager.on("llm-end", (event) => {
    console.log("callers chain", event.reason?.computedCallers);
  });
}

async function main() {
  // Set maxTokens to 75% of the context window size of 4096.
  // This will trigger the summarizer once the chat history exceeds the
  // remaining 25% of the context window (1024 tokens).
  const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
  const chatHistory = new ChatSummaryMemoryBuffer({ llm });
  const chatEngine = new SimpleChatEngine({ llm });
  const rl = readline.createInterface({ input, output });

  // Read queries from the terminal in a loop, passing the summarizing
  // chat history to the chat engine on every call.
  while (true) {
    const query = await rl.question("Query: ");
    const stream = await chatEngine.chat({
      message: query,
      chatHistory,
      stream: true,
    });
    if (chatHistory.getLastSummary()) {
      // Print the summary of the conversation so far, as produced by the ChatSummaryMemoryBuffer
      console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
    }
    for await (const chunk of stream) {
      process.stdout.write(chunk.response);
    }
    console.log();
  }
}

main().catch(console.error);