From 69a7ef063d52d20721b866f8e8273f62b68f41f3 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Fri, 27 Oct 2023 17:49:21 +0700
Subject: [PATCH] add streaming support for LlamaIndex

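Replace the direct OpenAI SDK call in the streaming Next.js template with
LlamaIndex: the chat route now builds a SimpleChatEngine and streams its
answer through a new LlamaIndexStream helper, which wraps the engine's
AsyncGenerator in a ReadableStream and pipes it through the ai package's
callback and stream-data transformers so it can be returned as a
StreamingTextResponse. Also adds a VS Code launch.json for debugging the
template and removes the now-unused openai dependency.

For reference, a minimal sketch of how a client can consume the streaming
endpoint, assuming the template keeps using the ai package's React hook
(the component name and file path are illustrative only, not part of this
patch):

    // app/components/chat.tsx -- hypothetical client component
    "use client";
    import { useChat } from "ai/react";

    export default function Chat() {
      // useChat POSTs to /api/chat and appends streamed tokens to `messages`
      const { messages, input, handleInputChange, handleSubmit } = useChat();
      return (
        <form onSubmit={handleSubmit}>
          {messages.map((m) => (
            <p key={m.id}>
              {m.role}: {m.content}
            </p>
          ))}
          <input value={input} onChange={handleInputChange} />
        </form>
      );
    }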
---
 .vscode/launch.json                           | 32 +++++++++++++
 .../nextjs/app/api/chat/llamaindex-stream.ts  | 35 ++++++++++++++
 .../streaming/nextjs/app/api/chat/route.ts    | 46 +++++++------------
 .../templates/streaming/nextjs/package.json   |  1 -
 4 files changed, 84 insertions(+), 30 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 packages/create-llama/templates/streaming/nextjs/app/api/chat/llamaindex-stream.ts

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..50537f056
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,32 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Next.js: debug server-side",
+      "type": "node-terminal",
+      "request": "launch",
+      "cwd": "${workspaceFolder}/packages/create-llama/templates/streaming/nextjs", // run the dev server from the streaming Next.js template
+      "command": "pnpm run dev"
+    },
+    {
+      "name": "Next.js: debug client-side",
+      "type": "chrome",
+      "request": "launch",
+      "url": "http://localhost:3000"
+    },
+    {
+      "name": "Next.js: debug full stack",
+      "type": "node-terminal",
+      "request": "launch",
+      "command": "pnpm run dev",
+      "serverReadyAction": {
+        "pattern": "started server on .+, url: (https?://.+)",
+        "uriFormat": "%s",
+        "action": "debugWithChrome"
+      }
+    }
+  ],
+  "runtimeArgs": [
+    "--preserve-symlinks"
+  ]
+}
\ No newline at end of file
diff --git a/packages/create-llama/templates/streaming/nextjs/app/api/chat/llamaindex-stream.ts b/packages/create-llama/templates/streaming/nextjs/app/api/chat/llamaindex-stream.ts
new file mode 100644
index 000000000..12328de87
--- /dev/null
+++ b/packages/create-llama/templates/streaming/nextjs/app/api/chat/llamaindex-stream.ts
@@ -0,0 +1,35 @@
+import {
+  createCallbacksTransformer,
+  createStreamDataTransformer,
+  trimStartOfStreamHelper,
+  type AIStreamCallbacksAndOptions,
+} from "ai";
+
+function createParser(res: AsyncGenerator<any>) {
+  const trimStartOfStream = trimStartOfStreamHelper();
+  return new ReadableStream<string>({
+    async pull(controller): Promise<void> {
+      const { value, done } = await res.next();
+      if (done) {
+        controller.close();
+        return;
+      }
+
+      const text = trimStartOfStream(value ?? "");
+      if (text) {
+        controller.enqueue(text);
+      }
+    },
+  });
+}
+
+export function LlamaIndexStream(
+  res: AsyncGenerator<any>,
+  callbacks?: AIStreamCallbacksAndOptions,
+): ReadableStream {
+  return createParser(res)
+    .pipeThrough(createCallbacksTransformer(callbacks))
+    .pipeThrough(
+      createStreamDataTransformer(callbacks?.experimental_streamData),
+    );
+}
diff --git a/packages/create-llama/templates/streaming/nextjs/app/api/chat/route.ts b/packages/create-llama/templates/streaming/nextjs/app/api/chat/route.ts
index 06432075c..fb54dbc80 100644
--- a/packages/create-llama/templates/streaming/nextjs/app/api/chat/route.ts
+++ b/packages/create-llama/templates/streaming/nextjs/app/api/chat/route.ts
@@ -1,50 +1,38 @@
-import { OpenAIStream, StreamingTextResponse } from "ai";
+import { Message, StreamingTextResponse } from "ai";
+import { OpenAI, SimpleChatEngine } from "llamaindex";
 import { NextRequest, NextResponse } from "next/server";
-import OpenAI from "openai";
+import { LlamaIndexStream } from "./llamaindex-stream";
+
 export const runtime = "nodejs";
 export const dynamic = "force-dynamic";
 
-const openai = new OpenAI({
-  apiKey: process.env.OPENAI_API_KEY,
-});
-
 export async function POST(request: NextRequest) {
   try {
     const body = await request.json();
-    const { messages } = body;
-    if (!messages) {
+    const { messages }: { messages: Message[] } = body;
+    const lastMessage = messages?.pop();
+    if (!messages || !lastMessage || lastMessage.role !== "user") {
       return NextResponse.json(
         {
-          error: "messages are required in the request body",
+          error:
+            "messages are required in the request body and the last message must be from the user",
         },
         { status: 400 },
       );
     }
 
-    // const llm = new OpenAI({
-    //   model: "gpt-3.5-turbo",
-    // });
-
-    // const chatEngine = new SimpleChatEngine({
-    //   llm,
-    // });
-
-    // const response = await chatEngine.chat(message, chatHistory);
-    // const result: ChatMessage = {
-    //   role: "assistant",
-    //   content: response.response,
-    // };
-
-    // return NextResponse.json({ result });
+    const llm = new OpenAI({
+      model: "gpt-3.5-turbo",
+    });
 
-    const response = await openai.chat.completions.create({
-      model: "gpt-4",
-      stream: true,
-      messages,
+    const chatEngine = new SimpleChatEngine({
+      llm,
     });
 
+    const response = await chatEngine.chat(lastMessage.content, messages, true);
+
     // Transform the response into a readable stream
-    const stream = OpenAIStream(response);
+    const stream = LlamaIndexStream(response);
 
     // Return a StreamingTextResponse, which can be consumed by the client
     return new StreamingTextResponse(stream);
diff --git a/packages/create-llama/templates/streaming/nextjs/package.json b/packages/create-llama/templates/streaming/nextjs/package.json
index 399c20bcb..e9f23201d 100644
--- a/packages/create-llama/templates/streaming/nextjs/package.json
+++ b/packages/create-llama/templates/streaming/nextjs/package.json
@@ -11,7 +11,6 @@
     "ai": "^2",
     "llamaindex": "0.0.31",
     "next": "^13",
-    "openai": "^4.14.0",
     "react": "^18",
     "react-dom": "^18"
   },
-- 
GitLab