diff --git a/packages/server/src/handlers/chat.ts b/packages/server/src/handlers/chat.ts
index d8b56308ce4aa7a976918c49116c9c64747851c7..10bf8484a4de340033887a1581286074dc9ab320 100644
--- a/packages/server/src/handlers/chat.ts
+++ b/packages/server/src/handlers/chat.ts
@@ -1,16 +1,16 @@
 import { type Message } from "ai";
 import { IncomingMessage, ServerResponse } from "http";
 import { type ChatMessage } from "llamaindex";
-import type { ServerWorkflow } from "../types";
+import type { WorkflowFactory } from "../types";
 import {
   parseRequestBody,
-  pipeResponse,
+  pipeStreamToResponse,
   sendJSONResponse,
 } from "../utils/request";
 import { runWorkflow } from "../utils/workflow";
 
 export const handleChat = async (
-  workflow: ServerWorkflow,
+  workflowFactory: WorkflowFactory,
   req: IncomingMessage,
   res: ServerResponse,
 ) => {
@@ -25,10 +25,14 @@ export const handleChat = async (
       });
     }
 
-    const userInput = lastMessage.content;
-    const chatHistory = messages.slice(0, -1) as ChatMessage[];
-    const streamResponse = await runWorkflow(workflow, userInput, chatHistory);
-    pipeResponse(res, streamResponse);
+    const workflow = await workflowFactory(body);
+
+    const stream = await runWorkflow(workflow, {
+      userInput: lastMessage.content,
+      chatHistory: messages.slice(0, -1) as ChatMessage[],
+    });
+
+    pipeStreamToResponse(res, stream);
   } catch (error) {
     console.error("Chat error:", error);
     return sendJSONResponse(res, 500, {
diff --git a/packages/server/src/server.ts b/packages/server/src/server.ts
index 2b75ea440dcfc9f1e09d02b3fa09dc3daa973490..4b18c771b123e4d7176eda38d53edabf987aa170 100644
--- a/packages/server/src/server.ts
+++ b/packages/server/src/server.ts
@@ -3,24 +3,18 @@ import next from "next";
 import path from "path";
 import { parse } from "url";
 import { handleChat } from "./handlers/chat";
-import type { ServerWorkflow } from "./types";
-
-type NextAppOptions = Omit<Parameters<typeof next>[0], "dir">;
-
-export type LlamaIndexServerOptions = NextAppOptions & {
-  workflow: ServerWorkflow;
-};
+import type { LlamaIndexServerOptions, ServerWorkflow } from "./types";
 
 export class LlamaIndexServer {
   port: number;
   app: ReturnType<typeof next>;
-  workflow: ServerWorkflow;
+  workflowFactory: () => Promise<ServerWorkflow> | ServerWorkflow;
 
   constructor({ workflow, ...nextAppOptions }: LlamaIndexServerOptions) {
     const nextDir = path.join(__dirname, ".."); // location of the .next directory produced by the Next.js build
     this.app = next({ ...nextAppOptions, dir: nextDir });
     this.port = nextAppOptions.port ?? 3000;
-    this.workflow = workflow;
+    this.workflowFactory = workflow;
   }
 
   async start() {
@@ -31,7 +25,7 @@ export class LlamaIndexServer {
       const pathname = parsedUrl.pathname;
 
       if (pathname === "/api/chat" && req.method === "POST") {
-        return handleChat(this.workflow, req, res);
+        return handleChat(this.workflowFactory, req, res);
       }
 
       const handle = this.app.getRequestHandler();
diff --git a/packages/server/src/types.ts b/packages/server/src/types.ts
index c0d0744fc962dee9573a660bc353887d394ee6ae..9567c41efd16c10726e7c3d6d93e3f9293f260cc 100644
--- a/packages/server/src/types.ts
+++ b/packages/server/src/types.ts
@@ -4,6 +4,7 @@ import {
   type ChatMessage,
   type ChatResponseChunk,
 } from "llamaindex";
+import type next from "next";
 
 export type AgentInput = {
   userInput: string; // the last message content from the user
@@ -13,3 +14,17 @@ export type AgentInput = {
 export type ServerWorkflow =
   | Workflow<null, AgentInput, ChatResponseChunk>
   | AgentWorkflow;
+
+/**
+ * A factory function that creates a ServerWorkflow instance, possibly asynchronously.
+ * The optional requestBody parameter is the parsed body of the incoming request and can be used to customize the workflow per request.
+ */
+export type WorkflowFactory = (
+  requestBody?: unknown,
+) => Promise<ServerWorkflow> | ServerWorkflow;
+
+export type NextAppOptions = Omit<Parameters<typeof next>[0], "dir">;
+
+export type LlamaIndexServerOptions = NextAppOptions & {
+  workflow: WorkflowFactory;
+};
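
For context, a minimal sketch of how a consumer could satisfy the new WorkflowFactory contract; createMyWorkflow and the request-body shape are hypothetical placeholders, not part of this change:

```ts
// Sketch only: createMyWorkflow is a hypothetical application helper,
// and the request-body shape below is illustrative.
import type { ServerWorkflow, WorkflowFactory } from "./types";

declare function createMyWorkflow(options: {
  temperature?: number;
}): ServerWorkflow;

const workflowFactory: WorkflowFactory = async (requestBody) => {
  // requestBody is the parsed JSON body of the chat request (typed as unknown),
  // so narrow it before reading anything from it.
  const body = requestBody as { data?: { temperature?: number } } | undefined;
  return createMyWorkflow({ temperature: body?.data?.temperature });
};
```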
diff --git a/packages/server/src/utils/request.ts b/packages/server/src/utils/request.ts
index 416b97c6abfe56a082df664ffc708bc6438e6b9d..fc5992b1ba9f01497f707138bb7c39c277e85555 100644
--- a/packages/server/src/utils/request.ts
+++ b/packages/server/src/utils/request.ts
@@ -27,15 +27,15 @@ export function sendJSONResponse(
   response.end(typeof body === "string" ? body : JSON.stringify(body));
 }
 
-export async function pipeResponse(
-  serverResponse: ServerResponse,
-  streamResponse: Response,
+export async function pipeStreamToResponse(
+  response: ServerResponse,
+  stream: Response,
 ) {
-  if (!streamResponse.body) return;
-  const reader = streamResponse.body.getReader();
+  if (!stream.body) return response.end();
+  const reader = stream.body.getReader();
   while (true) {
     const { done, value } = await reader.read();
-    if (done) return serverResponse.end();
-    serverResponse.write(value);
+    if (done) return response.end();
+    response.write(value);
   }
 }
diff --git a/packages/server/src/utils/workflow.ts b/packages/server/src/utils/workflow.ts
index 328f9f9755ed99dcf23dc91152132e12fbda29bc..9bebd3198f8311b06fe12a89f3571368842a26b5 100644
--- a/packages/server/src/utils/workflow.ts
+++ b/packages/server/src/utils/workflow.ts
@@ -5,18 +5,17 @@ import {
   StopEvent,
   WorkflowContext,
   WorkflowEvent,
-  type ChatMessage,
   type ChatResponseChunk,
 } from "llamaindex";
 import { ReadableStream } from "stream/web";
-import type { ServerWorkflow } from "../types";
+import type { AgentInput, ServerWorkflow } from "../types";
 
 export async function runWorkflow(
   workflow: ServerWorkflow,
-  userInput: string,
-  chatHistory: ChatMessage[],
+  agentInput: AgentInput,
 ) {
   if (workflow instanceof AgentWorkflow) {
+    const { userInput, chatHistory } = agentInput;
     const context = workflow.run(userInput, { chatHistory });
     const { stream, dataStream } = await createStreamFromWorkflowContext(
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -25,7 +24,7 @@ export async function runWorkflow(
     return LlamaIndexAdapter.toDataStreamResponse(stream, { data: dataStream });
   }
 
-  const context = workflow.run({ userInput, chatHistory });
+  const context = workflow.run(agentInput);
   const { stream, dataStream } = await createStreamFromWorkflowContext(context);
   return LlamaIndexAdapter.toDataStreamResponse(stream, { data: dataStream });
 }