From 0cf6386e173f3a25a89a4bb2c199cb774cac8aa5 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Mon, 8 Apr 2024 15:27:07 +0700
Subject: [PATCH] feat: use setting config (#38)

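Replace the per-request OpenAI/LLM instances and serviceContextFromDefaults
calls with the global Settings object from llamaindex. A new
engine/settings.ts module exposes initSettings(), which configures
Settings.llm, Settings.chunkSize and Settings.chunkOverlap once at startup,
so createChatEngine() and getDataSource() no longer take an llm parameter.
The CHUNK_SIZE/CHUNK_OVERLAP constants are removed from the vector store
shared.ts files and now live in the new settings module.

A minimal sketch of the intended call order, mirroring the updated route
handlers (consumer code assumed for illustration, not part of this patch):

    import { initSettings } from "./engine/settings";
    import { createChatEngine } from "./engine/chat";

    // Configure Settings.llm, Settings.chunkSize and Settings.chunkOverlap once
    initSettings();

    export async function handleChat(message: string) {
      // createChatEngine() now reads the model from Settings.llm internally
      const chatEngine = await createChatEngine();
      return chatEngine.chat({ message });
    }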
---
 .../engines/typescript/agent/chat.ts          |  8 +++----
 .../engines/typescript/chat/chat.ts           |  8 +++----
 .../vectordbs/typescript/astra/generate.ts    |  2 ++
 .../vectordbs/typescript/astra/index.ts       | 18 ++++-----------
 .../vectordbs/typescript/astra/shared.ts      |  3 ---
 .../vectordbs/typescript/milvus/generate.ts   |  2 ++
 .../vectordbs/typescript/milvus/index.ts      | 23 ++++---------------
 .../vectordbs/typescript/milvus/shared.ts     |  3 ---
 .../vectordbs/typescript/mongo/generate.ts    |  2 ++
 .../vectordbs/typescript/mongo/index.ts       | 18 ++++-----------
 .../vectordbs/typescript/mongo/shared.ts      |  3 ---
 .../vectordbs/typescript/none/generate.ts     | 21 +++++------------
 .../vectordbs/typescript/none/index.ts        | 12 ++--------
 .../vectordbs/typescript/none/shared.ts       |  2 --
 .../vectordbs/typescript/pg/generate.ts       |  2 ++
 .../vectordbs/typescript/pg/index.ts          | 18 +++------------
 .../vectordbs/typescript/pg/shared.ts         |  2 --
 .../vectordbs/typescript/pinecone/generate.ts |  2 ++
 .../vectordbs/typescript/pinecone/index.ts    | 18 ++++-----------
 .../vectordbs/typescript/pinecone/shared.ts   |  3 ---
 .../controllers/chat-request.controller.ts    |  8 ++-----
 .../src/controllers/chat.controller.ts        |  8 ++-----
 .../express/src/controllers/engine/chat.ts    |  6 ++---
 .../src/controllers/engine/settings.ts        | 13 +++++++++++
 .../express/src/routes/chat.route.ts          |  2 ++
 .../nextjs/app/api/chat/engine/chat.ts        |  6 ++---
 .../nextjs/app/api/chat/engine/settings.ts    | 13 +++++++++++
 .../streaming/nextjs/app/api/chat/route.ts    | 11 ++++-----
 28 files changed, 87 insertions(+), 150 deletions(-)
 create mode 100644 templates/types/streaming/express/src/controllers/engine/settings.ts
 create mode 100644 templates/types/streaming/nextjs/app/api/chat/engine/settings.ts

diff --git a/templates/components/engines/typescript/agent/chat.ts b/templates/components/engines/typescript/agent/chat.ts
index 51a61ba0..9c82dbab 100644
--- a/templates/components/engines/typescript/agent/chat.ts
+++ b/templates/components/engines/typescript/agent/chat.ts
@@ -1,8 +1,8 @@
 import {
   BaseTool,
-  OpenAI,
   OpenAIAgent,
   QueryEngineTool,
+  Settings,
   ToolFactory,
 } from "llamaindex";
 import fs from "node:fs/promises";
@@ -10,12 +10,12 @@ import path from "node:path";
 import { getDataSource } from "./index";
 import { STORAGE_CACHE_DIR } from "./shared";
 
-export async function createChatEngine(llm: OpenAI) {
+export async function createChatEngine() {
   let tools: BaseTool[] = [];
 
   // Add a query engine tool if we have a data source
   // Delete this code if you don't have a data source
-  const index = await getDataSource(llm);
+  const index = await getDataSource();
   if (index) {
     tools.push(
       new QueryEngineTool({
@@ -38,7 +38,7 @@ export async function createChatEngine(llm: OpenAI) {
 
   return new OpenAIAgent({
     tools,
-    llm,
+    llm: Settings.llm,
     verbose: true,
   });
 }
diff --git a/templates/components/engines/typescript/chat/chat.ts b/templates/components/engines/typescript/chat/chat.ts
index 2feea01b..62cc77df 100644
--- a/templates/components/engines/typescript/chat/chat.ts
+++ b/templates/components/engines/typescript/chat/chat.ts
@@ -1,8 +1,8 @@
-import { ContextChatEngine, LLM } from "llamaindex";
+import { ContextChatEngine, Settings } from "llamaindex";
 import { getDataSource } from "./index";
 
-export async function createChatEngine(llm: LLM) {
-  const index = await getDataSource(llm);
+export async function createChatEngine() {
+  const index = await getDataSource();
   if (!index) {
     throw new Error(
       `StorageContext is empty - call 'npm run generate' to generate the storage first`,
@@ -12,7 +12,7 @@ export async function createChatEngine(llm: LLM) {
   retriever.similarityTopK = 3;
 
   return new ContextChatEngine({
-    chatModel: llm,
+    chatModel: Settings.llm,
     retriever,
   });
 }
diff --git a/templates/components/vectordbs/typescript/astra/generate.ts b/templates/components/vectordbs/typescript/astra/generate.ts
index ca13f51e..c29e390c 100644
--- a/templates/components/vectordbs/typescript/astra/generate.ts
+++ b/templates/components/vectordbs/typescript/astra/generate.ts
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";
 
 dotenv.config();
@@ -33,6 +34,7 @@ async function loadAndIndex() {
 
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
diff --git a/templates/components/vectordbs/typescript/astra/index.ts b/templates/components/vectordbs/typescript/astra/index.ts
index d1ac6c11..c659bc92 100644
--- a/templates/components/vectordbs/typescript/astra/index.ts
+++ b/templates/components/vectordbs/typescript/astra/index.ts
@@ -1,20 +1,10 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  AstraDBVectorStore,
-  LLM,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
+import { AstraDBVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars } from "./shared";
 
-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new AstraDBVectorStore();
   await store.connect(process.env.ASTRA_DB_COLLECTION!);
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
diff --git a/templates/components/vectordbs/typescript/astra/shared.ts b/templates/components/vectordbs/typescript/astra/shared.ts
index fb240187..d923d1b2 100644
--- a/templates/components/vectordbs/typescript/astra/shared.ts
+++ b/templates/components/vectordbs/typescript/astra/shared.ts
@@ -1,6 +1,3 @@
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
-
 const REQUIRED_ENV_VARS = [
   "ASTRA_DB_APPLICATION_TOKEN",
   "ASTRA_DB_ENDPOINT",
diff --git a/templates/components/vectordbs/typescript/milvus/generate.ts b/templates/components/vectordbs/typescript/milvus/generate.ts
index bb2a33cf..709d3546 100644
--- a/templates/components/vectordbs/typescript/milvus/generate.ts
+++ b/templates/components/vectordbs/typescript/milvus/generate.ts
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars, getMilvusClient } from "./shared";
 
 dotenv.config();
@@ -32,6 +33,7 @@ async function loadAndIndex() {
 
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
diff --git a/templates/components/vectordbs/typescript/milvus/index.ts b/templates/components/vectordbs/typescript/milvus/index.ts
index 05652b30..c10bad6d 100644
--- a/templates/components/vectordbs/typescript/milvus/index.ts
+++ b/templates/components/vectordbs/typescript/milvus/index.ts
@@ -1,25 +1,10 @@
-import {
-  LLM,
-  MilvusVectorStore,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
-import {
-  checkRequiredEnvVars,
-  CHUNK_OVERLAP,
-  CHUNK_SIZE,
-  getMilvusClient,
-} from "./shared";
+import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars, getMilvusClient } from "./shared";
 
-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const milvusClient = getMilvusClient();
   const store = new MilvusVectorStore({ milvusClient });
 
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
diff --git a/templates/components/vectordbs/typescript/milvus/shared.ts b/templates/components/vectordbs/typescript/milvus/shared.ts
index 5cdb356b..5f4139aa 100644
--- a/templates/components/vectordbs/typescript/milvus/shared.ts
+++ b/templates/components/vectordbs/typescript/milvus/shared.ts
@@ -1,8 +1,5 @@
 import { MilvusClient } from "@zilliz/milvus2-sdk-node";
 
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
-
 const REQUIRED_ENV_VARS = [
   "MILVUS_ADDRESS",
   "MILVUS_USERNAME",
diff --git a/templates/components/vectordbs/typescript/mongo/generate.ts b/templates/components/vectordbs/typescript/mongo/generate.ts
index abfd0a6e..da7aed57 100644
--- a/templates/components/vectordbs/typescript/mongo/generate.ts
+++ b/templates/components/vectordbs/typescript/mongo/generate.ts
@@ -7,6 +7,7 @@ import {
 } from "llamaindex";
 import { MongoClient } from "mongodb";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";
 
 dotenv.config();
@@ -42,6 +43,7 @@ async function loadAndIndex() {
 
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
diff --git a/templates/components/vectordbs/typescript/mongo/index.ts b/templates/components/vectordbs/typescript/mongo/index.ts
index ea2a3962..77dc7b4d 100644
--- a/templates/components/vectordbs/typescript/mongo/index.ts
+++ b/templates/components/vectordbs/typescript/mongo/index.ts
@@ -1,21 +1,11 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  LLM,
-  MongoDBAtlasVectorSearch,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { MongoDBAtlasVectorSearch, VectorStoreIndex } from "llamaindex";
 import { MongoClient } from "mongodb";
-import { checkRequiredEnvVars, CHUNK_OVERLAP, CHUNK_SIZE } from "./shared";
+import { checkRequiredEnvVars } from "./shared";
 
-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
   const client = new MongoClient(process.env.MONGO_URI!);
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new MongoDBAtlasVectorSearch({
     mongodbClient: client,
     dbName: process.env.MONGODB_DATABASE!,
@@ -23,5 +13,5 @@ export async function getDataSource(llm: LLM) {
     indexName: process.env.MONGODB_VECTOR_INDEX,
   });
 
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
diff --git a/templates/components/vectordbs/typescript/mongo/shared.ts b/templates/components/vectordbs/typescript/mongo/shared.ts
index ab467182..d6532a56 100644
--- a/templates/components/vectordbs/typescript/mongo/shared.ts
+++ b/templates/components/vectordbs/typescript/mongo/shared.ts
@@ -1,6 +1,3 @@
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
-
 const REQUIRED_ENV_VARS = [
   "MONGO_URI",
   "MONGODB_DATABASE",
diff --git a/templates/components/vectordbs/typescript/none/generate.ts b/templates/components/vectordbs/typescript/none/generate.ts
index 2773fbef..732ba211 100644
--- a/templates/components/vectordbs/typescript/none/generate.ts
+++ b/templates/components/vectordbs/typescript/none/generate.ts
@@ -1,14 +1,10 @@
-import {
-  ServiceContext,
-  serviceContextFromDefaults,
-  storageContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
 
 import * as dotenv from "dotenv";
 
 import { getDocuments } from "./loader";
-import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
+import { initSettings } from "./settings";
+import { STORAGE_CACHE_DIR } from "./shared";
 
 // Load environment variables from local .env file
 dotenv.config();
@@ -20,7 +16,7 @@ async function getRuntime(func: any) {
   return end - start;
 }
 
-async function generateDatasource(serviceContext: ServiceContext) {
+async function generateDatasource() {
   console.log(`Generating storage context...`);
   // Split documents, create embeddings and store them in the storage context
   const ms = await getRuntime(async () => {
@@ -30,18 +26,13 @@ async function generateDatasource(serviceContext: ServiceContext) {
     const documents = await getDocuments();
     await VectorStoreIndex.fromDocuments(documents, {
       storageContext,
-      serviceContext,
     });
   });
   console.log(`Storage context successfully generated in ${ms / 1000}s.`);
 }
 
 (async () => {
-  const serviceContext = serviceContextFromDefaults({
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
-
-  await generateDatasource(serviceContext);
+  initSettings();
+  await generateDatasource();
   console.log("Finished generating storage.");
 })();
diff --git a/templates/components/vectordbs/typescript/none/index.ts b/templates/components/vectordbs/typescript/none/index.ts
index 474d7e95..919ba3a6 100644
--- a/templates/components/vectordbs/typescript/none/index.ts
+++ b/templates/components/vectordbs/typescript/none/index.ts
@@ -1,18 +1,11 @@
 import {
-  LLM,
-  serviceContextFromDefaults,
   SimpleDocumentStore,
   storageContextFromDefaults,
   VectorStoreIndex,
 } from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
+import { STORAGE_CACHE_DIR } from "./shared";
 
-export async function getDataSource(llm: LLM) {
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
+export async function getDataSource() {
   const storageContext = await storageContextFromDefaults({
     persistDir: `${STORAGE_CACHE_DIR}`,
   });
@@ -25,6 +18,5 @@ export async function getDataSource(llm: LLM) {
   }
   return await VectorStoreIndex.init({
     storageContext,
-    serviceContext,
   });
 }
diff --git a/templates/components/vectordbs/typescript/none/shared.ts b/templates/components/vectordbs/typescript/none/shared.ts
index 42a8664a..e7736e5b 100644
--- a/templates/components/vectordbs/typescript/none/shared.ts
+++ b/templates/components/vectordbs/typescript/none/shared.ts
@@ -1,3 +1 @@
 export const STORAGE_CACHE_DIR = "./cache";
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
diff --git a/templates/components/vectordbs/typescript/pg/generate.ts b/templates/components/vectordbs/typescript/pg/generate.ts
index b41e4c20..f5664b6f 100644
--- a/templates/components/vectordbs/typescript/pg/generate.ts
+++ b/templates/components/vectordbs/typescript/pg/generate.ts
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import {
   PGVECTOR_COLLECTION,
   PGVECTOR_SCHEMA,
@@ -37,6 +38,7 @@ async function loadAndIndex() {
 
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
   process.exit(0);
diff --git a/templates/components/vectordbs/typescript/pg/index.ts b/templates/components/vectordbs/typescript/pg/index.ts
index 8c504e71..0b572d74 100644
--- a/templates/components/vectordbs/typescript/pg/index.ts
+++ b/templates/components/vectordbs/typescript/pg/index.ts
@@ -1,29 +1,17 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
+import { PGVectorStore, VectorStoreIndex } from "llamaindex";
 import {
-  LLM,
-  PGVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-import {
-  CHUNK_OVERLAP,
-  CHUNK_SIZE,
   PGVECTOR_SCHEMA,
   PGVECTOR_TABLE,
   checkRequiredEnvVars,
 } from "./shared";
 
-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
   const pgvs = new PGVectorStore({
     connectionString: process.env.PG_CONNECTION_STRING,
     schemaName: PGVECTOR_SCHEMA,
     tableName: PGVECTOR_TABLE,
   });
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
-  return await VectorStoreIndex.fromVectorStore(pgvs, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(pgvs);
 }
diff --git a/templates/components/vectordbs/typescript/pg/shared.ts b/templates/components/vectordbs/typescript/pg/shared.ts
index ba747934..88774df1 100644
--- a/templates/components/vectordbs/typescript/pg/shared.ts
+++ b/templates/components/vectordbs/typescript/pg/shared.ts
@@ -1,6 +1,4 @@
 export const PGVECTOR_COLLECTION = "data";
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
 export const PGVECTOR_SCHEMA = "public";
 export const PGVECTOR_TABLE = "llamaindex_embedding";
 
diff --git a/templates/components/vectordbs/typescript/pinecone/generate.ts b/templates/components/vectordbs/typescript/pinecone/generate.ts
index d879c6b1..c275bc58 100644
--- a/templates/components/vectordbs/typescript/pinecone/generate.ts
+++ b/templates/components/vectordbs/typescript/pinecone/generate.ts
@@ -6,6 +6,7 @@ import {
   storageContextFromDefaults,
 } from "llamaindex";
 import { getDocuments } from "./loader";
+import { initSettings } from "./settings";
 import { checkRequiredEnvVars } from "./shared";
 
 dotenv.config();
@@ -28,6 +29,7 @@ async function loadAndIndex() {
 
 (async () => {
   checkRequiredEnvVars();
+  initSettings();
   await loadAndIndex();
   console.log("Finished generating storage.");
 })();
diff --git a/templates/components/vectordbs/typescript/pinecone/index.ts b/templates/components/vectordbs/typescript/pinecone/index.ts
index 8fa949e7..226713d9 100644
--- a/templates/components/vectordbs/typescript/pinecone/index.ts
+++ b/templates/components/vectordbs/typescript/pinecone/index.ts
@@ -1,19 +1,9 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
-import {
-  LLM,
-  PineconeVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
+import { PineconeVectorStore, VectorStoreIndex } from "llamaindex";
+import { checkRequiredEnvVars } from "./shared";
 
-export async function getDataSource(llm: LLM) {
+export async function getDataSource() {
   checkRequiredEnvVars();
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: CHUNK_SIZE,
-    chunkOverlap: CHUNK_OVERLAP,
-  });
   const store = new PineconeVectorStore();
-  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  return await VectorStoreIndex.fromVectorStore(store);
 }
diff --git a/templates/components/vectordbs/typescript/pinecone/shared.ts b/templates/components/vectordbs/typescript/pinecone/shared.ts
index ae2fd6b1..c4e9911d 100644
--- a/templates/components/vectordbs/typescript/pinecone/shared.ts
+++ b/templates/components/vectordbs/typescript/pinecone/shared.ts
@@ -1,6 +1,3 @@
-export const CHUNK_SIZE = 512;
-export const CHUNK_OVERLAP = 20;
-
 const REQUIRED_ENV_VARS = ["PINECONE_ENVIRONMENT", "PINECONE_API_KEY"];
 
 export function checkRequiredEnvVars() {
diff --git a/templates/types/streaming/express/src/controllers/chat-request.controller.ts b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
index b81b5999..056edc5d 100644
--- a/templates/types/streaming/express/src/controllers/chat-request.controller.ts
+++ b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
@@ -1,5 +1,5 @@
 import { Request, Response } from "express";
-import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
+import { ChatMessage, MessageContent } from "llamaindex";
 import { createChatEngine } from "./engine/chat";
 
 const convertMessageContent = (
@@ -32,10 +32,6 @@ export const chatRequest = async (req: Request, res: Response) => {
       });
     }
 
-    const llm = new OpenAI({
-      model: process.env.MODEL || "gpt-3.5-turbo",
-    });
-
     // Convert message content from Vercel/AI format to LlamaIndex/OpenAI format
     // Note: The non-streaming template does not need the Vercel/AI format, we're still using it for consistency with the streaming template
     const userMessageContent = convertMessageContent(
@@ -43,7 +39,7 @@ export const chatRequest = async (req: Request, res: Response) => {
       data?.imageUrl,
     );
 
-    const chatEngine = await createChatEngine(llm);
+    const chatEngine = await createChatEngine();
 
     // Calling LlamaIndex's ChatEngine to get a response
     const response = await chatEngine.chat({
diff --git a/templates/types/streaming/express/src/controllers/chat.controller.ts b/templates/types/streaming/express/src/controllers/chat.controller.ts
index 1d9cd56a..7bdf98da 100644
--- a/templates/types/streaming/express/src/controllers/chat.controller.ts
+++ b/templates/types/streaming/express/src/controllers/chat.controller.ts
@@ -1,6 +1,6 @@
 import { streamToResponse } from "ai";
 import { Request, Response } from "express";
-import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
+import { ChatMessage, MessageContent } from "llamaindex";
 import { createChatEngine } from "./engine/chat";
 import { LlamaIndexStream } from "./llamaindex-stream";
 
@@ -34,11 +34,7 @@ export const chat = async (req: Request, res: Response) => {
       });
     }
 
-    const llm = new OpenAI({
-      model: (process.env.MODEL as any) || "gpt-3.5-turbo",
-    });
-
-    const chatEngine = await createChatEngine(llm);
+    const chatEngine = await createChatEngine();
 
     // Convert message content from Vercel/AI format to LlamaIndex/OpenAI format
     const userMessageContent = convertMessageContent(
diff --git a/templates/types/streaming/express/src/controllers/engine/chat.ts b/templates/types/streaming/express/src/controllers/engine/chat.ts
index abb02e90..a42ac41c 100644
--- a/templates/types/streaming/express/src/controllers/engine/chat.ts
+++ b/templates/types/streaming/express/src/controllers/engine/chat.ts
@@ -1,7 +1,7 @@
-import { LLM, SimpleChatEngine } from "llamaindex";
+import { Settings, SimpleChatEngine } from "llamaindex";
 
-export async function createChatEngine(llm: LLM) {
+export async function createChatEngine() {
   return new SimpleChatEngine({
-    llm,
+    llm: Settings.llm,
   });
 }
diff --git a/templates/types/streaming/express/src/controllers/engine/settings.ts b/templates/types/streaming/express/src/controllers/engine/settings.ts
new file mode 100644
index 00000000..25c077a5
--- /dev/null
+++ b/templates/types/streaming/express/src/controllers/engine/settings.ts
@@ -0,0 +1,13 @@
+import { OpenAI, Settings } from "llamaindex";
+
+const CHUNK_SIZE = 512;
+const CHUNK_OVERLAP = 20;
+
+export const initSettings = async () => {
+  Settings.llm = new OpenAI({
+    model: process.env.MODEL ?? "gpt-3.5-turbo",
+    maxTokens: 512,
+  });
+  Settings.chunkSize = CHUNK_SIZE;
+  Settings.chunkOverlap = CHUNK_OVERLAP;
+};
diff --git a/templates/types/streaming/express/src/routes/chat.route.ts b/templates/types/streaming/express/src/routes/chat.route.ts
index 0fc0d4c9..34a3e005 100644
--- a/templates/types/streaming/express/src/routes/chat.route.ts
+++ b/templates/types/streaming/express/src/routes/chat.route.ts
@@ -1,9 +1,11 @@
 import express, { Router } from "express";
 import { chatRequest } from "../controllers/chat-request.controller";
 import { chat } from "../controllers/chat.controller";
+import { initSettings } from "../controllers/engine/settings";
 
 const llmRouter: Router = express.Router();
 
+initSettings();
 llmRouter.route("/").post(chat);
 llmRouter.route("/request").post(chatRequest);
 
diff --git a/templates/types/streaming/nextjs/app/api/chat/engine/chat.ts b/templates/types/streaming/nextjs/app/api/chat/engine/chat.ts
index abb02e90..a42ac41c 100644
--- a/templates/types/streaming/nextjs/app/api/chat/engine/chat.ts
+++ b/templates/types/streaming/nextjs/app/api/chat/engine/chat.ts
@@ -1,7 +1,7 @@
-import { LLM, SimpleChatEngine } from "llamaindex";
+import { Settings, SimpleChatEngine } from "llamaindex";
 
-export async function createChatEngine(llm: LLM) {
+export async function createChatEngine() {
   return new SimpleChatEngine({
-    llm,
+    llm: Settings.llm,
   });
 }
diff --git a/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts b/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts
new file mode 100644
index 00000000..25c077a5
--- /dev/null
+++ b/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts
@@ -0,0 +1,13 @@
+import { OpenAI, Settings } from "llamaindex";
+
+const CHUNK_SIZE = 512;
+const CHUNK_OVERLAP = 20;
+
+export const initSettings = async () => {
+  Settings.llm = new OpenAI({
+    model: process.env.MODEL ?? "gpt-3.5-turbo",
+    maxTokens: 512,
+  });
+  Settings.chunkSize = CHUNK_SIZE;
+  Settings.chunkOverlap = CHUNK_OVERLAP;
+};
diff --git a/templates/types/streaming/nextjs/app/api/chat/route.ts b/templates/types/streaming/nextjs/app/api/chat/route.ts
index 484262f2..92e2f3b4 100644
--- a/templates/types/streaming/nextjs/app/api/chat/route.ts
+++ b/templates/types/streaming/nextjs/app/api/chat/route.ts
@@ -1,11 +1,13 @@
 import { initObservability } from "@/app/observability";
 import { StreamingTextResponse } from "ai";
-import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
+import { ChatMessage, MessageContent } from "llamaindex";
 import { NextRequest, NextResponse } from "next/server";
 import { createChatEngine } from "./engine/chat";
+import { initSettings } from "./engine/settings";
 import { LlamaIndexStream } from "./llamaindex-stream";
 
 initObservability();
+initSettings();
 
 export const runtime = "nodejs";
 export const dynamic = "force-dynamic";
@@ -44,12 +46,7 @@ export async function POST(request: NextRequest) {
       );
     }
 
-    const llm = new OpenAI({
-      model: (process.env.MODEL as any) ?? "gpt-3.5-turbo",
-      maxTokens: 512,
-    });
-
-    const chatEngine = await createChatEngine(llm);
+    const chatEngine = await createChatEngine();
 
     // Convert message content from Vercel/AI format to LlamaIndex/OpenAI format
     const userMessageContent = convertMessageContent(
-- 
GitLab