diff --git a/templates/types/streaming/fastapi/README-template.md b/templates/types/streaming/fastapi/README-template.md
index f0bfa5e089a960e2327b6ccdd4548948952b36f3..77fa879319be255a6cb422b65c0b9602e7125076 100644
--- a/templates/types/streaming/fastapi/README-template.md
+++ b/templates/types/streaming/fastapi/README-template.md
@@ -9,7 +9,7 @@ poetry install
 poetry shell
 ```
 
-By default, we use the OpenAI LLM (though you can customize, see app/api/routers/chat.py). As a result you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
+By default, we use the OpenAI LLM (you can customize this in `app/context.py`). As a result, you need to specify an `OPENAI_API_KEY` in a `.env` file in this directory.
 
 Example `backend/.env` file:
 
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index c55b3bbed5cb4ddc10a65e78d20796fb2c936362..9dd9eb6ed30544899a02c7eb3678239a5b94ea6d 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -1,10 +1,10 @@
 from typing import List
 
 from fastapi.responses import StreamingResponse
+from llama_index.chat_engine.types import BaseChatEngine
 
-from app.utils.index import get_index
+from app.engine.index import get_chat_engine
 from fastapi import APIRouter, Depends, HTTPException, Request, status
-from llama_index import VectorStoreIndex
 from llama_index.llms.base import ChatMessage
 from llama_index.llms.types import MessageRole
 from pydantic import BaseModel
@@ -25,7 +25,7 @@ class _ChatData(BaseModel):
 async def chat(
     request: Request,
     data: _ChatData,
-    index: VectorStoreIndex = Depends(get_index),
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
 ):
     # check preconditions and get last message
     if len(data.messages) == 0:
@@ -49,7 +49,6 @@ async def chat(
     ]
 
     # query chat engine
-    chat_engine = index.as_chat_engine()
     response = chat_engine.stream_chat(lastMessage.content, messages)
 
     # stream response
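The hunk ends at the streaming step. For orientation, here is a minimal sketch of how the `stream_chat` response is typically forwarded to the client, assuming the template's existing event-generator pattern; the `stream_response` helper and its names are illustrative, not part of this diff:

```python
from fastapi import Request
from fastapi.responses import StreamingResponse


async def stream_response(request: Request, response) -> StreamingResponse:
    # `response` is the streaming chat response returned by stream_chat();
    # its `response_gen` yields tokens as they arrive
    async def event_generator():
        for token in response.response_gen:
            # stop pushing tokens if the client has disconnected
            if await request.is_disconnected():
                break
            yield token

    return StreamingResponse(event_generator(), media_type="text/plain")
```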
diff --git a/templates/types/streaming/fastapi/app/context.py b/templates/types/streaming/fastapi/app/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae00de217c8741e080c981cc3fed21f24fe19961
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/context.py
@@ -0,0 +1,11 @@
+import os
+
+from llama_index import ServiceContext
+from llama_index.llms import OpenAI
+
+
+def create_base_context():
+    model = os.getenv("MODEL", "gpt-3.5-turbo")
+    return ServiceContext.from_defaults(
+        llm=OpenAI(model=model),
+    )
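Since the README now points to `app/context.py` as the customization hook, a hedged sketch of what swapping the default LLM there could look like (using `Anthropic` here is an illustrative assumption, not part of the diff; any LLM class shipped with this llama_index version works the same way):

```python
import os

from llama_index import ServiceContext
from llama_index.llms import Anthropic


def create_base_context():
    # hypothetical customization: default to Claude instead of gpt-3.5-turbo
    model = os.getenv("MODEL", "claude-2")
    return ServiceContext.from_defaults(
        llm=Anthropic(model=model),
    )
```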
diff --git a/templates/types/streaming/fastapi/app/utils/__init__.py b/templates/types/streaming/fastapi/app/engine/__init__.py
similarity index 100%
rename from templates/types/streaming/fastapi/app/utils/__init__.py
rename to templates/types/streaming/fastapi/app/engine/__init__.py
diff --git a/templates/types/streaming/fastapi/app/engine/constants.py b/templates/types/streaming/fastapi/app/engine/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dba7d2e0b870bfbec91f19e054d775ab25ceb23
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/constants.py
@@ -0,0 +1,4 @@
+STORAGE_DIR = "./storage"  # directory to cache the generated index
+DATA_DIR = "./data"  # directory containing the documents to index
+CHUNK_SIZE = 1024
+CHUNK_OVERLAP = 20
diff --git a/templates/types/streaming/fastapi/app/engine/context.py b/templates/types/streaming/fastapi/app/engine/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..4756d813d812c7244df2818ef81f426e6fe13ca0
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/context.py
@@ -0,0 +1,13 @@
+from llama_index import ServiceContext
+
+from app.context import create_base_context
+from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP
+
+
+def create_service_context():
+    base = create_base_context()
+    return ServiceContext.from_defaults(
+        llm=base.llm,
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=CHUNK_OVERLAP,
+    )
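Note that `create_service_context` only carries over `base.llm`; any other setting configured in the base context is not propagated. A hedged variant that also forwards the embedding model, in case `app/context.py` is customized beyond the LLM:

```python
from llama_index import ServiceContext

from app.context import create_base_context
from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP


def create_service_context():
    base = create_base_context()
    return ServiceContext.from_defaults(
        llm=base.llm,
        # assumption: forward the base embed_model as well, so an embedding
        # customized in app/context.py is not silently dropped here
        embed_model=base.embed_model,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
```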
diff --git a/templates/types/streaming/fastapi/app/engine/generate.py b/templates/types/streaming/fastapi/app/engine/generate.py
new file mode 100644
index 0000000000000000000000000000000000000000..3abb7491abe405d9d5377e37f78604ee1f9bd2b1
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/generate.py
@@ -0,0 +1,28 @@
+import logging
+
+from dotenv import load_dotenv
+
+# load .env before importing llama_index (and transitively openai), so that
+# env vars like OPENAI_API_KEY are visible when those modules initialize
+load_dotenv()
+
+from llama_index import SimpleDirectoryReader, VectorStoreIndex
+
+from app.engine.constants import DATA_DIR, STORAGE_DIR
+from app.engine.context import create_service_context
+
+logger = logging.getLogger("uvicorn")
+
+
+def generate_datasource(service_context):
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = SimpleDirectoryReader(DATA_DIR).load_data()
+    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+    # store it for later
+    index.storage_context.persist(STORAGE_DIR)
+    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
+
+
+if __name__ == "__main__":
+    service_context = create_service_context()
+    generate_datasource(service_context)
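The `__main__` block makes the module runnable directly (`python app/engine/generate.py` from the backend directory). It can also be driven programmatically, for example to rebuild the index with different chunk settings; a sketch (the 512/64 values are arbitrary, not defaults from this template):

```python
from llama_index import ServiceContext

from app.context import create_base_context
from app.engine.generate import generate_datasource

# hypothetical: rebuild the index with smaller chunks than the template defaults
base = create_base_context()
service_context = ServiceContext.from_defaults(
    llm=base.llm,
    chunk_size=512,
    chunk_overlap=64,
)
generate_datasource(service_context)
```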
diff --git a/templates/types/streaming/fastapi/app/engine/index.py b/templates/types/streaming/fastapi/app/engine/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f7d36030c9485b359e2c23c855fd6e2ddc90fef
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/index.py
@@ -0,0 +1,25 @@
+import logging
+import os
+from llama_index import (
+    StorageContext,
+    load_index_from_storage,
+)
+
+from app.engine.constants import STORAGE_DIR
+from app.engine.context import create_service_context
+
+
+def get_chat_engine():
+    service_context = create_service_context()
+    # check if storage already exists
+    if not os.path.exists(STORAGE_DIR):
+        raise Exception(
+            "StorageContext is empty - call 'npm run generate' to generate the storage first"
+        )
+    logger = logging.getLogger("uvicorn")
+    # load the existing index
+    logger.info(f"Loading index from {STORAGE_DIR}...")
+    storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
+    index = load_index_from_storage(storage_context, service_context=service_context)
+    logger.info(f"Finished loading index from {STORAGE_DIR}")
+    return index.as_chat_engine()
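Outside of FastAPI's dependency injection, the same factory can be exercised directly, which is handy as a smoke test after generating the storage; a minimal sketch (the question is a placeholder):

```python
from dotenv import load_dotenv

load_dotenv()  # ensure OPENAI_API_KEY is set before llama_index is used

from app.engine.index import get_chat_engine

chat_engine = get_chat_engine()  # raises if ./storage has not been generated yet
response = chat_engine.chat("What is this document about?")
print(response)
```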
diff --git a/templates/types/streaming/fastapi/app/utils/index.py b/templates/types/streaming/fastapi/app/utils/index.py
deleted file mode 100644
index cb16cdba37897fc203bdd0358c5f088628375a3f..0000000000000000000000000000000000000000
--- a/templates/types/streaming/fastapi/app/utils/index.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import logging
-import os
-
-from llama_index import (
-    SimpleDirectoryReader,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-    ServiceContext,
-)
-from llama_index.llms import OpenAI
-
-
-STORAGE_DIR = "./storage"  # directory to cache the generated index
-DATA_DIR = "./data"  # directory containing the documents to index
-
-service_context = ServiceContext.from_defaults(
-    llm=OpenAI(model="gpt-3.5-turbo")
-)
-
-def get_index():
-    logger = logging.getLogger("uvicorn")
-    # check if storage already exists
-    if not os.path.exists(STORAGE_DIR):
-        logger.info("Creating new index")
-        # load the documents and create the index
-        documents = SimpleDirectoryReader(DATA_DIR).load_data()
-        index = VectorStoreIndex.from_documents(documents,service_context=service_context)
-        # store it for later
-        index.storage_context.persist(STORAGE_DIR)
-        logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
-    else:
-        # load the existing index
-        logger.info(f"Loading index from {STORAGE_DIR}...")
-        storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
-        index = load_index_from_storage(storage_context,service_context=service_context)
-        logger.info(f"Finished loading index from {STORAGE_DIR}")
-    return index
diff --git a/templates/types/streaming/fastapi/main.py b/templates/types/streaming/fastapi/main.py
index 00cb79c44d58f4819243b8ee16c3108bef950382..ba56f0345bacc5ad73e4218a781bee57427e1ec9 100644
--- a/templates/types/streaming/fastapi/main.py
+++ b/templates/types/streaming/fastapi/main.py
@@ -1,4 +1,5 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import logging