From d7a47abe38a329acd375250cb12118275df60961 Mon Sep 17 00:00:00 2001
From: Laurie Voss <github@seldo.com>
Date: Wed, 15 Nov 2023 15:52:56 -0800
Subject: [PATCH] Lots of new docs

---
 packages/create-llama/README.md               | 72 +++++++++++++++----
 .../types/simple/fastapi/app/utils/index.py   |  9 ++-
 .../streaming/fastapi/app/utils/index.py      |  9 ++-
 3 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/packages/create-llama/README.md b/packages/create-llama/README.md
index 29326943f..1f6ba4777 100644
--- a/packages/create-llama/README.md
+++ b/packages/create-llama/README.md
@@ -2,19 +2,69 @@
 
 The easiest way to get started with [LlamaIndex](https://www.llamaindex.ai/) is by using `create-llama`. This CLI tool enables you to quickly start building a new LlamaIndex application, with everything set up for you.
 
-## Features
+Just run
 
-- NextJS, ExpressJS, or FastAPI (python) stateless backend generation 💻
-- Streaming or non-streaming backend ⚡
-- Optional `shadcn` frontend generation 🎨
+```bash
+npx create-llama@latest
+```
+
+to get started, or see below for more options. Once your app is generated, run
+
+```bash
+npm run dev
+```
+
+to start the development server. You can then visit [http://localhost:3000](http://localhost:3000) to see your app.
+
+## What you'll get
+
+- A Next.js-powered front-end. The app is set up as a chat interface that can answer questions about your data (see below)
+  - You can style it with HTML and CSS, or you can optionally use components from [shadcn/ui](https://ui.shadcn.com/)
+- Your choice of three backends:
+  - **Next.js**: if you select this option, you'll have a full-stack Next.js application that you can deploy to a host like [Vercel](https://vercel.com/) in just a few clicks. This uses [LlamaIndex.TS](https://www.npmjs.com/package/llamaindex), our TypeScript library.
+  - **Express**: if you want a more traditional Node.js application, you can generate an Express backend. This also uses LlamaIndex.TS.
+  - **Python FastAPI**: if you select this option, you'll get a backend powered by the [llama-index Python package](https://pypi.org/project/llama-index/), which you can deploy to a service like Render or Fly.io.
+- The backend has a single endpoint that lets you send the state of your chat and receive additional responses (see the example request after this list)
+- You can choose whether you want a streaming or non-streaming backend (if you're not sure, we recommend streaming)
+- You can choose whether you want to use `ContextChatEngine` or `SimpleChatEngine`
+  - `SimpleChatEngine` will just talk to the LLM directly without using your data
+  - `ContextChatEngine` will use your data to answer questions (see below).
+- The app uses OpenAI by default, so you'll need an OpenAI API key, or you can customize it to use any of the dozens of LLMs we support.
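+
+As mentioned above, the backend exposes a single chat endpoint you can call directly once the app is running. This is only a sketch: the Next.js backend serves it at `/api/chat` on port 3000 (see `./app/api/chat/route.ts`); other backends may listen on a different port, and the exact payload shape below is an assumption, so check your generated code:
+
+```bash
+curl http://localhost:3000/api/chat \
+  -H "Content-Type: application/json" \
+  -d '{ "messages": [{ "role": "user", "content": "What is in my data?" }] }'
+```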
+
+## Using your data
+
+If you've enabled `ContextChatEngine`, you can supply your own data and the app will index it and answer questions. Your generated app will have a folder called `data`:
 
-## Get Started
+- With the Next.js backend, this is `./data`
+- With the Express or Python backends, this is `./backend/data`
 
-You can run `create-llama` in interactive or non-interactive mode.
+The app will ingest any supported files you put in this directory. The Next.js and Express apps use LlamaIndex.TS, so they can ingest PDF, text, CSV, Markdown, Word, and HTML files. The Python backend can read even more types, including video and audio files.
 
-### Interactive
+Before you can use your data, you need to index it. If you're using the Next.js or Express apps, run:
 
-You can create a new project interactively by running:
+```bash
+npm run generate
+```
+
+Then restart your app. Remember that you'll need to re-run `generate` if you add new files to your `data` folder. If you're using the Python backend, you can trigger re-indexing of your data by deleting the `./storage` folder and restarting the app.
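+
+For example, from inside the Python backend's folder (a minimal sketch, assuming a Unix-like shell):
+
+```bash
+rm -rf ./storage   # delete the cached index
+# restart the app and it will re-index everything in ./data
+```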
+
+## Don't want a front-end?
+
+It's optional! If you've selected the Python or Express backends, just delete the `frontend` folder and you'll get an API without any front-end code.
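+
+For example, from the root of a generated Express or Python project (assuming a Unix-like shell):
+
+```bash
+rm -rf frontend
+```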
+
+## Customizing the LLM
+
+By default the app uses OpenAI's `gpt-3.5-turbo` model. If you want to use GPT-4, you can modify this by editing a file:
+
+- In the Next.js backend, edit `./app/api/chat/route.ts` and replace `gpt-3.5-turbo` with `gpt-4`
+- In the Express backend, edit `./backend/src/controllers/chat.controller.ts` and likewise replace `gpt-3.5-turbo` with `gpt-4`
+- In the Python backend, edit `./backend/app/utils/index.py` and once again replace `gpt-3.5-turbo` with `gpt-4`
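+
+For example, in the Python backend the relevant lines of that file look roughly like this after the switch (a sketch based on the template shipped with this change):
+
+```python
+from llama_index import ServiceContext
+from llama_index.llms import OpenAI
+
+# swap "gpt-3.5-turbo" for "gpt-4" or any other supported model
+service_context = ServiceContext.from_defaults(
+    llm=OpenAI(model="gpt-4")
+)
+```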
+
+You can also replace OpenAI with one of our [dozens of other supported LLMs](https://docs.llamaindex.ai/en/stable/module_guides/models/llms/modules.html).
+
+## Example
+
+The simplest thing to do is run `create-llama` in interactive mode:
 
 ```bash
 npx create-llama@latest
@@ -26,9 +76,7 @@ yarn create llama
 pnpm create llama@latest
 ```
 
-You will be asked for the name of your project, along with other configuration options.
-
-Here is an example:
+You will be asked for the name of your project, along with other configuration options. The prompts look something like this:
 
 ```bash
 >> npm create llama@latest
@@ -45,7 +93,7 @@ Ok to proceed? (y) y
 Creating a new LlamaIndex app in /home/my-app.
 ```
 
-### Non-interactive
+### Running non-interactively
 
 You can also pass command line arguments to set up a new project
 non-interactively. See `create-llama --help`:
diff --git a/packages/create-llama/templates/types/simple/fastapi/app/utils/index.py b/packages/create-llama/templates/types/simple/fastapi/app/utils/index.py
index 076ca7663..0ee09b19d 100644
--- a/packages/create-llama/templates/types/simple/fastapi/app/utils/index.py
+++ b/packages/create-llama/templates/types/simple/fastapi/app/utils/index.py
@@ -6,12 +6,17 @@ from llama_index import (
     StorageContext,
     VectorStoreIndex,
     load_index_from_storage,
+    ServiceContext,
 )
+from llama_index.llms import OpenAI
 
 
 STORAGE_DIR = "./storage"  # directory to cache the generated index
 DATA_DIR = "./data"  # directory containing the documents to index
 
+service_context = ServiceContext.from_defaults(
+    llm=OpenAI(model="gpt-3.5-turbo")  # swap in "gpt-4" or another supported LLM
+)
 
 def get_index():
     logger = logging.getLogger("uvicorn")
@@ -20,7 +25,7 @@ def get_index():
         logger.info("Creating new index")
         # load the documents and create the index
         documents = SimpleDirectoryReader(DATA_DIR).load_data()
-        index = VectorStoreIndex.from_documents(documents)
+        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
         # store it for later
         index.storage_context.persist(STORAGE_DIR)
         logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
@@ -28,6 +33,6 @@ def get_index():
         # load the existing index
         logger.info(f"Loading index from {STORAGE_DIR}...")
         storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
-        index = load_index_from_storage(storage_context)
+        index = load_index_from_storage(storage_context, service_context=service_context)
         logger.info(f"Finished loading index from {STORAGE_DIR}")
     return index
diff --git a/packages/create-llama/templates/types/streaming/fastapi/app/utils/index.py b/packages/create-llama/templates/types/streaming/fastapi/app/utils/index.py
index 076ca7663..0ee09b19d 100644
--- a/packages/create-llama/templates/types/streaming/fastapi/app/utils/index.py
+++ b/packages/create-llama/templates/types/streaming/fastapi/app/utils/index.py
@@ -6,12 +6,17 @@ from llama_index import (
     StorageContext,
     VectorStoreIndex,
     load_index_from_storage,
+    ServiceContext,
 )
+from llama_index.llms import OpenAI
 
 
 STORAGE_DIR = "./storage"  # directory to cache the generated index
 DATA_DIR = "./data"  # directory containing the documents to index
 
+service_context = ServiceContext.from_defaults(
+    llm=OpenAI(model="gpt-3.5-turbo")  # swap in "gpt-4" or another supported LLM
+)
 
 def get_index():
     logger = logging.getLogger("uvicorn")
@@ -20,7 +25,7 @@ def get_index():
         logger.info("Creating new index")
         # load the documents and create the index
         documents = SimpleDirectoryReader(DATA_DIR).load_data()
-        index = VectorStoreIndex.from_documents(documents)
+        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
         # store it for later
         index.storage_context.persist(STORAGE_DIR)
         logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
@@ -28,6 +33,6 @@ def get_index():
         # load the existing index
         logger.info(f"Loading index from {STORAGE_DIR}...")
         storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
-        index = load_index_from_storage(storage_context)
+        index = load_index_from_storage(storage_context, service_context=service_context)
         logger.info(f"Finished loading index from {STORAGE_DIR}")
     return index
-- 
GitLab