diff --git a/packages/create-llama/templates/.gitignore b/packages/create-llama/templates/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ec6c67b630467343abb46cfeea0535ce4b339554
--- /dev/null
+++ b/packages/create-llama/templates/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+poetry.lock
+storage
diff --git a/packages/create-llama/templates/streaming/fastapi/README-template.md b/packages/create-llama/templates/streaming/fastapi/README-template.md
new file mode 100644
index 0000000000000000000000000000000000000000..baa5fa63fcb1c07f8d74af3aa2eabd7bb493fda2
--- /dev/null
+++ b/packages/create-llama/templates/streaming/fastapi/README-template.md
@@ -0,0 +1,36 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [FastAPI](https://fastapi.tiangolo.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, set up the environment:
+
+```
+poetry install
+poetry shell
+```
+
+Second, run the development server:
+
+```
+python main.py
+```
+
+Then call the API endpoint `/api/chat` to see the result:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+You can start editing the API by modifying `app/api/routers/chat.py`. The endpoint auto-updates as you save the file.
+
+Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
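The README's curl call prints the raw server-sent-event frames produced by the streaming endpoint added below in `chat.py`. For consuming the stream programmatically, here is a minimal client sketch, assuming the dev server from this template is running on `localhost:8000` and that `requests` is installed (it is not one of the template's dependencies):

```python
# Sketch: read the token stream from the template's /api/chat endpoint.
import requests

payload = {"messages": [{"role": "user", "content": "Hello"}]}

with requests.post(
    "http://localhost:8000/api/chat", json=payload, stream=True
) as response:
    response.raise_for_status()
    for line in response.iter_lines(decode_unicode=True):
        # sse-starlette typically frames each yielded token as a "data: <token>" line;
        # skip blank separator lines and drop the single space after the colon.
        if line and line.startswith("data:"):
            print(line[len("data:"):].removeprefix(" "), end="", flush=True)
print()
```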
diff --git a/packages/create-llama/templates/streaming/fastapi/app/__init__.py b/packages/create-llama/templates/streaming/fastapi/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/packages/create-llama/templates/streaming/fastapi/app/api/__init__.py b/packages/create-llama/templates/streaming/fastapi/app/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/packages/create-llama/templates/streaming/fastapi/app/api/routers/__init__.py b/packages/create-llama/templates/streaming/fastapi/app/api/routers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/packages/create-llama/templates/streaming/fastapi/app/api/routers/chat.py b/packages/create-llama/templates/streaming/fastapi/app/api/routers/chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc9b5ed651efff1a91f659b8130004b0b90be6a9
--- /dev/null
+++ b/packages/create-llama/templates/streaming/fastapi/app/api/routers/chat.py
@@ -0,0 +1,79 @@
+import logging
+import os
+from typing import List
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from llama_index import (
+    StorageContext,
+    load_index_from_storage,
+    SimpleDirectoryReader,
+    VectorStoreIndex,
+)
+from llama_index.llms.base import MessageRole
+from pydantic import BaseModel
+from sse_starlette.sse import EventSourceResponse
+
+STORAGE_DIR = "./storage"  # directory to cache the generated index
+DATA_DIR = "./data"  # directory containing the documents to index
+
+chat_router = r = APIRouter()
+
+
+class Message(BaseModel):
+    role: MessageRole
+    content: str
+
+
+class _ChatData(BaseModel):
+    messages: List[Message]
+
+
+def get_index():
+    logger = logging.getLogger("uvicorn")
+    # check if storage already exists
+    if not os.path.exists(STORAGE_DIR):
+        logger.info("Creating new index")
+        # load the documents and create the index
+        documents = SimpleDirectoryReader(DATA_DIR).load_data()
+        index = VectorStoreIndex.from_documents(documents)
+        # store it for later
+        index.storage_context.persist(STORAGE_DIR)
+        logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
Stored in {STORAGE_DIR}") + else: + # load the existing index + logger.info(f"Loading index from {STORAGE_DIR}...") + storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR) + index = load_index_from_storage(storage_context) + logger.info(f"Finished loading index from {STORAGE_DIR}") + return index + + +@r.post("/") +async def chat( + request: Request, data: _ChatData, index: VectorStoreIndex = Depends(get_index) +) -> Message: + # check preconditions + if len(data.messages) == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No messages provided", + ) + lastMessage = data.messages.pop() + if lastMessage.role != MessageRole.USER: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Last message must be from user", + ) + + # query chat engine + chat_engine = index.as_chat_engine() + response = chat_engine.stream_chat(lastMessage.content, data.messages) + + # stream response + async def event_generator(): + for token in response.response_gen: + # If client closes connection, stop sending events + if await request.is_disconnected(): + break + yield token + + return EventSourceResponse(event_generator()) diff --git a/packages/create-llama/templates/streaming/fastapi/data/brk-2022.pdf b/packages/create-llama/templates/streaming/fastapi/data/brk-2022.pdf new file mode 100644 index 0000000000000000000000000000000000000000..876ea6a37f0fe785dffc74434e805b65a802b91f Binary files /dev/null and b/packages/create-llama/templates/streaming/fastapi/data/brk-2022.pdf differ diff --git a/packages/create-llama/templates/streaming/fastapi/gitignore b/packages/create-llama/templates/streaming/fastapi/gitignore new file mode 100644 index 0000000000000000000000000000000000000000..069fcb4020566da83dbc398c06dce5bbe92aface --- /dev/null +++ b/packages/create-llama/templates/streaming/fastapi/gitignore @@ -0,0 +1,2 @@ +__pycache__ +storage diff --git a/packages/create-llama/templates/streaming/fastapi/main.py b/packages/create-llama/templates/streaming/fastapi/main.py new file mode 100644 index 0000000000000000000000000000000000000000..e307354bc3b935a52d96c0e138a937969df4d4cf --- /dev/null +++ b/packages/create-llama/templates/streaming/fastapi/main.py @@ -0,0 +1,23 @@ +import os +import uvicorn +from app.api.routers.chat import chat_router +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +app = FastAPI() + +origin = os.getenv("CORS_ORIGIN") +if origin: + app.add_middleware( + CORSMiddleware, + allow_origins=[origin], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + +app.include_router(chat_router, prefix="/api/chat") + + +if __name__ == "__main__": + uvicorn.run(app="main:app", host="0.0.0.0", reload=True) diff --git a/packages/create-llama/templates/streaming/fastapi/pyproject.toml b/packages/create-llama/templates/streaming/fastapi/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..73d3cc51070a81f764999100cc7d7ec1df8f36c8 --- /dev/null +++ b/packages/create-llama/templates/streaming/fastapi/pyproject.toml @@ -0,0 +1,19 @@ +[tool.poetry] +name = "llamaindex-fastapi-streaming" +version = "0.1.0" +description = "" +authors = ["Marcus Schiesser <mail@marcusschiesser.de>"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11,<3.12" +fastapi = "^0.104.1" +uvicorn = { extras = ["standard"], version = "^0.23.2" } +llama-index = "^0.8.56" +pypdf = "^3.17.0" +sse-starlette = "^1.6.5" + + +[build-system] +requires = ["poetry-core"] 
+build-backend = "poetry.core.masonry.api"
diff --git a/packages/create-llama/templates/streaming/fastapi/tests/__init__.py b/packages/create-llama/templates/streaming/fastapi/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
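The `tests` package is added empty in this diff. Purely as an illustration of where tests could go (nothing below is part of the PR, and the file name `tests/test_chat.py` is hypothetical), a smoke test might override the `get_index` dependency so the router's validation path can be exercised without reading `data/`, building an index, or calling an LLM:

```python
# tests/test_chat.py (hypothetical): exercises the 400 path of the chat router.
from fastapi.testclient import TestClient

from app.api.routers.chat import get_index
from main import app

# The empty-message check runs before the index is touched, so a None stub suffices here.
app.dependency_overrides[get_index] = lambda: None

client = TestClient(app)


def test_chat_rejects_empty_message_list():
    response = client.post("/api/chat/", json={"messages": []})
    assert response.status_code == 400
```

Covering the streaming happy path would additionally require stubbing `as_chat_engine()` and `stream_chat()`, which is left out to keep the sketch small.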