Commit 24ea84f1 authored by Marcus Schiesser

feat[cl-fastapi]: draft for new fastapi structure (supporting engines)

parent db34efaa
@@ -9,7 +9,7 @@ poetry install
 poetry shell
 ```
-By default, we use the OpenAI LLM (though you can customize, see app/api/routers/chat.py). As a result you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
+By default, we use the OpenAI LLM (though you can customize, see `app/context.py`). As a result you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
 Example `backend/.env` file:
...
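The actual example .env contents are truncated in the diff above. As a rough, hedged illustration only (not the example from the repository), such a file would carry the OpenAI key plus the optional MODEL override that the new create_base_context() reads:

```
# illustrative backend/.env sketch -- placeholder values, not taken from the commit
OPENAI_API_KEY=<your-openai-api-key>
MODEL=gpt-3.5-turbo
```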
app/api/routers/chat.py:

 from typing import List
 from fastapi.responses import StreamingResponse
+from llama_index.chat_engine.types import BaseChatEngine
-from app.utils.index import get_index
+from app.engine.index import get_chat_engine
 from fastapi import APIRouter, Depends, HTTPException, Request, status
-from llama_index import VectorStoreIndex
 from llama_index.llms.base import ChatMessage
 from llama_index.llms.types import MessageRole
 from pydantic import BaseModel
@@ -25,7 +25,7 @@ class _ChatData(BaseModel):
 async def chat(
     request: Request,
     data: _ChatData,
-    index: VectorStoreIndex = Depends(get_index),
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
 ):
     # check preconditions and get last message
     if len(data.messages) == 0:
@@ -49,7 +49,6 @@ async def chat(
     ]
     # query chat engine
-    chat_engine = index.as_chat_engine()
     response = chat_engine.stream_chat(lastMessage.content, messages)
     # stream response
...
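The part of the handler that turns the stream into an HTTP response is truncated above. As a minimal sketch only, assuming stream_chat() returns an object that exposes the generated tokens as a response_gen generator (as the llama_index chat engines of this era do), the handler could finish along these lines:

```python
    # ...inside async def chat(...) shown above -- hedged sketch, not the commit's literal code
    async def event_generator():
        for token in response.response_gen:
            # stop pushing tokens if the client has gone away
            if await request.is_disconnected():
                break
            yield token

    return StreamingResponse(event_generator(), media_type="text/plain")
```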
New file app/context.py:

import os

from llama_index import ServiceContext
from llama_index.llms import OpenAI


def create_base_context():
    model = os.getenv("MODEL", "gpt-3.5-turbo")
    return ServiceContext.from_defaults(
        llm=OpenAI(model=model),
    )
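The README change above now points here for customizing the LLM. A minimal sketch of one way that could look, assuming a temperature argument on the OpenAI wrapper (a generic parameter of the wrapper, not something this commit sets):

```python
# Hedged customization sketch for app/context.py -- illustrative only.
import os

from llama_index import ServiceContext
from llama_index.llms import OpenAI


def create_base_context():
    model = os.getenv("MODEL", "gpt-4")  # e.g. change the default model
    return ServiceContext.from_defaults(
        llm=OpenAI(model=model, temperature=0.2),  # assumed parameter, not from the diff
    )
```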
New file app/engine/constants.py:

STORAGE_DIR = "./storage"  # directory to cache the generated index
DATA_DIR = "./data"  # directory containing the documents to index
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 20
New file app/engine/context.py:

from llama_index import ServiceContext

from app.context import create_base_context
from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP


def create_service_context():
    base = create_base_context()
    return ServiceContext.from_defaults(
        llm=base.llm,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
New file (the index generation script, presumably app/engine/generate.py):

import logging

from dotenv import load_dotenv

from app.engine.constants import DATA_DIR, STORAGE_DIR
from app.engine.context import create_service_context

load_dotenv()

from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
)

logger = logging.getLogger("uvicorn")


def generate_datasource(service_context):
    logger.info("Creating new index")
    # load the documents and create the index
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    # store it for later
    index.storage_context.persist(STORAGE_DIR)
    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")


if __name__ == "__main__":
    service_context = create_service_context()
    generate_datasource(service_context)
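This script has to be run once (and re-run whenever ./data changes) so that ./storage exists before the API can answer anything. A hedged usage sketch; the module path app.engine.generate is an assumption based on the app.engine package layout above, not stated in the diff:

```python
# Illustrative only: programmatic regeneration of the index.
# app.engine.generate is an assumed module path for the script above.
from app.engine.context import create_service_context
from app.engine.generate import generate_datasource

generate_datasource(create_service_context())  # reads ./data, persists the index to ./storage
```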
New file app/engine/index.py:

import logging
import os

from llama_index import (
    StorageContext,
    load_index_from_storage,
)

from app.engine.constants import STORAGE_DIR
from app.engine.context import create_service_context


def get_chat_engine():
    service_context = create_service_context()
    # check if storage already exists
    if not os.path.exists(STORAGE_DIR):
        raise Exception(
            "StorageContext is empty - call 'npm run generate' to generate the storage first"
        )
    logger = logging.getLogger("uvicorn")
    # load the existing index
    logger.info(f"Loading index from {STORAGE_DIR}...")
    storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
    index = load_index_from_storage(storage_context, service_context=service_context)
    logger.info(f"Finished loading index from {STORAGE_DIR}")
    return index.as_chat_engine()
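For a quick manual check of the new engine outside of FastAPI, something along these lines should work, assuming the index has already been generated into ./storage and OPENAI_API_KEY is available (illustrative sketch, not part of the commit):

```python
# Hedged smoke test for the new engine module.
from dotenv import load_dotenv

load_dotenv()  # makes OPENAI_API_KEY (and optionally MODEL) available

from app.engine.index import get_chat_engine

engine = get_chat_engine()  # loads the persisted index from ./storage
print(engine.chat("What do the indexed documents talk about?"))
```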
Deleted file app/utils/index.py (its responsibilities move to app/context.py and the new app/engine package):

import logging
import os

from llama_index import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
    ServiceContext,
)
from llama_index.llms import OpenAI

STORAGE_DIR = "./storage"  # directory to cache the generated index
DATA_DIR = "./data"  # directory containing the documents to index

service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo")
)


def get_index():
    logger = logging.getLogger("uvicorn")
    # check if storage already exists
    if not os.path.exists(STORAGE_DIR):
        logger.info("Creating new index")
        # load the documents and create the index
        documents = SimpleDirectoryReader(DATA_DIR).load_data()
        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
        # store it for later
        index.storage_context.persist(STORAGE_DIR)
        logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
    else:
        # load the existing index
        logger.info(f"Loading index from {STORAGE_DIR}...")
        storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
        index = load_index_from_storage(storage_context, service_context=service_context)
        logger.info(f"Finished loading index from {STORAGE_DIR}")
    return index
Context lines from a further changed file (only the surrounding, unchanged lines were captured here):

 from dotenv import load_dotenv
 load_dotenv()
 import logging
...