diff --git a/.changeset/proud-seals-yell.md b/.changeset/proud-seals-yell.md new file mode 100644 index 0000000000000000000000000000000000000000..a1279f21a9d7c47e7aa2f032ef721a63fb5bf699 --- /dev/null +++ b/.changeset/proud-seals-yell.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Add template for structured extraction diff --git a/helpers/index.ts b/helpers/index.ts index 53e302ab5d6f005d68cefa6314d685e03818b802..ae83f56cc7f3840f3ebaefd8f8e7f32d98182d14 100644 --- a/helpers/index.ts +++ b/helpers/index.ts @@ -163,7 +163,11 @@ export const installTemplate = async ( // This is a backend, so we need to copy the test data and create the env file. // Copy the environment file to the target directory. - if (props.template === "streaming" || props.template === "multiagent") { + if ( + props.template === "streaming" || + props.template === "multiagent" || + props.template === "extractor" + ) { await createBackendEnvFile(props.root, { modelConfig: props.modelConfig, llamaCloudKey: props.llamaCloudKey, diff --git a/helpers/types.ts b/helpers/types.ts index ee0ee85350bafbe7d2395c948ebf0514ffe91cc9..aadd4e65470a0c80aeb09e3dd0cca00881e1c1be 100644 --- a/helpers/types.ts +++ b/helpers/types.ts @@ -18,6 +18,7 @@ export type ModelConfig = { isConfigured(): boolean; }; export type TemplateType = + | "extractor" | "streaming" | "community" | "llamapack" diff --git a/questions.ts b/questions.ts index 429a29f361ff3c76e18e36e45f4d363b46a26dc3..f900a396c674cf9554de78854c771e255c9be34b 100644 --- a/questions.ts +++ b/questions.ts @@ -342,6 +342,7 @@ export const askQuestions = async ( title: "Multi-agent app (using llama-agents)", value: "multiagent", }, + { title: "Structured Extractor", value: "extractor" }, { title: `Community template from ${styledRepo}`, value: "community", @@ -405,7 +406,7 @@ export const askQuestions = async ( return; // early return - no further questions needed for llamapack projects } - if (program.template === "multiagent") { + if (program.template === "multiagent" || program.template === "extractor") { // TODO: multi-agents currently only supports FastAPI program.framework = preferences.framework = "fastapi"; } diff --git a/templates/types/extractor/fastapi/README-template.md b/templates/types/extractor/fastapi/README-template.md new file mode 100644 index 0000000000000000000000000000000000000000..7b33b09c9cd62e9c2f9c9b0c6717f06bce679346 --- /dev/null +++ b/templates/types/extractor/fastapi/README-template.md @@ -0,0 +1,68 @@ +This is a [LlamaIndex](https://www.llamaindex.ai/) project using [FastAPI](https://fastapi.tiangolo.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama) featuring [structured extraction](https://docs.llamaindex.ai/en/stable/examples/structured_outputs/structured_outputs/?h=structured+output). + +## Getting Started + +First, setup the environment with poetry: + +> **_Note:_** This step is not needed if you are using the dev-container. + +```shell +poetry install +poetry shell +``` + +Then check the parameters that have been pre-configured in the `.env` file in this directory. (E.g. you might need to configure an `OPENAI_API_KEY` if you're using OpenAI as model provider). + +Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step): + +```shell +poetry run generate +``` + +Third, run the API in one command: + +```shell +poetry run python main.py +``` + +The example provides the `/api/extractor/query` API endpoint. 
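+
+For example, a successful request returns a JSON object like the following (the values are illustrative and mirror the example declared in the output model):
+
+```json
+{
+  "response": "This is an example answer.",
+  "page_numbers": [1, 2, 3],
+  "confidence": 0.85,
+  "confidence_explanation": "This is an explanation for the confidence score."
+}
+```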
+
+The structure of the response is defined by the [Output](./app/api/routers/output.py) class. Modify this class to change the output format.
+
+You can test the endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:8000/api/extractor/query' \
+--header 'Content-Type: application/json' \
+--data '{ "query": "What is the maximum weight for a parcel?" }'
+```
+
+This returns a response with a high confidence score, since the RAG pipeline can answer the question from the provided document.
+
+Now try the following request:
+
+```shell
+curl --location 'localhost:8000/api/extractor/query' \
+--header 'Content-Type: application/json' \
+--data '{ "query": "What is the weather today?" }'
+```
+
+This returns a response with a low confidence score, since the question is not related to the provided document in the `./data` directory.
+
+You can start editing the API endpoint by modifying [`extractor.py`](./app/api/routers/extractor.py). The endpoint auto-updates as you save the file.
+
+Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
+
+The API allows CORS for all origins to simplify development. You can change this behavior by setting the `ENVIRONMENT` environment variable to `prod`:
+
+```shell
+ENVIRONMENT=prod python main.py
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
diff --git a/templates/types/extractor/fastapi/app/__init__.py b/templates/types/extractor/fastapi/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/__init__.py b/templates/types/extractor/fastapi/app/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/routers/__init__.py b/templates/types/extractor/fastapi/app/api/routers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/routers/extractor.py b/templates/types/extractor/fastapi/app/api/routers/extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..2327991ed332ce0b3e709ee69c3f530d8b1c38bb
--- /dev/null
+++ b/templates/types/extractor/fastapi/app/api/routers/extractor.py
@@ -0,0 +1,58 @@
+import logging
+import os
+
+from fastapi import APIRouter, HTTPException
+from llama_index.core.settings import Settings
+from pydantic import BaseModel
+
+from app.api.routers.output import Output
+from app.engine.index import get_index
+
+extractor_router = r = APIRouter()
+
+logger = logging.getLogger("uvicorn")
+
+
+class RequestData(BaseModel):
+    query: str
+
+    class Config:
+        json_schema_extra = {
+            "examples": [
+                {"query": "What's the maximum weight for a parcel?"},
+            ],
+        }
+
+
+@r.post("/query")
+async def query_request(
+    data: RequestData,
+):
+    # Create a query engine that returns responses in the format of the Output class
+    query_engine = get_query_engine(Output)
+
+    response = await query_engine.aquery(data.query)
+
+    output_data = response.response.dict()
+    return Output(**output_data)
+
+
+def get_query_engine(output_cls: type[BaseModel]):
+    top_k = os.getenv("TOP_K", 3)
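+    # top_k is passed to the query engine as similarity_top_k; override it with the TOP_K environment variable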
+
+    index = get_index()
+    if index is None:
+        raise HTTPException(
+            status_code=500,
+            detail="StorageContext is empty - call 'poetry run generate' to generate the storage first",
+        )
+
+    # Wrap the configured LLM so that its responses are parsed into the output_cls schema
+    sllm = Settings.llm.as_structured_llm(output_cls)
+
+    return index.as_query_engine(
+        similarity_top_k=int(top_k),
+        llm=sllm,
+        response_mode="tree_summarize",
+    )
diff --git a/templates/types/extractor/fastapi/app/api/routers/output.py b/templates/types/extractor/fastapi/app/api/routers/output.py
new file mode 100644
index 0000000000000000000000000000000000000000..56656777fb7eaec36e66369109cd89ea32999736
--- /dev/null
+++ b/templates/types/extractor/fastapi/app/api/routers/output.py
@@ -0,0 +1,32 @@
+import logging
+from llama_index.core.schema import BaseModel, Field
+from typing import List
+
+logger = logging.getLogger("uvicorn")
+
+
+class Output(BaseModel):
+    response: str = Field(..., description="The answer to the question.")
+    page_numbers: List[int] = Field(
+        ...,
+        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
+    )
+    confidence: float = Field(
+        ...,
+        ge=0,
+        le=1,
+        description="Confidence value between 0-1 of the correctness of the result.",
+    )
+    confidence_explanation: str = Field(
+        ..., description="Explanation for the confidence score"
+    )
+
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "response": "This is an example answer.",
+                "page_numbers": [1, 2, 3],
+                "confidence": 0.85,
+                "confidence_explanation": "This is an explanation for the confidence score.",
+            }
+        }
diff --git a/templates/types/extractor/fastapi/main.py b/templates/types/extractor/fastapi/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..768c4203a7faf901726eebb0a2e1fd2f97487824
--- /dev/null
+++ b/templates/types/extractor/fastapi/main.py
@@ -0,0 +1,45 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import logging
+import os
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
+from app.api.routers.extractor import extractor_router
+from app.settings import init_settings
+
+
+app = FastAPI()
+
+init_settings()
+
+environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'dev' if not set
+logger = logging.getLogger("uvicorn")
+
+if environment == "dev":
+    logger.warning("Running in development mode - allowing CORS for all origins")
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Redirect to documentation page when accessing base URL
+    @app.get("/")
+    async def redirect_to_docs():
+        return RedirectResponse(url="/docs")
+
+
+app.include_router(extractor_router, prefix="/api/extractor")
+
+if __name__ == "__main__":
+    app_host = os.getenv("APP_HOST", "0.0.0.0")
+    app_port = int(os.getenv("APP_PORT", "8000"))
+    reload = True if environment == "dev" else False
+
+    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
diff --git a/templates/types/extractor/fastapi/pyproject.toml b/templates/types/extractor/fastapi/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..b5bdea0f80eb4918d3ef9b9c7279f156142038d9
--- /dev/null
+++ b/templates/types/extractor/fastapi/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "app"
+version = "0.1.0"
+description = ""
+authors = ["Marcus Schiesser <mail@marcusschiesser.de>"]
+readme = "README.md"
+
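+# "poetry run generate" creates the vector storage for the documents in ./data (see the README)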
+[tool.poetry.scripts] +generate = "app.engine.generate:generate_datasource" + +[tool.poetry.dependencies] +python = "^3.11,<3.12" +fastapi = "^0.109.1" +uvicorn = { extras = ["standard"], version = "^0.23.2" } +python-dotenv = "^1.0.0" +llama-index = "^0.10.58" +cachetools = "^5.3.3" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/types/streaming/fastapi/app/llmhub.py b/templates/types/streaming/fastapi/app/llmhub.py deleted file mode 100644 index 69e0e324971a0ea68c001e15e88320124483a80a..0000000000000000000000000000000000000000 --- a/templates/types/streaming/fastapi/app/llmhub.py +++ /dev/null @@ -1,61 +0,0 @@ -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.core.settings import Settings -from typing import Dict -import os - -DEFAULT_MODEL = "gpt-3.5-turbo" -DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large" - -class TSIEmbedding(OpenAIEmbedding): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._query_engine = self._text_engine = self.model_name - -def llm_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_TEMPERATURE - - model = os.getenv("MODEL", DEFAULT_MODEL) - temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - max_tokens = os.getenv("LLM_MAX_TOKENS") - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model": model, - "api_key": api_key, - "api_base": api_base, - "temperature": float(temperature), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - return config - - -def embedding_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_EMBEDDING_DIM - - model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) - dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM) - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model_name": model, - "dimension": int(dimension) if dimension is not None else None, - "api_key": api_key, - "api_base": api_base, - } - return config - -def init_llmhub(): - from llama_index.llms.openai_like import OpenAILike - - llm_configs = llm_config_from_env() - embedding_configs = embedding_config_from_env() - - Settings.embed_model = TSIEmbedding(**embedding_configs) - Settings.llm = OpenAILike( - **llm_configs, - is_chat_model=True, - is_function_calling_model=False, - context_window=4096, - ) \ No newline at end of file