diff --git a/.changeset/proud-seals-yell.md b/.changeset/proud-seals-yell.md new file mode 100644 index 0000000000000000000000000000000000000000..a1279f21a9d7c47e7aa2f032ef721a63fb5bf699 --- /dev/null +++ b/.changeset/proud-seals-yell.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Add template for structured extraction diff --git a/helpers/index.ts b/helpers/index.ts index 53e302ab5d6f005d68cefa6314d685e03818b802..ae83f56cc7f3840f3ebaefd8f8e7f32d98182d14 100644 --- a/helpers/index.ts +++ b/helpers/index.ts @@ -163,7 +163,11 @@ export const installTemplate = async ( // This is a backend, so we need to copy the test data and create the env file. // Copy the environment file to the target directory. - if (props.template === "streaming" || props.template === "multiagent") { + if ( + props.template === "streaming" || + props.template === "multiagent" || + props.template === "extractor" + ) { await createBackendEnvFile(props.root, { modelConfig: props.modelConfig, llamaCloudKey: props.llamaCloudKey, diff --git a/helpers/types.ts b/helpers/types.ts index ee0ee85350bafbe7d2395c948ebf0514ffe91cc9..aadd4e65470a0c80aeb09e3dd0cca00881e1c1be 100644 --- a/helpers/types.ts +++ b/helpers/types.ts @@ -18,6 +18,7 @@ export type ModelConfig = { isConfigured(): boolean; }; export type TemplateType = + | "extractor" | "streaming" | "community" | "llamapack" diff --git a/questions.ts b/questions.ts index 429a29f361ff3c76e18e36e45f4d363b46a26dc3..f900a396c674cf9554de78854c771e255c9be34b 100644 --- a/questions.ts +++ b/questions.ts @@ -342,6 +342,7 @@ export const askQuestions = async ( title: "Multi-agent app (using llama-agents)", value: "multiagent", }, + { title: "Structured Extractor", value: "extractor" }, { title: `Community template from ${styledRepo}`, value: "community", @@ -405,7 +406,7 @@ export const askQuestions = async ( return; // early return - no further questions needed for llamapack projects } - if (program.template === "multiagent") { + if (program.template === "multiagent" || program.template === "extractor") { // TODO: multi-agents currently only supports FastAPI program.framework = preferences.framework = "fastapi"; } diff --git a/templates/types/extractor/fastapi/README-template.md b/templates/types/extractor/fastapi/README-template.md new file mode 100644 index 0000000000000000000000000000000000000000..7b33b09c9cd62e9c2f9c9b0c6717f06bce679346 --- /dev/null +++ b/templates/types/extractor/fastapi/README-template.md @@ -0,0 +1,68 @@ +This is a [LlamaIndex](https://www.llamaindex.ai/) project using [FastAPI](https://fastapi.tiangolo.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama) featuring [structured extraction](https://docs.llamaindex.ai/en/stable/examples/structured_outputs/structured_outputs/?h=structured+output). + +## Getting Started + +First, setup the environment with poetry: + +> **_Note:_** This step is not needed if you are using the dev-container. + +```shell +poetry install +poetry shell +``` + +Then check the parameters that have been pre-configured in the `.env` file in this directory. (E.g. you might need to configure an `OPENAI_API_KEY` if you're using OpenAI as model provider). + +Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step): + +```shell +poetry run generate +``` + +Third, run the API in one command: + +```shell +poetry run python main.py +``` + +The example provides the `/api/extractor/query` API endpoint. 
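+
+For example, a successful request returns a JSON object like the following (the values are illustrative and mirror the example declared in the output model):
+
+```json
+{
+  "response": "This is an example answer.",
+  "page_numbers": [1, 2, 3],
+  "confidence": 0.85,
+  "confidence_explanation": "This is an explanation for the confidence score."
+}
+```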
+
+The structure of the response is defined by the [Output](./app/api/routers/output.py) class. Modify this class to change the output format.
+
+You can test the endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:8000/api/extractor/query' \
+--header 'Content-Type: application/json' \
+--data '{ "query": "What is the maximum weight for a parcel?" }'
+```
+
+This returns a response with a high confidence score, since the RAG pipeline can answer the question from the provided document.
+
+Now try the following request:
+
+```shell
+curl --location 'localhost:8000/api/extractor/query' \
+--header 'Content-Type: application/json' \
+--data '{ "query": "What is the weather today?" }'
+```
+
+This returns a response with a low confidence score, since the question is not related to the provided document in the `./data` directory.
+
+You can start editing the API endpoint by modifying [`extractor.py`](./app/api/routers/extractor.py). The endpoint auto-updates as you save the file.
+
+Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
+
+The API allows CORS for all origins to simplify development. You can change this behavior by setting the `ENVIRONMENT` environment variable to `prod`:
+
+```shell
+ENVIRONMENT=prod python main.py
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
diff --git a/templates/types/extractor/fastapi/app/__init__.py b/templates/types/extractor/fastapi/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/__init__.py b/templates/types/extractor/fastapi/app/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/routers/__init__.py b/templates/types/extractor/fastapi/app/api/routers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/types/extractor/fastapi/app/api/routers/extractor.py b/templates/types/extractor/fastapi/app/api/routers/extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..2327991ed332ce0b3e709ee69c3f530d8b1c38bb
--- /dev/null
+++ b/templates/types/extractor/fastapi/app/api/routers/extractor.py
@@ -0,0 +1,58 @@
+import logging
+import os
+
+from fastapi import APIRouter, HTTPException
+from llama_index.core.settings import Settings
+from pydantic import BaseModel
+
+from app.api.routers.output import Output
+from app.engine.index import get_index
+
+extractor_router = r = APIRouter()
+
+logger = logging.getLogger("uvicorn")
+
+
+class RequestData(BaseModel):
+    query: str
+
+    class Config:
+        json_schema_extra = {
+            "examples": [
+                {"query": "What's the maximum weight for a parcel?"},
+            ],
+        }
+
+
+@r.post("/query")
+async def query_request(
+    data: RequestData,
+):
+    # Create a query engine that returns responses in the format of the Output class
+    query_engine = get_query_engine(Output)
+
+    response = await query_engine.aquery(data.query)
+
+    output_data = response.response.dict()
+    return Output(**output_data)
+
+
+def get_query_engine(output_cls: type[BaseModel]):
+    top_k = os.getenv("TOP_K", 3)
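+    # top_k is passed to the query engine as similarity_top_k; override it with the TOP_K environment variable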
+
+    index = get_index()
+    if index is None:
+        raise HTTPException(
+            status_code=500,
+            detail="StorageContext is empty - call 'poetry run generate' to generate the storage first",
+        )
+
+    # Wrap the configured LLM so that its responses are parsed into the output_cls schema
+    sllm = Settings.llm.as_structured_llm(output_cls)
+
+    return index.as_query_engine(
+        similarity_top_k=int(top_k),
+        llm=sllm,
+        response_mode="tree_summarize",
+    )
diff --git a/templates/types/extractor/fastapi/app/api/routers/output.py b/templates/types/extractor/fastapi/app/api/routers/output.py
new file mode 100644
index 0000000000000000000000000000000000000000..56656777fb7eaec36e66369109cd89ea32999736
--- /dev/null
+++ b/templates/types/extractor/fastapi/app/api/routers/output.py
@@ -0,0 +1,32 @@
+import logging
+from llama_index.core.schema import BaseModel, Field
+from typing import List
+
+logger = logging.getLogger("uvicorn")
+
+
+class Output(BaseModel):
+    response: str = Field(..., description="The answer to the question.")
+    page_numbers: List[int] = Field(
+        ...,
+        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
+    )
+    confidence: float = Field(
+        ...,
+        ge=0,
+        le=1,
+        description="Confidence value between 0-1 of the correctness of the result.",
+    )
+    confidence_explanation: str = Field(
+        ..., description="Explanation for the confidence score"
+    )
+
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "response": "This is an example answer.",
+                "page_numbers": [1, 2, 3],
+                "confidence": 0.85,
+                "confidence_explanation": "This is an explanation for the confidence score.",
+            }
+        }
diff --git a/templates/types/extractor/fastapi/main.py b/templates/types/extractor/fastapi/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..768c4203a7faf901726eebb0a2e1fd2f97487824
--- /dev/null
+++ b/templates/types/extractor/fastapi/main.py
@@ -0,0 +1,45 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import logging
+import os
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
+from app.api.routers.extractor import extractor_router
+from app.settings import init_settings
+
+
+app = FastAPI()
+
+init_settings()
+
+environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'dev' if not set
+logger = logging.getLogger("uvicorn")
+
+if environment == "dev":
+    logger.warning("Running in development mode - allowing CORS for all origins")
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Redirect to documentation page when accessing base URL
+    @app.get("/")
+    async def redirect_to_docs():
+        return RedirectResponse(url="/docs")
+
+
+app.include_router(extractor_router, prefix="/api/extractor")
+
+if __name__ == "__main__":
+    app_host = os.getenv("APP_HOST", "0.0.0.0")
+    app_port = int(os.getenv("APP_PORT", "8000"))
+    reload = True if environment == "dev" else False
+
+    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
diff --git a/templates/types/extractor/fastapi/pyproject.toml b/templates/types/extractor/fastapi/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..b5bdea0f80eb4918d3ef9b9c7279f156142038d9
--- /dev/null
+++ b/templates/types/extractor/fastapi/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "app"
+version = "0.1.0"
+description = ""
+authors = ["Marcus Schiesser <mail@marcusschiesser.de>"]
+readme = "README.md"
+
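+# "poetry run generate" creates the vector storage for the documents in ./data (see the README)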
+[tool.poetry.scripts] +generate = "app.engine.generate:generate_datasource" + +[tool.poetry.dependencies] +python = "^3.11,<3.12" +fastapi = "^0.109.1" +uvicorn = { extras = ["standard"], version = "^0.23.2" } +python-dotenv = "^1.0.0" +llama-index = "^0.10.58" +cachetools = "^5.3.3" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/types/streaming/fastapi/app/llmhub.py b/templates/types/streaming/fastapi/app/llmhub.py deleted file mode 100644 index 69e0e324971a0ea68c001e15e88320124483a80a..0000000000000000000000000000000000000000 --- a/templates/types/streaming/fastapi/app/llmhub.py +++ /dev/null @@ -1,61 +0,0 @@ -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.core.settings import Settings -from typing import Dict -import os - -DEFAULT_MODEL = "gpt-3.5-turbo" -DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large" - -class TSIEmbedding(OpenAIEmbedding): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._query_engine = self._text_engine = self.model_name - -def llm_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_TEMPERATURE - - model = os.getenv("MODEL", DEFAULT_MODEL) - temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - max_tokens = os.getenv("LLM_MAX_TOKENS") - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model": model, - "api_key": api_key, - "api_base": api_base, - "temperature": float(temperature), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - return config - - -def embedding_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_EMBEDDING_DIM - - model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) - dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM) - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model_name": model, - "dimension": int(dimension) if dimension is not None else None, - "api_key": api_key, - "api_base": api_base, - } - return config - -def init_llmhub(): - from llama_index.llms.openai_like import OpenAILike - - llm_configs = llm_config_from_env() - embedding_configs = embedding_config_from_env() - - Settings.embed_model = TSIEmbedding(**embedding_configs) - Settings.llm = OpenAILike( - **llm_configs, - is_chat_model=True, - is_function_calling_model=False, - context_window=4096, - ) \ No newline at end of file