From c094b0c6bfee34b92a4daa2718e17307442e2a5f Mon Sep 17 00:00:00 2001
From: "Huu Le (Lee)" <39040748+leehuwuj@users.noreply.github.com>
Date: Fri, 26 Apr 2024 14:42:34 +0700
Subject: [PATCH] Use ingestion pipeline in Python code (#61)
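
This refactor splits vector-store setup from index/ingestion logic in the Python
templates: each vector-db component now only exposes get_vector_store() in
vectordb.py, the shared app/engine/generate.py runs an IngestionPipeline
(sentence splitting + embedding, with a docstore using the "upserts_and_delete"
strategy), and app/engine/index.py rebuilds the index via
VectorStoreIndex.from_vector_store(). A minimal sketch of the resulting flow
(illustrative only: it assumes a configured embed model and one of the generated
vectordb components on the path, and the ingest/query helpers below are
hypothetical names, not part of the templates):

    from llama_index.core.indices import VectorStoreIndex
    from llama_index.core.ingestion import IngestionPipeline
    from llama_index.core.node_parser import SentenceSplitter
    from llama_index.core.settings import Settings
    from llama_index.core.storage.docstore import SimpleDocumentStore

    from app.engine.vectordb import get_vector_store  # provided by the chosen vector-db component

    def ingest(documents):
        # Split, embed, and upsert documents; the docstore tracks document hashes
        # so reruns upsert changed files and drop removed ones
        pipeline = IngestionPipeline(
            transformations=[SentenceSplitter(), Settings.embed_model],
            docstore=SimpleDocumentStore(),
            docstore_strategy="upserts_and_delete",
        )
        pipeline.vector_store = get_vector_store()  # set after init, as in generate.py
        return pipeline.run(documents=documents, show_progress=True)

    def query(question: str):
        # No separate index persistence needed: rebuild the index straight
        # from the vector store at query time
        index = VectorStoreIndex.from_vector_store(get_vector_store())
        return index.as_query_engine().query(question)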

---------
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
---
 .changeset/short-ducks-drum.md                |  5 ++
 templates/components/loaders/python/file.py   |  5 +-
 .../vectordbs/python/astra/__init__.py        |  0
 .../vectordbs/python/astra/generate.py        | 37 ----------
 .../python/astra/{index.py => vectordb.py}    | 13 +---
 .../vectordbs/python/milvus/__init__.py       |  0
 .../vectordbs/python/milvus/generate.py       | 39 -----------
 .../vectordbs/python/milvus/index.py          | 22 ------
 .../vectordbs/python/milvus/vectordb.py       | 13 ++++
 .../vectordbs/python/mongo/__init__.py        |  0
 .../vectordbs/python/mongo/generate.py        | 43 ------------
 .../vectordbs/python/mongo/index.py           | 20 ------
 .../vectordbs/python/mongo/vectordb.py        | 11 +++
 .../vectordbs/python/none/__init__.py         |  0
 .../vectordbs/python/none/constants.py        |  1 -
 .../vectordbs/python/none/generate.py         | 32 ---------
 .../components/vectordbs/python/none/index.py | 20 ------
 .../vectordbs/python/none/vectordb.py         | 13 ++++
 .../vectordbs/python/pg/__init__.py           |  0
 .../vectordbs/python/pg/constants.py          |  2 -
 .../vectordbs/python/pg/generate.py           | 35 ----------
 .../components/vectordbs/python/pg/index.py   | 13 ----
 .../python/pg/{utils.py => vectordb.py}       |  8 ++-
 .../vectordbs/python/pinecone/__init__.py     |  0
 .../vectordbs/python/pinecone/generate.py     | 39 -----------
 .../vectordbs/python/pinecone/index.py        | 20 ------
 .../vectordbs/python/pinecone/vectordb.py     | 11 +++
 .../vectordbs/python/qdrant/__init__.py       |  0
 .../vectordbs/python/qdrant/generate.py       | 37 ----------
 .../vectordbs/python/qdrant/index.py          | 20 ------
 .../vectordbs/python/qdrant/vectordb.py       | 11 +++
 .../types/streaming/fastapi/app/constants.py  |  1 +
 .../streaming/fastapi/app/engine/generate.py  | 70 +++++++++++++++++++
 .../streaming/fastapi/app/engine/index.py     | 13 ++++
 34 files changed, 157 insertions(+), 397 deletions(-)
 create mode 100644 .changeset/short-ducks-drum.md
 delete mode 100644 templates/components/vectordbs/python/astra/__init__.py
 delete mode 100644 templates/components/vectordbs/python/astra/generate.py
 rename templates/components/vectordbs/python/astra/{index.py => vectordb.py} (52%)
 delete mode 100644 templates/components/vectordbs/python/milvus/__init__.py
 delete mode 100644 templates/components/vectordbs/python/milvus/generate.py
 delete mode 100644 templates/components/vectordbs/python/milvus/index.py
 create mode 100644 templates/components/vectordbs/python/milvus/vectordb.py
 delete mode 100644 templates/components/vectordbs/python/mongo/__init__.py
 delete mode 100644 templates/components/vectordbs/python/mongo/generate.py
 delete mode 100644 templates/components/vectordbs/python/mongo/index.py
 create mode 100644 templates/components/vectordbs/python/mongo/vectordb.py
 delete mode 100644 templates/components/vectordbs/python/none/__init__.py
 delete mode 100644 templates/components/vectordbs/python/none/constants.py
 delete mode 100644 templates/components/vectordbs/python/none/generate.py
 delete mode 100644 templates/components/vectordbs/python/none/index.py
 create mode 100644 templates/components/vectordbs/python/none/vectordb.py
 delete mode 100644 templates/components/vectordbs/python/pg/__init__.py
 delete mode 100644 templates/components/vectordbs/python/pg/constants.py
 delete mode 100644 templates/components/vectordbs/python/pg/generate.py
 delete mode 100644 templates/components/vectordbs/python/pg/index.py
 rename templates/components/vectordbs/python/pg/{utils.py => vectordb.py} (84%)
 delete mode 100644 templates/components/vectordbs/python/pinecone/__init__.py
 delete mode 100644 templates/components/vectordbs/python/pinecone/generate.py
 delete mode 100644 templates/components/vectordbs/python/pinecone/index.py
 create mode 100644 templates/components/vectordbs/python/pinecone/vectordb.py
 delete mode 100644 templates/components/vectordbs/python/qdrant/__init__.py
 delete mode 100644 templates/components/vectordbs/python/qdrant/generate.py
 delete mode 100644 templates/components/vectordbs/python/qdrant/index.py
 create mode 100644 templates/components/vectordbs/python/qdrant/vectordb.py
 create mode 100644 templates/types/streaming/fastapi/app/constants.py
 create mode 100644 templates/types/streaming/fastapi/app/engine/generate.py
 create mode 100644 templates/types/streaming/fastapi/app/engine/index.py

diff --git a/.changeset/short-ducks-drum.md b/.changeset/short-ducks-drum.md
new file mode 100644
index 00000000..4980e727
--- /dev/null
+++ b/.changeset/short-ducks-drum.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Use ingestion pipeline for Python
diff --git a/templates/components/loaders/python/file.py b/templates/components/loaders/python/file.py
index a814b0d0..6f72c29f 100644
--- a/templates/components/loaders/python/file.py
+++ b/templates/components/loaders/python/file.py
@@ -27,10 +27,7 @@ def llama_parse_parser():
 def get_file_documents(config: FileLoaderConfig):
     from llama_index.core.readers import SimpleDirectoryReader
 
-    reader = SimpleDirectoryReader(
-        config.data_dir,
-        recursive=True,
-    )
+    reader = SimpleDirectoryReader(config.data_dir, recursive=True, filename_as_id=True)
     if config.use_llama_parse:
         parser = llama_parse_parser()
         reader.file_extractor = {".pdf": parser}
diff --git a/templates/components/vectordbs/python/astra/__init__.py b/templates/components/vectordbs/python/astra/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/astra/generate.py b/templates/components/vectordbs/python/astra/generate.py
deleted file mode 100644
index 4d2a54af..00000000
--- a/templates/components/vectordbs/python/astra/generate.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import os
-import logging
-from llama_index.core.storage import StorageContext
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.astra_db import AstraDBVectorStore
-from app.settings import init_settings
-from app.engine.loaders import get_documents
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    documents = get_documents()
-    store = AstraDBVectorStore(
-        token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
-        api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
-        collection_name=os.environ["ASTRA_DB_COLLECTION"],
-        embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
-    )
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(f"Successfully created embeddings in the AstraDB")
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/astra/index.py b/templates/components/vectordbs/python/astra/vectordb.py
similarity index 52%
rename from templates/components/vectordbs/python/astra/index.py
rename to templates/components/vectordbs/python/astra/vectordb.py
index b1389f76..0cd962d7 100644
--- a/templates/components/vectordbs/python/astra/index.py
+++ b/templates/components/vectordbs/python/astra/vectordb.py
@@ -1,21 +1,12 @@
-import logging
 import os
-
-from llama_index.core.indices import VectorStoreIndex
 from llama_index.vector_stores.astra_db import AstraDBVectorStore
 
 
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to index from AstraDB...")
+def get_vector_store():
     store = AstraDBVectorStore(
         token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
         api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
         collection_name=os.environ["ASTRA_DB_COLLECTION"],
         embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
     )
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to index from AstraDB.")
-    return index
+    return store
diff --git a/templates/components/vectordbs/python/milvus/__init__.py b/templates/components/vectordbs/python/milvus/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/milvus/generate.py b/templates/components/vectordbs/python/milvus/generate.py
deleted file mode 100644
index b5bfc9f9..00000000
--- a/templates/components/vectordbs/python/milvus/generate.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import os
-import logging
-from llama_index.core.storage import StorageContext
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.milvus import MilvusVectorStore
-from app.settings import init_settings
-from app.engine.loaders import get_documents
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    # load the documents and create the index
-    documents = get_documents()
-    store = MilvusVectorStore(
-        uri=os.environ["MILVUS_ADDRESS"],
-        user=os.getenv("MILVUS_USERNAME"),
-        password=os.getenv("MILVUS_PASSWORD"),
-        collection_name=os.getenv("MILVUS_COLLECTION"),
-        dim=int(os.getenv("EMBEDDING_DIM")),
-    )
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(f"Successfully created embeddings in the Milvus")
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/milvus/index.py b/templates/components/vectordbs/python/milvus/index.py
deleted file mode 100644
index ffd87e63..00000000
--- a/templates/components/vectordbs/python/milvus/index.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import logging
-import os
-
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.milvus import MilvusVectorStore
-
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to index from Milvus...")
-    store = MilvusVectorStore(
-        uri=os.getenv("MILVUS_ADDRESS"),
-        user=os.getenv("MILVUS_USERNAME"),
-        password=os.getenv("MILVUS_PASSWORD"),
-        collection_name=os.getenv("MILVUS_COLLECTION"),
-        dim=int(os.getenv("EMBEDDING_DIM")),
-    )
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to index from Milvus.")
-    return index
diff --git a/templates/components/vectordbs/python/milvus/vectordb.py b/templates/components/vectordbs/python/milvus/vectordb.py
new file mode 100644
index 00000000..5791f15d
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/vectordb.py
@@ -0,0 +1,13 @@
+import os
+from llama_index.vector_stores.milvus import MilvusVectorStore
+
+
+def get_vector_store():
+    store = MilvusVectorStore(
+        uri=os.environ["MILVUS_ADDRESS"],
+        user=os.getenv("MILVUS_USERNAME"),
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM")),
+    )
+    return store
diff --git a/templates/components/vectordbs/python/mongo/__init__.py b/templates/components/vectordbs/python/mongo/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/mongo/generate.py b/templates/components/vectordbs/python/mongo/generate.py
deleted file mode 100644
index abe844c0..00000000
--- a/templates/components/vectordbs/python/mongo/generate.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import os
-import logging
-from llama_index.core.storage import StorageContext
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
-from app.settings import init_settings
-from app.engine.loaders import get_documents
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    # load the documents and create the index
-    documents = get_documents()
-    store = MongoDBAtlasVectorSearch(
-        db_name=os.environ["MONGODB_DATABASE"],
-        collection_name=os.environ["MONGODB_VECTORS"],
-        index_name=os.environ["MONGODB_VECTOR_INDEX"],
-    )
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(
-        f"Successfully created embeddings in the MongoDB collection {os.environ['MONGODB_VECTORS']}"
-    )
-    logger.info(
-        """IMPORTANT: You can't query your index yet because you need to create a vector search index in MongoDB's UI now.
-See https://github.com/run-llama/mongodb-demo/tree/main?tab=readme-ov-file#create-a-vector-search-index"""
-    )
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/mongo/index.py b/templates/components/vectordbs/python/mongo/index.py
deleted file mode 100644
index 6dba7c1d..00000000
--- a/templates/components/vectordbs/python/mongo/index.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import logging
-import os
-
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
-
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to index from MongoDB...")
-    store = MongoDBAtlasVectorSearch(
-        db_name=os.environ["MONGODB_DATABASE"],
-        collection_name=os.environ["MONGODB_VECTORS"],
-        index_name=os.environ["MONGODB_VECTOR_INDEX"],
-    )
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to index from MongoDB.")
-    return index
diff --git a/templates/components/vectordbs/python/mongo/vectordb.py b/templates/components/vectordbs/python/mongo/vectordb.py
new file mode 100644
index 00000000..d1fc5768
--- /dev/null
+++ b/templates/components/vectordbs/python/mongo/vectordb.py
@@ -0,0 +1,11 @@
+import os
+from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
+
+
+def get_vector_store():
+    store = MongoDBAtlasVectorSearch(
+        db_name=os.environ["MONGODB_DATABASE"],
+        collection_name=os.environ["MONGODB_VECTORS"],
+        index_name=os.environ["MONGODB_VECTOR_INDEX"],
+    )
+    return store
diff --git a/templates/components/vectordbs/python/none/__init__.py b/templates/components/vectordbs/python/none/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/none/constants.py b/templates/components/vectordbs/python/none/constants.py
deleted file mode 100644
index 254998eb..00000000
--- a/templates/components/vectordbs/python/none/constants.py
+++ /dev/null
@@ -1 +0,0 @@
-STORAGE_DIR = "storage"  # directory to cache the generated index
diff --git a/templates/components/vectordbs/python/none/generate.py b/templates/components/vectordbs/python/none/generate.py
deleted file mode 100644
index e38d89cb..00000000
--- a/templates/components/vectordbs/python/none/generate.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import logging
-from llama_index.core.indices import (
-    VectorStoreIndex,
-)
-from app.engine.constants import STORAGE_DIR
-from app.engine.loaders import get_documents
-from app.settings import init_settings
-
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    # load the documents and create the index
-    documents = get_documents()
-    index = VectorStoreIndex.from_documents(
-        documents,
-    )
-    # store it for later
-    index.storage_context.persist(STORAGE_DIR)
-    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/none/index.py b/templates/components/vectordbs/python/none/index.py
deleted file mode 100644
index 8b77414a..00000000
--- a/templates/components/vectordbs/python/none/index.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import logging
-import os
-
-from app.engine.constants import STORAGE_DIR
-from llama_index.core.storage import StorageContext
-from llama_index.core.indices import load_index_from_storage
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    # check if storage already exists
-    if not os.path.exists(STORAGE_DIR):
-        return None
-    # load the existing index
-    logger.info(f"Loading index from {STORAGE_DIR}...")
-    storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
-    index = load_index_from_storage(storage_context)
-    logger.info(f"Finished loading index from {STORAGE_DIR}")
-    return index
diff --git a/templates/components/vectordbs/python/none/vectordb.py b/templates/components/vectordbs/python/none/vectordb.py
new file mode 100644
index 00000000..279f7a51
--- /dev/null
+++ b/templates/components/vectordbs/python/none/vectordb.py
@@ -0,0 +1,13 @@
+import os
+
+from llama_index.core.vector_stores import SimpleVectorStore
+from app.constants import STORAGE_DIR
+
+
+def get_vector_store():
+    if not os.path.exists(STORAGE_DIR):
+        vector_store = SimpleVectorStore()
+    else:
+        vector_store = SimpleVectorStore.from_persist_dir(STORAGE_DIR)
+    vector_store.stores_text = True
+    return vector_store
diff --git a/templates/components/vectordbs/python/pg/__init__.py b/templates/components/vectordbs/python/pg/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/pg/constants.py b/templates/components/vectordbs/python/pg/constants.py
deleted file mode 100644
index a4ebd918..00000000
--- a/templates/components/vectordbs/python/pg/constants.py
+++ /dev/null
@@ -1,2 +0,0 @@
-PGVECTOR_SCHEMA = "public"
-PGVECTOR_TABLE = "llamaindex_embedding"
\ No newline at end of file
diff --git a/templates/components/vectordbs/python/pg/generate.py b/templates/components/vectordbs/python/pg/generate.py
deleted file mode 100644
index 79fa3bd7..00000000
--- a/templates/components/vectordbs/python/pg/generate.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import logging
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.core.storage import StorageContext
-
-from app.engine.loaders import get_documents
-from app.settings import init_settings
-from app.engine.utils import init_pg_vector_store_from_env
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    # load the documents and create the index
-    documents = get_documents()
-    store = init_pg_vector_store_from_env()
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(
-        f"Successfully created embeddings in the PG vector store, schema={store.schema_name} table={store.table_name}"
-    )
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/pg/index.py b/templates/components/vectordbs/python/pg/index.py
deleted file mode 100644
index 3c4f3180..00000000
--- a/templates/components/vectordbs/python/pg/index.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import logging
-from llama_index.core.indices.vector_store import VectorStoreIndex
-from app.engine.utils import init_pg_vector_store_from_env
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to index from PGVector...")
-    store = init_pg_vector_store_from_env()
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to index from PGVector.")
-    return index
diff --git a/templates/components/vectordbs/python/pg/utils.py b/templates/components/vectordbs/python/pg/vectordb.py
similarity index 84%
rename from templates/components/vectordbs/python/pg/utils.py
rename to templates/components/vectordbs/python/pg/vectordb.py
index 39127846..da5eb1a2 100644
--- a/templates/components/vectordbs/python/pg/utils.py
+++ b/templates/components/vectordbs/python/pg/vectordb.py
@@ -1,10 +1,13 @@
 import os
 from llama_index.vector_stores.postgres import PGVectorStore
 from urllib.parse import urlparse
-from app.engine.constants import PGVECTOR_SCHEMA, PGVECTOR_TABLE
 
+STORAGE_DIR = "storage"
+PGVECTOR_SCHEMA = "public"
+PGVECTOR_TABLE = "llamaindex_embedding"
 
-def init_pg_vector_store_from_env():
+
+def get_vector_store():
     original_conn_string = os.environ.get("PG_CONNECTION_STRING")
     if original_conn_string is None or original_conn_string == "":
         raise ValueError("PG_CONNECTION_STRING environment variable is not set.")
@@ -24,4 +27,5 @@ def init_pg_vector_store_from_env():
         async_connection_string=async_conn_string,
         schema_name=PGVECTOR_SCHEMA,
         table_name=PGVECTOR_TABLE,
+        embed_dim=int(os.environ.get("EMBEDDING_DIM", 768)),
     )
diff --git a/templates/components/vectordbs/python/pinecone/__init__.py b/templates/components/vectordbs/python/pinecone/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/pinecone/generate.py b/templates/components/vectordbs/python/pinecone/generate.py
deleted file mode 100644
index 5f233ba2..00000000
--- a/templates/components/vectordbs/python/pinecone/generate.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import os
-import logging
-from llama_index.core.storage import StorageContext
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.pinecone import PineconeVectorStore
-from app.settings import init_settings
-from app.engine.loaders import get_documents
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-    # load the documents and create the index
-    documents = get_documents()
-    store = PineconeVectorStore(
-        api_key=os.environ["PINECONE_API_KEY"],
-        index_name=os.environ["PINECONE_INDEX_NAME"],
-        environment=os.environ["PINECONE_ENVIRONMENT"],
-    )
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(
-        f"Successfully created embeddings and save to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
-    )
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/pinecone/index.py b/templates/components/vectordbs/python/pinecone/index.py
deleted file mode 100644
index 98824ffd..00000000
--- a/templates/components/vectordbs/python/pinecone/index.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import logging
-import os
-
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.pinecone import PineconeVectorStore
-
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to index from Pinecone...")
-    store = PineconeVectorStore(
-        api_key=os.environ["PINECONE_API_KEY"],
-        index_name=os.environ["PINECONE_INDEX_NAME"],
-        environment=os.environ["PINECONE_ENVIRONMENT"],
-    )
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to index from Pinecone.")
-    return index
diff --git a/templates/components/vectordbs/python/pinecone/vectordb.py b/templates/components/vectordbs/python/pinecone/vectordb.py
new file mode 100644
index 00000000..d6ff2cf8
--- /dev/null
+++ b/templates/components/vectordbs/python/pinecone/vectordb.py
@@ -0,0 +1,11 @@
+import os
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+
+
+def get_vector_store():
+    store = PineconeVectorStore(
+        api_key=os.environ["PINECONE_API_KEY"],
+        index_name=os.environ["PINECONE_INDEX_NAME"],
+        environment=os.environ["PINECONE_ENVIRONMENT"],
+    )
+    return store
diff --git a/templates/components/vectordbs/python/qdrant/__init__.py b/templates/components/vectordbs/python/qdrant/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/templates/components/vectordbs/python/qdrant/generate.py b/templates/components/vectordbs/python/qdrant/generate.py
deleted file mode 100644
index db7c055e..00000000
--- a/templates/components/vectordbs/python/qdrant/generate.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import logging
-import os
-from app.engine.loaders import get_documents
-from app.settings import init_settings
-from dotenv import load_dotenv
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.core.storage import StorageContext
-from llama_index.vector_stores.qdrant import QdrantVectorStore
-load_dotenv()
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index with Qdrant")
-    # load the documents and create the index
-    documents = get_documents()
-    store = QdrantVectorStore(
-        collection_name=os.getenv("QDRANT_COLLECTION"),
-        url=os.getenv("QDRANT_URL"),
-        api_key=os.getenv("QDRANT_API_KEY"),
-    )
-    storage_context = StorageContext.from_defaults(vector_store=store)
-    VectorStoreIndex.from_documents(
-        documents,
-        storage_context=storage_context,
-        show_progress=True,  # this will show you a progress bar as the embeddings are created
-    )
-    logger.info(
-        f"Successfully uploaded documents to the {os.getenv('QDRANT_COLLECTION')} collection."
-    )
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/components/vectordbs/python/qdrant/index.py b/templates/components/vectordbs/python/qdrant/index.py
deleted file mode 100644
index 0a388d8a..00000000
--- a/templates/components/vectordbs/python/qdrant/index.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import logging
-import os
-
-from llama_index.core.indices import VectorStoreIndex
-from llama_index.vector_stores.qdrant import QdrantVectorStore
-
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Connecting to Qdrant collection..")
-    store = QdrantVectorStore(
-        collection_name=os.getenv("QDRANT_COLLECTION"),
-        url=os.getenv("QDRANT_URL"),
-        api_key=os.getenv("QDRANT_API_KEY"),
-    )
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Finished connecting to Qdrant collection.")
-    return index
diff --git a/templates/components/vectordbs/python/qdrant/vectordb.py b/templates/components/vectordbs/python/qdrant/vectordb.py
new file mode 100644
index 00000000..5f36c202
--- /dev/null
+++ b/templates/components/vectordbs/python/qdrant/vectordb.py
@@ -0,0 +1,11 @@
+import os
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+
+
+def get_vector_store():
+    store = QdrantVectorStore(
+        collection_name=os.getenv("QDRANT_COLLECTION"),
+        url=os.getenv("QDRANT_URL"),
+        api_key=os.getenv("QDRANT_API_KEY"),
+    )
+    return store
diff --git a/templates/types/streaming/fastapi/app/constants.py b/templates/types/streaming/fastapi/app/constants.py
new file mode 100644
index 00000000..61daefe5
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/constants.py
@@ -0,0 +1 @@
+STORAGE_DIR = "storage"  # directory to save the stores to (document store and if used, the `SimpleVectorStore`)
diff --git a/templates/types/streaming/fastapi/app/engine/generate.py b/templates/types/streaming/fastapi/app/engine/generate.py
new file mode 100644
index 00000000..3e1686dd
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/generate.py
@@ -0,0 +1,70 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.settings import Settings
+from llama_index.core.ingestion import IngestionPipeline
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.vector_stores import SimpleVectorStore
+from llama_index.core.storage.docstore import SimpleDocumentStore
+from app.constants import STORAGE_DIR
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+from app.engine.vectordb import get_vector_store
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def get_doc_store():
+    # Reuse the persisted docstore if it exists, otherwise start with an empty one
+    if os.path.exists(STORAGE_DIR):
+        return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
+    else:
+        return SimpleDocumentStore()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+
+    # load the documents and prepare the document and vector stores
+    documents = get_documents()
+    docstore = get_doc_store()
+    vector_store = get_vector_store()
+
+    # Create ingestion pipeline
+    ingestion_pipeline = IngestionPipeline(
+        transformations=[
+            SentenceSplitter(
+                chunk_size=Settings.chunk_size,
+                chunk_overlap=Settings.chunk_overlap,
+            ),
+            Settings.embed_model,
+        ],
+        docstore=docstore,
+        docstore_strategy="upserts_and_delete",
+    )
+
+    # llama_index has a typing issue when passing vector_store to IngestionPipeline,
+    # so we set it manually after initialization
+    ingestion_pipeline.vector_store = vector_store
+
+    # Run the ingestion pipeline and store the results
+    ingestion_pipeline.run(show_progress=True, documents=documents)
+
+    # The default SimpleVectorStore only keeps data in memory, so persist it to disk.
+    # This can be removed when using an external vector store.
+    if isinstance(vector_store, SimpleVectorStore):
+        vector_store.persist(os.path.join(STORAGE_DIR, "vector_store.json"))
+    # Persist the docstore so the ingestion strategy can detect changed documents on later runs
+    docstore.persist(os.path.join(STORAGE_DIR, "docstore.json"))
+
+    logger.info("Finished creating new index.")
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/types/streaming/fastapi/app/engine/index.py b/templates/types/streaming/fastapi/app/engine/index.py
new file mode 100644
index 00000000..3cc2beb7
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/engine/index.py
@@ -0,0 +1,13 @@
+import logging
+from llama_index.core.indices.vector_store import VectorStoreIndex
+from app.engine.vectordb import get_vector_store
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Loading the index...")
+    store = get_vector_store()
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Loaded index successfully.")
+    return index
-- 
GitLab