From a1feb524e953edbe62c5b8d8c9b500de360efafe Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Fri, 3 May 2024 11:06:02 +0800
Subject: [PATCH] Revert "Use ingestion pipeline in Python code (#61)"

This reverts commit c094b0c6bfee34b92a4daa2718e17307442e2a5f.
---
 .changeset/short-ducks-drum.md                |  5 --
 templates/components/loaders/python/file.py   |  5 +-
 .../vectordbs/python/astra/__init__.py        |  0
 .../vectordbs/python/astra/generate.py        | 37 ++++++++++
 .../python/astra/{vectordb.py => index.py}    | 13 +++-
 .../vectordbs/python/milvus/__init__.py       |  0
 .../vectordbs/python/milvus/generate.py       | 39 +++++++++++
 .../vectordbs/python/milvus/index.py          | 22 ++++++
 .../vectordbs/python/milvus/vectordb.py       | 13 ----
 .../vectordbs/python/mongo/__init__.py        |  0
 .../vectordbs/python/mongo/generate.py        | 43 ++++++++++++
 .../vectordbs/python/mongo/index.py           | 20 ++++++
 .../vectordbs/python/mongo/vectordb.py        | 11 ---
 .../vectordbs/python/none/__init__.py         |  0
 .../vectordbs/python/none/constants.py        |  1 +
 .../vectordbs/python/none/generate.py         | 32 +++++++++
 .../components/vectordbs/python/none/index.py | 20 ++++++
 .../vectordbs/python/none/vectordb.py         | 13 ----
 .../vectordbs/python/pg/__init__.py           |  0
 .../vectordbs/python/pg/constants.py          |  2 +
 .../vectordbs/python/pg/generate.py           | 35 ++++++++++
 .../components/vectordbs/python/pg/index.py   | 13 ++++
 .../python/pg/{vectordb.py => utils.py}       |  8 +--
 .../vectordbs/python/pinecone/__init__.py     |  0
 .../vectordbs/python/pinecone/generate.py     | 39 +++++++++++
 .../vectordbs/python/pinecone/index.py        | 20 ++++++
 .../vectordbs/python/pinecone/vectordb.py     | 11 ---
 .../vectordbs/python/qdrant/__init__.py       |  0
 .../vectordbs/python/qdrant/generate.py       | 37 ++++++++++
 .../vectordbs/python/qdrant/index.py          | 20 ++++++
 .../vectordbs/python/qdrant/vectordb.py       | 11 ---
 .../types/streaming/fastapi/app/constants.py  |  1 -
 .../streaming/fastapi/app/engine/generate.py  | 70 -------------------
 .../streaming/fastapi/app/engine/index.py     | 13 ----
 34 files changed, 397 insertions(+), 157 deletions(-)
 delete mode 100644 .changeset/short-ducks-drum.md
 create mode 100644 templates/components/vectordbs/python/astra/__init__.py
 create mode 100644 templates/components/vectordbs/python/astra/generate.py
 rename templates/components/vectordbs/python/astra/{vectordb.py => index.py} (52%)
 create mode 100644 templates/components/vectordbs/python/milvus/__init__.py
 create mode 100644 templates/components/vectordbs/python/milvus/generate.py
 create mode 100644 templates/components/vectordbs/python/milvus/index.py
 delete mode 100644 templates/components/vectordbs/python/milvus/vectordb.py
 create mode 100644 templates/components/vectordbs/python/mongo/__init__.py
 create mode 100644 templates/components/vectordbs/python/mongo/generate.py
 create mode 100644 templates/components/vectordbs/python/mongo/index.py
 delete mode 100644 templates/components/vectordbs/python/mongo/vectordb.py
 create mode 100644 templates/components/vectordbs/python/none/__init__.py
 create mode 100644 templates/components/vectordbs/python/none/constants.py
 create mode 100644 templates/components/vectordbs/python/none/generate.py
 create mode 100644 templates/components/vectordbs/python/none/index.py
 delete mode 100644 templates/components/vectordbs/python/none/vectordb.py
 create mode 100644 templates/components/vectordbs/python/pg/__init__.py
 create mode 100644 templates/components/vectordbs/python/pg/constants.py
 create mode 100644 templates/components/vectordbs/python/pg/generate.py
 create mode 100644 templates/components/vectordbs/python/pg/index.py
 rename templates/components/vectordbs/python/pg/{vectordb.py => utils.py} (84%)
 create mode 100644 templates/components/vectordbs/python/pinecone/__init__.py
 create mode 100644 templates/components/vectordbs/python/pinecone/generate.py
 create mode 100644 templates/components/vectordbs/python/pinecone/index.py
 delete mode 100644 templates/components/vectordbs/python/pinecone/vectordb.py
 create mode 100644 templates/components/vectordbs/python/qdrant/__init__.py
 create mode 100644 templates/components/vectordbs/python/qdrant/generate.py
 create mode 100644 templates/components/vectordbs/python/qdrant/index.py
 delete mode 100644 templates/components/vectordbs/python/qdrant/vectordb.py
 delete mode 100644 templates/types/streaming/fastapi/app/constants.py
 delete mode 100644 templates/types/streaming/fastapi/app/engine/generate.py
 delete mode 100644 templates/types/streaming/fastapi/app/engine/index.py

diff --git a/.changeset/short-ducks-drum.md b/.changeset/short-ducks-drum.md
deleted file mode 100644
index 4980e727..00000000
--- a/.changeset/short-ducks-drum.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-"create-llama": patch
----
-
-Use ingestion pipeline for Python
diff --git a/templates/components/loaders/python/file.py b/templates/components/loaders/python/file.py
index 6f72c29f..a814b0d0 100644
--- a/templates/components/loaders/python/file.py
+++ b/templates/components/loaders/python/file.py
@@ -27,7 +27,10 @@ def llama_parse_parser():
 def get_file_documents(config: FileLoaderConfig):
     from llama_index.core.readers import SimpleDirectoryReader
 
-    reader = SimpleDirectoryReader(config.data_dir, recursive=True, filename_as_id=True)
+    reader = SimpleDirectoryReader(
+        config.data_dir,
+        recursive=True,
+    )
     if config.use_llama_parse:
         parser = llama_parse_parser()
         reader.file_extractor = {".pdf": parser}
diff --git a/templates/components/vectordbs/python/astra/__init__.py b/templates/components/vectordbs/python/astra/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/astra/generate.py b/templates/components/vectordbs/python/astra/generate.py
new file mode 100644
index 00000000..4d2a54af
--- /dev/null
+++ b/templates/components/vectordbs/python/astra/generate.py
@@ -0,0 +1,37 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.astra_db import AstraDBVectorStore
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    documents = get_documents()
+    store = AstraDBVectorStore(
+        token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
+        api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
+        collection_name=os.environ["ASTRA_DB_COLLECTION"],
+        embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info("Successfully created embeddings in AstraDB")
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/astra/vectordb.py b/templates/components/vectordbs/python/astra/index.py
similarity index 52%
rename from templates/components/vectordbs/python/astra/vectordb.py
rename to templates/components/vectordbs/python/astra/index.py
index 0cd962d7..b1389f76 100644
--- a/templates/components/vectordbs/python/astra/vectordb.py
+++ b/templates/components/vectordbs/python/astra/index.py
@@ -1,12 +1,21 @@
+import logging
 import os
+
+from llama_index.core.indices import VectorStoreIndex
 from llama_index.vector_stores.astra_db import AstraDBVectorStore
 
 
-def get_vector_store():
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from AstraDB...")
     store = AstraDBVectorStore(
         token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
         api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
         collection_name=os.environ["ASTRA_DB_COLLECTION"],
         embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
     )
-    return store
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from AstraDB.")
+    return index
diff --git a/templates/components/vectordbs/python/milvus/__init__.py b/templates/components/vectordbs/python/milvus/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/milvus/generate.py b/templates/components/vectordbs/python/milvus/generate.py
new file mode 100644
index 00000000..b5bfc9f9
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/generate.py
@@ -0,0 +1,39 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    store = MilvusVectorStore(
+        uri=os.environ["MILVUS_ADDRESS"],
+        user=os.getenv("MILVUS_USERNAME"),
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM")),
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info("Successfully created embeddings in Milvus")
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/milvus/index.py b/templates/components/vectordbs/python/milvus/index.py
new file mode 100644
index 00000000..ffd87e63
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/index.py
@@ -0,0 +1,22 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from Milvus...")
+    store = MilvusVectorStore(
+        uri=os.getenv("MILVUS_ADDRESS"),
+        user=os.getenv("MILVUS_USERNAME"),
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM")),
+    )
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from Milvus.")
+    return index
diff --git a/templates/components/vectordbs/python/milvus/vectordb.py b/templates/components/vectordbs/python/milvus/vectordb.py
deleted file mode 100644
index 5791f15d..00000000
--- a/templates/components/vectordbs/python/milvus/vectordb.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import os
-from llama_index.vector_stores.milvus import MilvusVectorStore
-
-
-def get_vector_store():
-    store = MilvusVectorStore(
-        uri=os.environ["MILVUS_ADDRESS"],
-        user=os.getenv("MILVUS_USERNAME"),
-        password=os.getenv("MILVUS_PASSWORD"),
-        collection_name=os.getenv("MILVUS_COLLECTION"),
-        dim=int(os.getenv("EMBEDDING_DIM")),
-    )
-    return store
diff --git a/templates/components/vectordbs/python/mongo/__init__.py b/templates/components/vectordbs/python/mongo/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/mongo/generate.py b/templates/components/vectordbs/python/mongo/generate.py
new file mode 100644
index 00000000..abe844c0
--- /dev/null
+++ b/templates/components/vectordbs/python/mongo/generate.py
@@ -0,0 +1,43 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    store = MongoDBAtlasVectorSearch(
+        db_name=os.environ["MONGODB_DATABASE"],
+        collection_name=os.environ["MONGODB_VECTORS"],
+        index_name=os.environ["MONGODB_VECTOR_INDEX"],
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info(
+        f"Successfully created embeddings in the MongoDB collection {os.environ['MONGODB_VECTORS']}"
+    )
+    logger.info(
+        """IMPORTANT: You can't query your index yet because you need to create a vector search index in MongoDB's UI now.
+See https://github.com/run-llama/mongodb-demo/tree/main?tab=readme-ov-file#create-a-vector-search-index"""
+    )
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/mongo/index.py b/templates/components/vectordbs/python/mongo/index.py
new file mode 100644
index 00000000..6dba7c1d
--- /dev/null
+++ b/templates/components/vectordbs/python/mongo/index.py
@@ -0,0 +1,20 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from MongoDB...")
+    store = MongoDBAtlasVectorSearch(
+        db_name=os.environ["MONGODB_DATABASE"],
+        collection_name=os.environ["MONGODB_VECTORS"],
+        index_name=os.environ["MONGODB_VECTOR_INDEX"],
+    )
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from MongoDB.")
+    return index
diff --git a/templates/components/vectordbs/python/mongo/vectordb.py b/templates/components/vectordbs/python/mongo/vectordb.py
deleted file mode 100644
index d1fc5768..00000000
--- a/templates/components/vectordbs/python/mongo/vectordb.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import os
-from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
-
-
-def get_vector_store():
-    store = MongoDBAtlasVectorSearch(
-        db_name=os.environ["MONGODB_DATABASE"],
-        collection_name=os.environ["MONGODB_VECTORS"],
-        index_name=os.environ["MONGODB_VECTOR_INDEX"],
-    )
-    return store
diff --git a/templates/components/vectordbs/python/none/__init__.py b/templates/components/vectordbs/python/none/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/none/constants.py b/templates/components/vectordbs/python/none/constants.py
new file mode 100644
index 00000000..254998eb
--- /dev/null
+++ b/templates/components/vectordbs/python/none/constants.py
@@ -0,0 +1 @@
+STORAGE_DIR = "storage"  # directory to cache the generated index
diff --git a/templates/components/vectordbs/python/none/generate.py b/templates/components/vectordbs/python/none/generate.py
new file mode 100644
index 00000000..e38d89cb
--- /dev/null
+++ b/templates/components/vectordbs/python/none/generate.py
@@ -0,0 +1,32 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import logging
+from llama_index.core.indices import (
+    VectorStoreIndex,
+)
+from app.engine.constants import STORAGE_DIR
+from app.engine.loaders import get_documents
+from app.settings import init_settings
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    index = VectorStoreIndex.from_documents(
+        documents,
+    )
+    # store it for later
+    index.storage_context.persist(STORAGE_DIR)
+    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/none/index.py b/templates/components/vectordbs/python/none/index.py
new file mode 100644
index 00000000..8b77414a
--- /dev/null
+++ b/templates/components/vectordbs/python/none/index.py
@@ -0,0 +1,20 @@
+import logging
+import os
+
+from app.engine.constants import STORAGE_DIR
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import load_index_from_storage
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    # check if storage already exists
+    if not os.path.exists(STORAGE_DIR):
+        return None
+    # load the existing index
+    logger.info(f"Loading index from {STORAGE_DIR}...")
+    storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
+    index = load_index_from_storage(storage_context)
+    logger.info(f"Finished loading index from {STORAGE_DIR}")
+    return index
diff --git a/templates/components/vectordbs/python/none/vectordb.py b/templates/components/vectordbs/python/none/vectordb.py
deleted file mode 100644
index 279f7a51..00000000
--- a/templates/components/vectordbs/python/none/vectordb.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import os
-
-from llama_index.core.vector_stores import SimpleVectorStore
-from app.constants import STORAGE_DIR
-
-
-def get_vector_store():
-    if not os.path.exists(STORAGE_DIR):
-        vector_store = SimpleVectorStore()
-    else:
-        vector_store = SimpleVectorStore.from_persist_dir(STORAGE_DIR)
-    vector_store.stores_text = True
-    return vector_store
diff --git a/templates/components/vectordbs/python/pg/__init__.py b/templates/components/vectordbs/python/pg/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/pg/constants.py b/templates/components/vectordbs/python/pg/constants.py
new file mode 100644
index 00000000..a4ebd918
--- /dev/null
+++ b/templates/components/vectordbs/python/pg/constants.py
@@ -0,0 +1,2 @@
+PGVECTOR_SCHEMA = "public"
+PGVECTOR_TABLE = "llamaindex_embedding"
\ No newline at end of file
diff --git a/templates/components/vectordbs/python/pg/generate.py b/templates/components/vectordbs/python/pg/generate.py
new file mode 100644
index 00000000..79fa3bd7
--- /dev/null
+++ b/templates/components/vectordbs/python/pg/generate.py
@@ -0,0 +1,35 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import logging
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.core.storage import StorageContext
+
+from app.engine.loaders import get_documents
+from app.settings import init_settings
+from app.engine.utils import init_pg_vector_store_from_env
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    store = init_pg_vector_store_from_env()
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info(
+        f"Successfully created embeddings in the PG vector store, schema={store.schema_name} table={store.table_name}"
+    )
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/pg/index.py b/templates/components/vectordbs/python/pg/index.py
new file mode 100644
index 00000000..3c4f3180
--- /dev/null
+++ b/templates/components/vectordbs/python/pg/index.py
@@ -0,0 +1,13 @@
+import logging
+from llama_index.core.indices.vector_store import VectorStoreIndex
+from app.engine.utils import init_pg_vector_store_from_env
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from PGVector...")
+    store = init_pg_vector_store_from_env()
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from PGVector.")
+    return index
diff --git a/templates/components/vectordbs/python/pg/vectordb.py b/templates/components/vectordbs/python/pg/utils.py
similarity index 84%
rename from templates/components/vectordbs/python/pg/vectordb.py
rename to templates/components/vectordbs/python/pg/utils.py
index da5eb1a2..39127846 100644
--- a/templates/components/vectordbs/python/pg/vectordb.py
+++ b/templates/components/vectordbs/python/pg/utils.py
@@ -1,13 +1,10 @@
 import os
 from llama_index.vector_stores.postgres import PGVectorStore
 from urllib.parse import urlparse
+from app.engine.constants import PGVECTOR_SCHEMA, PGVECTOR_TABLE
 
-STORAGE_DIR = "storage"
-PGVECTOR_SCHEMA = "public"
-PGVECTOR_TABLE = "llamaindex_embedding"
 
-
-def get_vector_store():
+def init_pg_vector_store_from_env():
     original_conn_string = os.environ.get("PG_CONNECTION_STRING")
     if original_conn_string is None or original_conn_string == "":
         raise ValueError("PG_CONNECTION_STRING environment variable is not set.")
@@ -27,5 +24,4 @@ def get_vector_store():
         async_connection_string=async_conn_string,
         schema_name=PGVECTOR_SCHEMA,
         table_name=PGVECTOR_TABLE,
-        embed_dim=int(os.environ.get("EMBEDDING_DIM", 768)),
     )
diff --git a/templates/components/vectordbs/python/pinecone/__init__.py b/templates/components/vectordbs/python/pinecone/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/pinecone/generate.py b/templates/components/vectordbs/python/pinecone/generate.py
new file mode 100644
index 00000000..5f233ba2
--- /dev/null
+++ b/templates/components/vectordbs/python/pinecone/generate.py
@@ -0,0 +1,39 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+from app.settings import init_settings
+from app.engine.loaders import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    store = PineconeVectorStore(
+        api_key=os.environ["PINECONE_API_KEY"],
+        index_name=os.environ["PINECONE_INDEX_NAME"],
+        environment=os.environ["PINECONE_ENVIRONMENT"],
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info(
+        f"Successfully created embeddings and saved them to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
+    )
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/pinecone/index.py b/templates/components/vectordbs/python/pinecone/index.py
new file mode 100644
index 00000000..98824ffd
--- /dev/null
+++ b/templates/components/vectordbs/python/pinecone/index.py
@@ -0,0 +1,20 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from Pinecone...")
+    store = PineconeVectorStore(
+        api_key=os.environ["PINECONE_API_KEY"],
+        index_name=os.environ["PINECONE_INDEX_NAME"],
+        environment=os.environ["PINECONE_ENVIRONMENT"],
+    )
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from Pinecone.")
+    return index
diff --git a/templates/components/vectordbs/python/pinecone/vectordb.py b/templates/components/vectordbs/python/pinecone/vectordb.py
deleted file mode 100644
index d6ff2cf8..00000000
--- a/templates/components/vectordbs/python/pinecone/vectordb.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import os
-from llama_index.vector_stores.pinecone import PineconeVectorStore
-
-
-def get_vector_store():
-    store = PineconeVectorStore(
-        api_key=os.environ["PINECONE_API_KEY"],
-        index_name=os.environ["PINECONE_INDEX_NAME"],
-        environment=os.environ["PINECONE_ENVIRONMENT"],
-    )
-    return store
diff --git a/templates/components/vectordbs/python/qdrant/__init__.py b/templates/components/vectordbs/python/qdrant/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/qdrant/generate.py b/templates/components/vectordbs/python/qdrant/generate.py
new file mode 100644
index 00000000..db7c055e
--- /dev/null
+++ b/templates/components/vectordbs/python/qdrant/generate.py
@@ -0,0 +1,37 @@
+import logging
+import os
+from app.engine.loaders import get_documents
+from app.settings import init_settings
+from dotenv import load_dotenv
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.core.storage import StorageContext
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+load_dotenv()
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    init_settings()
+    logger.info("Creating new index with Qdrant")
+    # load the documents and create the index
+    documents = get_documents()
+    store = QdrantVectorStore(
+        collection_name=os.getenv("QDRANT_COLLECTION"),
+        url=os.getenv("QDRANT_URL"),
+        api_key=os.getenv("QDRANT_API_KEY"),
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info(
+        f"Successfully uploaded documents to the {os.getenv('QDRANT_COLLECTION')} collection."
+    )
+
+
+if __name__ == "__main__":
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/qdrant/index.py b/templates/components/vectordbs/python/qdrant/index.py
new file mode 100644
index 00000000..0a388d8a
--- /dev/null
+++ b/templates/components/vectordbs/python/qdrant/index.py
@@ -0,0 +1,20 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to Qdrant collection..")
+    store = QdrantVectorStore(
+        collection_name=os.getenv("QDRANT_COLLECTION"),
+        url=os.getenv("QDRANT_URL"),
+        api_key=os.getenv("QDRANT_API_KEY"),
+    )
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to Qdrant collection.")
+    return index
diff --git a/templates/components/vectordbs/python/qdrant/vectordb.py b/templates/components/vectordbs/python/qdrant/vectordb.py
deleted file mode 100644
index 5f36c202..00000000
--- a/templates/components/vectordbs/python/qdrant/vectordb.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import os
-from llama_index.vector_stores.qdrant import QdrantVectorStore
-
-
-def get_vector_store():
-    store = QdrantVectorStore(
-        collection_name=os.getenv("QDRANT_COLLECTION"),
-        url=os.getenv("QDRANT_URL"),
-        api_key=os.getenv("QDRANT_API_KEY"),
-    )
-    return store
diff --git a/templates/types/streaming/fastapi/app/constants.py b/templates/types/streaming/fastapi/app/constants.py
deleted file mode 100644
index 61daefe5..00000000
--- a/templates/types/streaming/fastapi/app/constants.py
+++ /dev/null
@@ -1 +0,0 @@
-STORAGE_DIR = "storage"  # directory to save the stores to (document store and if used, the `SimpleVectorStore`)
diff --git a/templates/types/streaming/fastapi/app/engine/generate.py b/templates/types/streaming/fastapi/app/engine/generate.py
deleted file mode 100644
index 3e1686dd..00000000
--- a/templates/types/streaming/fastapi/app/engine/generate.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import os
-import logging
-from llama_index.core.settings import Settings
-from llama_index.core.ingestion import IngestionPipeline
-from llama_index.core.node_parser import SentenceSplitter
-from llama_index.core.vector_stores import SimpleVectorStore
-from llama_index.core.storage.docstore import SimpleDocumentStore
-from app.constants import STORAGE_DIR
-from app.settings import init_settings
-from app.engine.loaders import get_documents
-from app.engine.vectordb import get_vector_store
-
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def get_doc_store():
-    if not os.path.exists(STORAGE_DIR):
-        docstore = SimpleDocumentStore()
-        return docstore
-    else:
-        return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
-
-
-def generate_datasource():
-    init_settings()
-    logger.info("Creating new index")
-
-    # load the documents and create the index
-    documents = get_documents()
-    docstore = get_doc_store()
-    vector_store = get_vector_store()
-
-    # Create ingestion pipeline
-    ingestion_pipeline = IngestionPipeline(
-        transformations=[
-            SentenceSplitter(
-                chunk_size=Settings.chunk_size,
-                chunk_overlap=Settings.chunk_overlap,
-            ),
-            Settings.embed_model,
-        ],
-        docstore=docstore,
-        docstore_strategy="upserts_and_delete",
-    )
-
-    # llama_index having an typing issue when passing vector_store to IngestionPipeline
-    # so we need to set it manually after initialization
-    ingestion_pipeline.vector_store = vector_store
-
-    # Run the ingestion pipeline and store the results
-    ingestion_pipeline.run(show_progress=True, documents=documents)
-
-    # Default vector store only keeps data in memory, so we need to persist it
-    # Can remove if using a different vector store
-    if isinstance(vector_store, SimpleVectorStore):
-        vector_store.persist(os.path.join(STORAGE_DIR, "vector_store.json"))
-    # Persist the docstore to apply ingestion strategy
-    docstore.persist(os.path.join(STORAGE_DIR, "docstore.json"))
-
-    logger.info("Finished creating new index.")
-
-
-if __name__ == "__main__":
-    generate_datasource()
diff --git a/templates/types/streaming/fastapi/app/engine/index.py b/templates/types/streaming/fastapi/app/engine/index.py
deleted file mode 100644
index 3cc2beb7..00000000
--- a/templates/types/streaming/fastapi/app/engine/index.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import logging
-from llama_index.core.indices.vector_store import VectorStoreIndex
-from app.engine.vectordb import get_vector_store
-
-logger = logging.getLogger("uvicorn")
-
-
-def get_index():
-    logger.info("Loading the index...")
-    store = get_vector_store()
-    index = VectorStoreIndex.from_vector_store(store)
-    logger.info("Loaded index successfully.")
-    return index
-- 
GitLab