diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a1398b5c90b92927831f8743b0304497d05a797f..531933cf10f601fce96c40825be825fbb6b98152 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,3 +53,8 @@ repos: hooks: - id: toml-sort-fix exclude: poetry.lock + - repo: https://github.com/adamchainz/blacken-docs + rev: "1.16.0" + hooks: + - id: blacken-docs + additional_dependencies: [black==23.10.1] diff --git a/README.md b/README.md index 7be75112dfb2cd49d9eac5e807a1c123d3ffa72c..a42ec47e76f6a9a24d1d8bbabbe7a7960e753dd3 100644 --- a/README.md +++ b/README.md @@ -70,9 +70,11 @@ To build a simple vector store index using OpenAI: ```python import os + os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY" from llama_index import VectorStoreIndex, SimpleDirectoryReader + documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data() index = VectorStoreIndex.from_documents(documents) ``` @@ -81,22 +83,26 @@ To build a simple vector store index using non-OpenAI LLMs, e.g. Llama 2 hosted ```python import os + os.environ["REPLICATE_API_TOKEN"] = "YOUR_REPLICATE_API_TOKEN" from llama_index.llms import Replicate + llama2_7b_chat = "meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e" llm = Replicate( model=llama2_7b_chat, temperature=0.01, - additional_kwargs={"top_p": 1, "max_new_tokens":300} + additional_kwargs={"top_p": 1, "max_new_tokens": 300}, ) from llama_index.embeddings import HuggingFaceEmbedding from llama_index import ServiceContext + embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model) from llama_index import VectorStoreIndex, SimpleDirectoryReader + documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data() index = VectorStoreIndex.from_documents(documents, service_context=service_context) ``` @@ -121,7 +127,7 @@ To reload from disk: from llama_index import StorageContext, load_index_from_storage # rebuild storage context -storage_context = StorageContext.from_defaults(persist_dir='./storage') +storage_context = StorageContext.from_defaults(persist_dir="./storage") # load index index = load_index_from_storage(storage_context) ``` diff --git a/docs/community/faq/llms.md b/docs/community/faq/llms.md index 50810a8f7a797c697d9694664e05481894687665..58aff0e0bab8942d673de74bd268a3d1c8dba834 100644 --- a/docs/community/faq/llms.md +++ b/docs/community/faq/llms.md @@ -53,7 +53,9 @@ llm_predictor = LLMPredictor(system_prompt="Always respond in Italian.") service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor) -query_engine = load_index_from_storage(storage_context, service_context=service_context).as_query_engine() +query_engine = load_index_from_storage( + storage_context, service_context=service_context +).as_query_engine() ``` --- diff --git a/docs/community/integrations/chatgpt_plugins.md b/docs/community/integrations/chatgpt_plugins.md index db410611ccdda118d3cb98961acfda9cc4a51c80..456a77d1ae9b0a23c3b42fc845c04852f0477e15 100644 --- a/docs/community/integrations/chatgpt_plugins.md +++ b/docs/community/integrations/chatgpt_plugins.md @@ -33,6 +33,7 @@ loader = SimpleWebPageReader(html_to_text=True) url = "http://www.paulgraham.com/worked.html" documents = loader.load_data(urls=[url]) + # Convert LlamaIndex Documents to JSON format def dump_docs_to_json(documents: List[Document], out_path: str) -> Dict: """Convert LlamaIndex Documents to JSON format and save it.""" @@ -51,8 +52,7 @@ def 
dump_docs_to_json(documents: List[Document], out_path: str) -> Dict: } result_json.append(cur_dict) - json.dump(result_json, open(out_path, 'w')) - + json.dump(result_json, open(out_path, "w")) ``` For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPT_Retrieval_Plugin_Upload.ipynb). @@ -72,22 +72,19 @@ import os # load documents bearer_token = os.getenv("BEARER_TOKEN") reader = ChatGPTRetrievalPluginReader( - endpoint_url="http://localhost:8000", - bearer_token=bearer_token + endpoint_url="http://localhost:8000", bearer_token=bearer_token ) documents = reader.load_data("What did the author do growing up?") # build and query index from llama_index import SummaryIndex + index = SummaryIndex.from_documents(documents) # set Logging to DEBUG for more detailed outputs -query_engine = vector_index.as_query_engine( - response_mode="compact" -) +query_engine = vector_index.as_query_engine(response_mode="compact") response = query_engine.query( "Summarize the retrieved content and describe what the author did growing up", ) - ``` For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPTRetrievalPluginReaderDemo.ipynb). @@ -107,7 +104,7 @@ from llama_index import SimpleDirectoryReader import os # load documents -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() # build index bearer_token = os.getenv("BEARER_TOKEN") @@ -124,7 +121,6 @@ query_engine = vector_index.as_query_engine( response_mode="compact", ) response = query_engine.query("What did the author do growing up?") - ``` For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPTRetrievalPluginIndexDemo.ipynb). diff --git a/docs/community/integrations/deepeval.md b/docs/community/integrations/deepeval.md index 6d03aa60089cb257b8789fdee598223745671d05..d927efd6c4b1c35b89a04313207f7510fbb6deaf 100644 --- a/docs/community/integrations/deepeval.md +++ b/docs/community/integrations/deepeval.md @@ -49,7 +49,6 @@ You can more about the [DeepEval Framework](https://docs.confident-ai.com/docs/f DeepEval integrates nicely with LlamaIndex's `BaseEvaluator` class. Below is an example of the factual consistency documentation. 
```python - from llama_index.response.schema import Response from typing import List from llama_index.schema import Document @@ -84,7 +83,7 @@ from llama_index import download_loader WikipediaReader = download_loader("WikipediaReader") loader = WikipediaReader() -documents = loader.load_data(pages=['Tokyo']) +documents = loader.load_data(pages=["Tokyo"]) tree_index = TreeIndex.from_documents(documents=documents) vector_index = VectorStoreIndex.from_documents( documents, service_context=service_context_gpt4 diff --git a/docs/community/integrations/graphsignal.md b/docs/community/integrations/graphsignal.md index 69745a987a4b2d5f3034b80219fb96baad2c80cb..d6ad8ddd2a71ec2f60011d1f7a87a59f97b4469e 100644 --- a/docs/community/integrations/graphsignal.md +++ b/docs/community/integrations/graphsignal.md @@ -18,7 +18,7 @@ pip install graphsignal import graphsignal # Provide an API key directly or via GRAPHSIGNAL_API_KEY environment variable -graphsignal.configure(api_key='my-api-key', deployment='my-llama-index-app-prod') +graphsignal.configure(api_key="my-api-key", deployment="my-llama-index-app-prod") ``` You can get an API key [here](https://app.graphsignal.com/). @@ -30,7 +30,7 @@ See the [Quick Start guide](https://graphsignal.com/docs/guides/quick-start/), [ To additionally trace any function or code, you can use a decorator or a context manager: ```python -with graphsignal.start_trace('load-external-data'): +with graphsignal.start_trace("load-external-data"): reader.load_data() ``` diff --git a/docs/community/integrations/guidance.md b/docs/community/integrations/guidance.md index 4b6f5e16b524bcfdfd882f5a41ba4299cf5c27ab..4004c8a56d97018708d25f3c82b527c3e4bde0be 100644 --- a/docs/community/integrations/guidance.md +++ b/docs/community/integrations/guidance.md @@ -21,6 +21,7 @@ class Song(BaseModel): title: str length_seconds: int + class Album(BaseModel): name: str artist: str @@ -38,23 +39,30 @@ and supplying a suitable prompt template. program = GuidancePydanticProgram( output_cls=Album, prompt_template_str="Generate an example album, with an artist and a list of songs. Using the movie {{movie_name}} as inspiration", - guidance_llm=OpenAI('text-davinci-003'), + guidance_llm=OpenAI("text-davinci-003"), verbose=True, ) - ``` Now we can run the program by calling it with additional user input. Here let's go for something spooky and create an album inspired by the Shining. ```python -output = program(movie_name='The Shining') +output = program(movie_name="The Shining") ``` We have our pydantic object: ```python -Album(name='The Shining', artist='Jack Torrance', songs=[Song(title='All Work and No Play', length_seconds=180), Song(title='The Overlook Hotel', length_seconds=240), Song(title='The Shining', length_seconds=210)]) +Album( + name="The Shining", + artist="Jack Torrance", + songs=[ + Song(title="All Work and No Play", length_seconds=180), + Song(title="The Overlook Hotel", length_seconds=240), + Song(title="The Shining", length_seconds=210), + ], +) ``` You can play with [this notebook](/examples/output_parsing/guidance_pydantic_program.ipynb) for more details. 
@@ -73,14 +81,16 @@ from llama_index.question_gen.guidance_generator import GuidanceQuestionGenerato from guidance.llms import OpenAI as GuidanceOpenAI # define guidance based question generator -question_gen = GuidanceQuestionGenerator.from_defaults(guidance_llm=GuidanceOpenAI('text-davinci-003'), verbose=False) +question_gen = GuidanceQuestionGenerator.from_defaults( + guidance_llm=GuidanceOpenAI("text-davinci-003"), verbose=False +) # define query engine tools query_engine_tools = ... # construct sub-question query engine s_engine = SubQuestionQueryEngine.from_defaults( - question_gen=question_gen # use guidance based question_gen defined above + question_gen=question_gen, # use guidance based question_gen defined above query_engine_tools=query_engine_tools, ) ``` diff --git a/docs/community/integrations/lmformatenforcer.md b/docs/community/integrations/lmformatenforcer.md index a4d344a12eaa1cad162d593dddaed09ab10fc63a..24306b1b0572a72497e3a1a619b84d3e4d48651a 100644 --- a/docs/community/integrations/lmformatenforcer.md +++ b/docs/community/integrations/lmformatenforcer.md @@ -17,6 +17,7 @@ class Song(BaseModel): title: str length_seconds: int + class Album(BaseModel): name: str artist: str @@ -35,20 +36,27 @@ program = LMFormatEnforcerPydanticProgram( llm=LlamaCPP(), verbose=True, ) - ``` Now we can run the program by calling it with additional user input. Here let's go for something spooky and create an album inspired by the Shining. ```python -output = program(movie_name='The Shining') +output = program(movie_name="The Shining") ``` We have our pydantic object: ```python -Album(name='The Shining: A Musical Journey Through the Haunted Halls of the Overlook Hotel', artist='The Shining Choir', songs=[Song(title='Redrum', length_seconds=300), Song(title='All Work and No Play Makes Jack a Dull Boy', length_seconds=240), Song(title="Heeeeere's Johnny!", length_seconds=180)]) +Album( + name="The Shining: A Musical Journey Through the Haunted Halls of the Overlook Hotel", + artist="The Shining Choir", + songs=[ + Song(title="Redrum", length_seconds=300), + Song(title="All Work and No Play Makes Jack a Dull Boy", length_seconds=240), + Song(title="Heeeeere's Johnny!", length_seconds=180), + ], +) ``` You can play with [this notebook](/examples/output_parsing/lmformatenforcer_pydantic_program.ipynb) for more details. 
diff --git a/docs/community/integrations/managed_indices.md b/docs/community/integrations/managed_indices.md index 83bc5cd409e907798106d1b7fe09d9d4dd30e084..c8769da348b5548eaba190b5e483ffc0f38e4143 100644 --- a/docs/community/integrations/managed_indices.md +++ b/docs/community/integrations/managed_indices.md @@ -27,8 +27,13 @@ from llama_index.managed import VectaraIndex vectara_customer_id = os.environ.get("VECTARA_CUSTOMER_ID") vectara_corpus_id = os.environ.get("VECTARA_CORPUS_ID") vectara_api_key = os.environ.get("VECTARA_API_KEY") -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() -index = VectaraIndex.from_documents(documents, vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key) +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() +index = VectaraIndex.from_documents( + documents, + vectara_customer_id=vectara_customer_id, + vectara_corpus_id=vectara_corpus_id, + vectara_api_key=vectara_api_key, +) # Query index query_engine = index.as_query_engine() @@ -42,7 +47,7 @@ from llama_index import ManagedIndex, SimpleDirectoryReade from llama_index.managed import VectaraIndex # Load documents and build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectaraIndex.from_documents(documents) # Query index diff --git a/docs/community/integrations/trulens.md b/docs/community/integrations/trulens.md index 07c435727fa8ab389134a5cabeecc9dde0d3ba25..037320cea188dc2ed457e5fb069627d3f35f9959 100644 --- a/docs/community/integrations/trulens.md +++ b/docs/community/integrations/trulens.md @@ -20,7 +20,6 @@ pip install trulens-eval ```python from trulens_eval import TruLlama - ``` ## Try it out! diff --git a/docs/community/integrations/using_with_langchain.md b/docs/community/integrations/using_with_langchain.md index 93957e34e03863664b253364f70795c1307e794a..11ce40d7f6022a5b0372dd7bc1f0473e3117bd2d 100644 --- a/docs/community/integrations/using_with_langchain.md +++ b/docs/community/integrations/using_with_langchain.md @@ -27,11 +27,10 @@ tool_config = IndexToolConfig( query_engine=query_engine, name=f"Vector Index", description=f"useful for when you want to answer queries about X", - tool_kwargs={"return_direct": True} + tool_kwargs={"return_direct": True}, ) tool = LlamaIndexTool.from_tool_config(tool_config) - ``` ### Llama Demo Notebook: Tool + Memory module diff --git a/docs/community/integrations/vector_stores.md b/docs/community/integrations/vector_stores.md index 77e5462aaecb2571677b5321a3c6f941f8f37070..47df4538e8cf58fa1b8273fb8ba2f3b828b2c8be 100644 --- a/docs/community/integrations/vector_stores.md +++ b/docs/community/integrations/vector_stores.md @@ -51,13 +51,12 @@ that's initialized as part of the default storage context. 
from llama_index import VectorStoreIndex, SimpleDirectoryReader # Load documents and build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectorStoreIndex.from_documents(documents) # Query index query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") - ``` **Custom Vector Store Index Construction/Querying** @@ -70,11 +69,11 @@ from llama_index.vector_stores import DeepLakeVectorStore # construct vector store and customize storage context storage_context = StorageContext.from_defaults( - vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>") + vector_store=DeepLakeVectorStore(dataset_path="<dataset_path>") ) # Load documents and build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) # Query index @@ -103,6 +102,7 @@ Then connect and use Elasticsearch as a vector database with LlamaIndex ```python from llama_index.vector_stores import ElasticsearchStore + vector_store = ElasticsearchStore( index_name="llm-project", es_url="http://localhost:9200", @@ -127,10 +127,9 @@ Then connect and use Redis as a vector database with LlamaIndex ```python from llama_index.vector_stores import RedisVectorStore + vector_store = RedisVectorStore( - index_name="llm-project", - redis_url="redis://localhost:6379", - overwrite=True + index_name="llm-project", redis_url="redis://localhost:6379", overwrite=True ) ``` @@ -196,14 +195,13 @@ vector_store = WeaviateVectorStore(weaviate_client=client) Zep stores texts, metadata, and embeddings. All are returned in search results. 
```python - from llama_index.vector_stores.zep import ZepVectorStore vector_store = ZepVectorStore( api_url="<api_url>", api_key="<api_key>", collection_name="<unique_collection_name>", # Can either be an existing collection or a new one - embedding_dimensions=1536 # Optional, required if creating a new collection + embedding_dimensions=1536, # Optional, required if creating a new collection ) storage_context = StorageContext.from_defaults(vector_store=vector_store) @@ -225,12 +223,7 @@ from llama_index.vector_stores import PineconeVectorStore # Creating a Pinecone index api_key = "api_key" pinecone.init(api_key=api_key, environment="us-west1-gcp") -pinecone.create_index( - "quickstart", - dimension=1536, - metric="euclidean", - pod_type="p1" -) +pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1") index = pinecone.Index("quickstart") # can define filters specific to this vector index (so you can @@ -239,8 +232,7 @@ metadata_filters = {"title": "paul_graham_essay"} # construct vector store vector_store = PineconeVectorStore( - pinecone_index=index, - metadata_filters=metadata_filters + pinecone_index=index, metadata_filters=metadata_filters ) ``` @@ -252,9 +244,7 @@ from llama_index.vector_stores import QdrantVectorStore # Creating a Qdrant vector store client = qdrant_client.QdrantClient( - host="<qdrant-host>", - api_key="<qdrant-api-key>", - https=True + host="<qdrant-host>", api_key="<qdrant-api-key>", https=True ) collection_name = "paul_graham" @@ -276,6 +266,7 @@ cassio.init(database_id="1234abcd-...", token="AstraCS:...") # For a Cassandra cluster: from cassandra.cluster import Cluster + cluster = Cluster(["127.0.0.1"]) cassio.init(session=cluster.connect(), keyspace="my_keyspace") @@ -328,11 +319,7 @@ import pymilvus from llama_index.vector_stores import MilvusVectorStore # construct vector store -vector_store = MilvusVectorStore( - uri='https://localhost:19530', - overwrite='True' -) - +vector_store = MilvusVectorStore(uri="https://localhost:19530", overwrite="True") ``` **Note**: `MilvusVectorStore` depends on the `pymilvus` library. 
@@ -352,9 +339,7 @@ from llama_index.vector_stores import MilvusVectorStore # construct vector store vector_store = MilvusVectorStore( - uri='foo.vectordb.zillizcloud.com', - token="your_token_here", - overwrite='True' + uri="foo.vectordb.zillizcloud.com", token="your_token_here", overwrite="True" ) ``` @@ -372,17 +357,15 @@ from llama_index.vector_stores import MyScaleVectorStore # Creating a MyScale client client = clickhouse_connect.get_client( - host='YOUR_CLUSTER_HOST', + host="YOUR_CLUSTER_HOST", port=8443, - username='YOUR_USERNAME', - password='YOUR_CLUSTER_PASSWORD' + username="YOUR_USERNAME", + password="YOUR_CLUSTER_PASSWORD", ) # construct vector store -vector_store = MyScaleVectorStore( - myscale_client=client -) +vector_store = MyScaleVectorStore(myscale_client=client) ``` **Timescale** @@ -391,7 +374,7 @@ vector_store = MyScaleVectorStore( from llama_index.vector_stores import TimescaleVectorStore vector_store = TimescaleVectorStore.from_params( - service_url='YOUR TIMESCALE SERVICE URL', + service_url="YOUR TIMESCALE SERVICE URL", table_name="paul_graham_essay", ) ``` @@ -423,7 +406,7 @@ from llama_index.vector_stores import ( ) # construct vector store -vector_store = DocArrayHnswVectorStore(work_dir='hnsw_index') +vector_store = DocArrayHnswVectorStore(work_dir="hnsw_index") # alternatively, construct the in-memory vector store vector_store = DocArrayInMemoryVectorStore() @@ -464,9 +447,8 @@ neo4j_vector = Neo4jVectorStore( username="neo4j", password="pleaseletmein", url="bolt://localhost:7687", - embed_dim=1536 + embed_dim=1536, ) - ``` **Azure Cognitive Search** @@ -504,7 +486,7 @@ import dashvector from llama_index.vector_stores import DashVectorStore # init dashvector client -client = dashvector.Client(api_key='your-dashvector-api-key') +client = dashvector.Client(api_key="your-dashvector-api-key") # creating a DashVector collection client.create("quickstart", dimension=1536) @@ -523,7 +505,6 @@ LlamaIndex supports loading data from a huge number of sources. See [Data Connec Chroma stores both documents and vectors. This is an example of how to use Chroma: ```python - from llama_index.readers.chroma import ChromaReader from llama_index.indices import SummaryIndex @@ -531,10 +512,10 @@ from llama_index.indices import SummaryIndex # This requires a collection name and a persist directory. reader = ChromaReader( collection_name="chroma_collection", - persist_directory="examples/data_connectors/chroma_collection" + persist_directory="examples/data_connectors/chroma_collection", ) -query_vector=[n1, n2, n3, ...] +query_vector = [n1, n2, n3, ...] documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5) index = SummaryIndex.from_documents(documents) @@ -547,7 +528,6 @@ display(Markdown(f"<b>{response}</b>")) Qdrant also stores both documents and vectors. This is an example of how to use Qdrant: ```python - from llama_index.readers.qdrant import QdrantReader reader = QdrantReader(host="localhost") @@ -563,7 +543,6 @@ query_vector = [n1, n2, n3, ...] # for more details documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5) - ``` NOTE: Since Weaviate can store a hybrid of document and vector objects, the user may either choose to explicitly specify `class_name` and `properties` in order to query documents, or they may choose to specify a raw GraphQL query. See below for usage. 
@@ -575,7 +554,7 @@ NOTE: Since Weaviate can store a hybrid of document and vector objects, the user documents = reader.load_data( class_name="<class_name>", properties=["property1", "property2", "..."], - separate_documents=True + separate_documents=True, ) # 2) example GraphQL query @@ -598,7 +577,6 @@ NOTE: Both Pinecone and Faiss data loaders assume that the respective data sourc For instance, this is an example usage of the Pinecone data loader `PineconeReader`: ```python - from llama_index.readers.pinecone import PineconeReader reader = PineconeReader(api_key=api_key, environment="us-west1-gcp") @@ -608,12 +586,15 @@ id_to_text_map = { "id2": "text blob 2", } -query_vector=[n1, n2, n3, ..] +query_vector = [n1, n2, n3, ...] documents = reader.load_data( - index_name="quickstart", id_to_text_map=id_to_text_map, top_k=3, vector=query_vector, separate_documents=True + index_name="quickstart", + id_to_text_map=id_to_text_map, + top_k=3, + vector=query_vector, + separate_documents=True, ) - ``` [Example notebooks can be found here](https://github.com/jerryjliu/llama_index/tree/main/docs/examples/data_connectors). diff --git a/docs/getting_started/customization.rst b/docs/getting_started/customization.rst index b0f7ea8723d9414b2245da6c486a3af0bde83093..f0a152d2b1f2e500486560135c58e10cb9514995 100644 --- a/docs/getting_started/customization.rst +++ b/docs/getting_started/customization.rst @@ -9,7 +9,7 @@ In this tutorial, we start with the code you wrote for the `starter example <sta from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") @@ -22,6 +22,7 @@ In this tutorial, we start with the code you wrote for the `starter example <sta .. code-block:: python from llama_index import ServiceContext + service_context = ServiceContext.from_defaults(chunk_size=1000) The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ is a bundle of services and configurations used across a LlamaIndex pipeline. 
@@ -31,7 +32,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents, service_context=service_context) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") @@ -59,7 +60,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") @@ -74,7 +75,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine(similarity_top_k=5) response = query_engine.query("What did the author do growing up?") @@ -90,6 +91,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i from llama_index import ServiceContext from llama_index.llms import PaLM + service_context = ServiceContext.from_defaults(llm=PaLM()) You can learn more about `customizing LLMs </module_guides/models/llms.html>`_. @@ -99,7 +101,7 @@ You can learn more about `customizing LLMs </module_guides/models/llms.html>`_. from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine(service_context=service_context) response = query_engine.query("What did the author do growing up?") @@ -115,9 +117,9 @@ You can learn more about `customizing LLMs </module_guides/models/llms.html>`_. 
from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) - query_engine = index.as_query_engine(response_mode='tree_summarize') + query_engine = index.as_query_engine(response_mode="tree_summarize") response = query_engine.query("What did the author do growing up?") print(response) @@ -133,7 +135,7 @@ You can learn more about `query engines <../core_modules/query_modules/query_eng from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine(streaming=True) response = query_engine.query("What did the author do growing up?") @@ -150,7 +152,7 @@ You can learn more about `streaming responses </module_guides/putting_it_all_tog from llama_index import VectorStoreIndex, SimpleDirectoryReader - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_chat_engine() response = query_engine.chat("What did the author do growing up?") diff --git a/docs/getting_started/starter_example.md b/docs/getting_started/starter_example.md index 10f15ff0c8c9b2a8fb551cdbfa176f9bbf51102a..656421d1c987419c2d107de68f3925e7ffcbe394 100644 --- a/docs/getting_started/starter_example.md +++ b/docs/getting_started/starter_example.md @@ -33,7 +33,7 @@ In the same folder where you created the `data` folder, create a file called `st ```python from llama_index import VectorStoreIndex, SimpleDirectoryReader -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) ``` @@ -87,18 +87,23 @@ Of course, you don't get the benefits of persisting unless you load the data. 
So ```python import os.path -from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage +from llama_index import ( + VectorStoreIndex, + SimpleDirectoryReader, + StorageContext, + load_index_from_storage, +) # check if storage already exists -if (not os.path.exists('./storage')): +if not os.path.exists("./storage"): # load the documents and create the index - documents = SimpleDirectoryReader('data').load_data() + documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) # store it for later index.storage_context.persist() else: # load the existing index - storage_context = StorageContext.from_defaults(persist_dir='./storage') + storage_context = StorageContext.from_defaults(persist_dir="./storage") index = load_index_from_storage(storage_context) # either way we can now query the index diff --git a/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md b/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md index 815b1eb9e973e7e4d5f42dd48df5de395867e451..1d9f4573fc6c4939cf7bc2ce3e8e923bc151c29a 100644 --- a/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md +++ b/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md @@ -32,9 +32,9 @@ from llama_hub.wikipedia.base import WikipediaReader from llama_index.tools.ondemand_loader_tool import OnDemandLoaderTool tool = OnDemandLoaderTool.from_defaults( - reader, - name="Wikipedia Tool", - description="A tool for loading data and querying articles from Wikipedia" + reader, + name="Wikipedia Tool", + description="A tool for loading data and querying articles from Wikipedia", ) ``` @@ -58,8 +58,6 @@ tool = wiki_spec.to_tool_list()[1] # Create the Agent with load/search tools agent = OpenAIAgent.from_tools( - LoadAndSearchToolSpec.from_defaults( - tool - ).to_tool_list(), verbose=True + LoadAndSearchToolSpec.from_defaults(tool).to_tool_list(), verbose=True ) ``` diff --git a/docs/module_guides/deploying/agents/tools/root.md b/docs/module_guides/deploying/agents/tools/root.md index 70b8e35df3fe6cbe9a3da561b0baf5e8a0333e43..0d4ade35d617de381c4039f2a3a6d6d33a133a4f 100644 --- a/docs/module_guides/deploying/agents/tools/root.md +++ b/docs/module_guides/deploying/agents/tools/root.md @@ -31,7 +31,6 @@ from llama_hub.tools.gmail.base import GmailToolSpec tool_spec = GmailToolSpec() agent = OpenAIAgent.from_tools(tool_spec.to_tool_list(), verbose=True) - ``` See our Usage Pattern Guide for more details. diff --git a/docs/module_guides/deploying/agents/tools/usage_pattern.md b/docs/module_guides/deploying/agents/tools/usage_pattern.md index 904dadd47e2547a6bd24bfd1c53c9ed93b234b57..a7f687848a3a5191f7a9884521ee26e681cb9865 100644 --- a/docs/module_guides/deploying/agents/tools/usage_pattern.md +++ b/docs/module_guides/deploying/agents/tools/usage_pattern.md @@ -14,6 +14,7 @@ from llama_index.tools.function_tool import FunctionTool # Use a tool spec from Llama-Hub tool_spec = GmailToolSpec() + # Create a custom tool. Type annotations and docstring are used for the # tool definition sent to the Function calling API. 
def add_numbers(x: int, y: int) -> int: @@ -22,13 +23,16 @@ def add_numbers(x: int, y: int) -> int: """ return x + y + function_tool = FunctionTool.from_defaults(fn=add_numbers) tools = tool_spec.to_tool_list() + [function_tool] agent = OpenAIAgent.from_tools(tools, verbose=True) # use agent -agent.chat("Can you create a new email to helpdesk and support @example.com about a service outage") +agent.chat( + "Can you create a new email to helpdesk and support @example.com about a service outage" +) ``` Full Tool details can be found on our [LlamaHub](https://llamahub.ai) page. Each tool contains a "Usage" section showing how that tool can be used. @@ -46,5 +50,4 @@ from langchain.agents import initialize_agent agent_executor = initialize_agent( langchain_tools, llm, agent="conversational-react-description", memory=memory ) - ``` diff --git a/docs/module_guides/deploying/agents/usage_pattern.md b/docs/module_guides/deploying/agents/usage_pattern.md index 4122171d1151909e6cab25d7a53c09f9ebac868f..3b77b84770539916adcc66bfef883832a220b6f9 100644 --- a/docs/module_guides/deploying/agents/usage_pattern.md +++ b/docs/module_guides/deploying/agents/usage_pattern.md @@ -10,11 +10,13 @@ from llama_index.tools import FunctionTool from llama_index.llms import OpenAI from llama_index.agent import ReActAgent + # define sample Tool def multiply(a: int, b: int) -> int: """Multiple two integers and returns the result integer""" return a * b + multiply_tool = FunctionTool.from_defaults(fn=multiply) # initialize llm @@ -37,7 +39,6 @@ agent.chat("What is 2123 * 215123") It is easy to wrap query engines as tools for an agent as well. Simply do the following: ```python - from llama_index.agent import ReActAgent from llama_index.tools import QueryEngineTool @@ -66,7 +67,6 @@ query_engine_tools = [ # initialize ReAct agent agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True) - ``` ## Use other agents as Tools @@ -81,15 +81,13 @@ query_engine_tools = [ QueryEngineTool( query_engine=sql_agent, metadata=ToolMetadata( - name="sql_agent", - description="Agent that can execute SQL queries." + name="sql_agent", description="Agent that can execute SQL queries." ), ), QueryEngineTool( query_engine=gmail_agent, metadata=ToolMetadata( - name="gmail_agent", - description="Tool that can send emails on Gmail." + name="gmail_agent", description="Tool that can send emails on Gmail." ), ), ] @@ -189,5 +187,4 @@ agent = OpenAIAgent.from_tools( # should output a query plan to call march, june, and september tools response = agent.query("Analyze Uber revenue growth in March, June, and September") - ``` diff --git a/docs/module_guides/deploying/chat_engines/usage_pattern.md b/docs/module_guides/deploying/chat_engines/usage_pattern.md index b04e3fc31313401522ec35e8c0835446707487fb..29f3f4ec02297fd7b1031e7082ae8ff0d1e467b5 100644 --- a/docs/module_guides/deploying/chat_engines/usage_pattern.md +++ b/docs/module_guides/deploying/chat_engines/usage_pattern.md @@ -39,10 +39,7 @@ Configuring a chat engine is very similar to configuring a query engine. You can directly build and configure a chat engine from an index in 1 line of code: ```python -chat_engine = index.as_chat_engine( - chat_mode='condense_question', - verbose=True -) +chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True) ``` > Note: you can access different chat engines by specifying the `chat_mode` as a kwarg. 
`condense_question` corresponds to `CondenseQuestionChatEngine`, `react` corresponds to `ReActChatEngine`, `context` corresponds to a `ContextChatEngine`. @@ -72,11 +69,12 @@ Here's an example where we configure the following: - print verbose debug message. ```python -from llama_index.prompts import PromptTemplate +from llama_index.prompts import PromptTemplate from llama_index.llms import ChatMessage, MessageRole from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine -custom_prompt = PromptTemplate("""\ +custom_prompt = PromptTemplate( + """\ Given a conversation (between Human and Assistant) and a follow up message from Human, \ rewrite the message to be a standalone question that captures all relevant context \ from the conversation. @@ -88,18 +86,16 @@ from the conversation. {question} <Standalone question> -""") +""" +) # list of `ChatMessage` objects custom_chat_history = [ ChatMessage( role=MessageRole.USER, - content='Hello assistant, we are having a insightful discussion about Paul Graham today.' + content="Hello assistant, we are having a insightful discussion about Paul Graham today.", ), - ChatMessage( - role=MessageRole.ASSISTANT, - content='Okay, sounds good.' - ) + ChatMessage(role=MessageRole.ASSISTANT, content="Okay, sounds good."), ] query_engine = index.as_query_engine() @@ -107,7 +103,7 @@ chat_engine = CondenseQuestionChatEngine.from_defaults( query_engine=query_engine, condense_question_prompt=custom_prompt, chat_history=custom_chat_history, - verbose=True + verbose=True, ) ``` diff --git a/docs/module_guides/deploying/query_engine/streaming.md b/docs/module_guides/deploying/query_engine/streaming.md index 20a9974a9bcd246e9bd849719656b8371e7453b9..dfa5218fc7703be2c2be4cd372843f1c189a1240 100644 --- a/docs/module_guides/deploying/query_engine/streaming.md +++ b/docs/module_guides/deploying/query_engine/streaming.md @@ -14,10 +14,7 @@ Configure query engine to use streaming: If you are using the high-level API, set `streaming=True` when building a query engine. ```python -query_engine = index.as_query_engine( - streaming=True, - similarity_top_k=1 -) +query_engine = index.as_query_engine(streaming=True, similarity_top_k=1) ``` If you are using the low-level API to compose the query engine, @@ -25,6 +22,7 @@ pass `streaming=True` when constructing the `Response Synthesizer`: ```python from llama_index import get_response_synthesizer + synth = get_response_synthesizer(streaming=True, ...) query_engine = RetrieverQueryEngine(response_synthesizer=synth, ...) ``` @@ -49,6 +47,7 @@ You can obtain a `Generator` from the streaming response and iterate over the to ```python for text in streaming_response.response_gen: # do something with text as they arrive. + pass ``` Alternatively, if you just want to print the text as they arrive: diff --git a/docs/module_guides/deploying/query_engine/usage_pattern.md b/docs/module_guides/deploying/query_engine/usage_pattern.md index f97fc648dc142cded34c3da8854f146f53bb87ca..a106e749872539f67cef888ba559d282390c05c3 100644 --- a/docs/module_guides/deploying/query_engine/usage_pattern.md +++ b/docs/module_guides/deploying/query_engine/usage_pattern.md @@ -15,7 +15,7 @@ To learn how to build an index, see [Indexing](/module_guides/indexing/indexing. 
Ask a question over your data ```python -response = query_engine.query('Who is Paul Graham?') +response = query_engine.query("Who is Paul Graham?") ``` ## Configuring a Query Engine @@ -26,7 +26,7 @@ You can directly build and configure a query engine from an index in 1 line of c ```python query_engine = index.as_query_engine( - response_mode='tree_summarize', + response_mode="tree_summarize", verbose=True, ) ``` @@ -110,6 +110,7 @@ from llama_index.query_engine import CustomQueryEngine from llama_index.retrievers import BaseRetriever from llama_index.response_synthesizers import get_response_synthesizer, BaseSynthesizer + class RAGQueryEngine(CustomQueryEngine): """RAG Query Engine.""" @@ -120,7 +121,6 @@ class RAGQueryEngine(CustomQueryEngine): nodes = self.retriever.retrieve(query_str) response_obj = self.response_synthesizer.synthesize(query_str, nodes) return response_obj - ``` See the [Custom Query Engine guide](/examples/query_engine/custom_query_engine.ipynb) for more details. diff --git a/docs/module_guides/evaluating/usage_pattern.md b/docs/module_guides/evaluating/usage_pattern.md index 23ea2414dc2204222a387558ef958f9878a16b38..3c031af42f232dc1479f37690973e2f93e5dbe86 100644 --- a/docs/module_guides/evaluating/usage_pattern.md +++ b/docs/module_guides/evaluating/usage_pattern.md @@ -6,7 +6,7 @@ All of the evaluation modules in LlamaIndex implement the `BaseEvaluator` class, 1. The `evaluate` method takes in `query`, `contexts`, `response`, and additional keyword arguments. -```python +``` def evaluate( self, query: Optional[str] = None, @@ -18,7 +18,7 @@ All of the evaluation modules in LlamaIndex implement the `BaseEvaluator` class, 2. The `evaluate_response` method provide an alternative interface that takes in a llamaindex `Response` object (which contains response string and source nodes) instead of separate `contexts` and `response`. -```python +``` def evaluate_response( self, query: Optional[str] = None, @@ -63,7 +63,9 @@ evaluator = FaithfulnessEvaluator(service_context=service_context) # query index query_engine = vector_index.as_query_engine() -response = query_engine.query("What battles took place in New York City in the American Revolution?") +response = query_engine.query( + "What battles took place in New York City in the American Revolution?" +) eval_result = evaluator.evaluate_response(response=response) print(str(eval_result.passing)) ``` @@ -89,12 +91,15 @@ evaluator = FaithfulnessEvaluator(service_context=service_context) # query index query_engine = vector_index.as_query_engine() -response = query_engine.query("What battles took place in New York City in the American Revolution?") +response = query_engine.query( + "What battles took place in New York City in the American Revolution?" +) response_str = response.response for source_node in response.source_nodes: - eval_result = evaluator.evaluate(response=response_str, contexts=[source_node.get_content()]) + eval_result = evaluator.evaluate( + response=response_str, contexts=[source_node.get_content()] + ) print(str(eval_result.passing)) - ``` You'll get back a list of results, corresponding to each source node in `response.source_nodes`. @@ -126,7 +131,6 @@ query = "What battles took place in New York City in the American Revolution?" response = query_engine.query(query) eval_result = evaluator.evaluate_response(query=query, response=response) print(str(eval_result)) - ```  @@ -154,7 +158,9 @@ query = "What battles took place in New York City in the American Revolution?" 
response = query_engine.query(query) response_str = response.response for source_node in response.source_nodes: - eval_result = evaluator.evaluate(query=query, response=response_str, contexts=[source_node.get_content()]) + eval_result = evaluator.evaluate( + query=query, response=response_str, contexts=[source_node.get_content()] + ) print(str(eval_result.passing)) ``` @@ -190,10 +196,7 @@ We also provide a batch evaluation runner for running a set of evaluators across from llama_index.evaluation import BatchEvalRunner runner = BatchEvalRunner( - { - "faithfulness": faithfulness_evaluator, " - "relevancy": relevancy_evaluator - }, + {"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator}, workers=8, ) diff --git a/docs/module_guides/evaluating/usage_pattern_retrieval.md b/docs/module_guides/evaluating/usage_pattern_retrieval.md index 653aead5dcfaaff5c7717412d668d6ab3afc7621..6600f6d4892e58d62efe3bc0fab88958a490ae6a 100644 --- a/docs/module_guides/evaluating/usage_pattern_retrieval.md +++ b/docs/module_guides/evaluating/usage_pattern_retrieval.md @@ -17,10 +17,7 @@ retriever_evaluator = RetrieverEvaluator.from_metric_names( ["mrr", "hit_rate"], retriever=retriever ) -retriever_evaluator.evaluate( - query="query", - expected_ids=["node_id1", "node_id2"] -) +retriever_evaluator.evaluate(query="query", expected_ids=["node_id1", "node_id2"]) ``` ## Building an Evaluation Dataset @@ -30,12 +27,7 @@ You can manually curate a retrieval evaluation dataset of questions + node id's. ```python from llama_index.evaluation import generate_question_context_pairs -qa_dataset = generate_question_context_pairs( - nodes, - llm=llm, - num_questions_per_chunk=2 -) - +qa_dataset = generate_question_context_pairs(nodes, llm=llm, num_questions_per_chunk=2) ``` The returned result is a `EmbeddingQAFinetuneDataset` object (containing `queries`, `relevant_docs`, and `corpus`). @@ -46,7 +38,6 @@ We offer a convenience function to run a `RetrieverEvaluator` over a dataset in ```python eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset) - ``` This should run much faster than you trying to call `.evaluate` on each query separately. diff --git a/docs/module_guides/indexing/composability.md b/docs/module_guides/indexing/composability.md index e0c6cb878d048eec5dde32d0ea12324900539187..3f2afe255c11c4169ddc4139e2277b676a2d9077 100644 --- a/docs/module_guides/indexing/composability.md +++ b/docs/module_guides/indexing/composability.md @@ -11,9 +11,9 @@ To see how this works, imagine you have 3 documents: `doc1`, `doc2`, and `doc3`. 
```python from llama_index import SimpleDirectoryReader -doc1 = SimpleDirectoryReader('data1').load_data() -doc2 = SimpleDirectoryReader('data2').load_data() -doc3 = SimpleDirectoryReader('data3').load_data() +doc1 = SimpleDirectoryReader("data1").load_data() +doc2 = SimpleDirectoryReader("data2").load_data() +doc3 = SimpleDirectoryReader("data3").load_data() ```  @@ -49,9 +49,7 @@ You may choose to manually specify the summary text, or use LlamaIndex itself to a summary, for instance with the following: ```python -summary = index1.query( - "What is a summary of this document?", retriever_mode="all_leaf" -) +summary = index1.query("What is a summary of this document?", retriever_mode="all_leaf") index1_summary = str(summary) ``` @@ -71,7 +69,6 @@ graph = ComposableGraph.from_indices( index_summaries=[index1_summary, index2_summary, index3_summary], storage_context=storage_context, ) - ```  @@ -88,14 +85,10 @@ More detail on how to configure `ComposableGraphQueryEngine` can be found [here] ```python # set custom retrievers. An example is provided below custom_query_engines = { - index.index_id: index.as_query_engine( - child_branch_factor=2 - ) + index.index_id: index.as_query_engine(child_branch_factor=2) for index in [index1, index2, index3] } -query_engine = graph.as_query_engine( - custom_query_engines=custom_query_engines -) +query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines) response = query_engine.query("Where did the author grow up?") ``` diff --git a/docs/module_guides/indexing/document_management.md b/docs/module_guides/indexing/document_management.md index fca27c8c0de57a56e779c9a51eca48d05f4c1e0a..0abfc8898a86ea76c896439d558f9978e93e6696 100644 --- a/docs/module_guides/indexing/document_management.md +++ b/docs/module_guides/indexing/document_management.md @@ -18,7 +18,7 @@ An example code snippet is given below: from llama_index import SummaryIndex, Document index = SummaryIndex([]) -text_chunks = ['text_chunk_1', 'text_chunk_2', 'text_chunk_3'] +text_chunks = ["text_chunk_1", "text_chunk_2", "text_chunk_3"] doc_chunks = [] for i, text in enumerate(text_chunks): @@ -48,8 +48,7 @@ If a Document is already present within an index, you can "update" a Document wi # NOTE: the document has a `doc_id` specified doc_chunks[0].text = "Brand new document text" index.update_ref_doc( - doc_chunks[0], - update_kwargs={"delete_kwargs": {'delete_from_docstore': True}} + doc_chunks[0], update_kwargs={"delete_kwargs": {"delete_from_docstore": True}} ) ``` @@ -65,15 +64,16 @@ The `refresh()` function will only update documents who have the same doc `id_`, ```python # modify first document, with the same doc_id -doc_chunks[0] = Document(text='Super new document text', id_="doc_id_0") +doc_chunks[0] = Document(text="Super new document text", id_="doc_id_0") # add a new document -doc_chunks.append(Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3")) +doc_chunks.append( + Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3") +) # refresh the index refreshed_docs = index.refresh_ref_docs( - doc_chunks, - update_kwargs={"delete_kwargs": {'delete_from_docstore': True}} + doc_chunks, update_kwargs={"delete_kwargs": {"delete_from_docstore": True}} ) # refreshed_docs[0] and refreshed_docs[-1] should be true @@ -85,7 +85,7 @@ If you `print()` the output of `refresh()`, you would see which input documents ```python print(refreshed_docs) -> [True, False, False, True] +# > [True, False, False, True] ``` This is most useful 
when you are reading from a directory that is constantly updating with new information. @@ -98,10 +98,12 @@ Any index that uses the docstore (i.e. all indexes except for most vector store ```python print(index.ref_doc_info) +""" > {'doc_id_1': RefDocInfo(node_ids=['071a66a8-3c47-49ad-84fa-7010c6277479'], metadata={}), 'doc_id_2': RefDocInfo(node_ids=['9563e84b-f934-41c3-acfd-22e88492c869'], metadata={}), 'doc_id_0': RefDocInfo(node_ids=['b53e6c2f-16f7-4024-af4c-42890e945f36'], metadata={}), 'doc_id_3': RefDocInfo(node_ids=['6bedb29f-15db-4c7c-9885-7490e10aa33f'], metadata={})} +""" ``` Each entry in the output shows the ingested doc `id_`s as keys, and their associated `node_ids` of the nodes they were split into. diff --git a/docs/module_guides/indexing/metadata_extraction.md b/docs/module_guides/indexing/metadata_extraction.md index 533af27438ca70da83f59176a81c904abc1a75f7..7c6a99c7b7f1eb51df0fe186ac37dabbea74e878 100644 --- a/docs/module_guides/indexing/metadata_extraction.md +++ b/docs/module_guides/indexing/metadata_extraction.md @@ -59,6 +59,7 @@ If the provided extractors do not fit your needs, you can also define a custom e ```python from llama_index.node_parser.extractors import MetadataFeatureExtractor + class CustomExtractor(MetadataFeatureExtractor): def extract(self, nodes) -> List[Dict]: metadata_list = [ diff --git a/docs/module_guides/indexing/usage_pattern.md b/docs/module_guides/indexing/usage_pattern.md index 9babb05d640cbf131bb0993b164532408672e86e..570b7b49831a420df0e2637e78f89092dad4ebc9 100644 --- a/docs/module_guides/indexing/usage_pattern.md +++ b/docs/module_guides/indexing/usage_pattern.md @@ -36,9 +36,7 @@ from llama_index import ServiceContext, VectorStoreIndex service_context = ServiceContext.from_defaults(chunk_size=512) index = VectorStoreIndex.from_documents( - docs, - service_context=service_context, - show_progress=True + docs, service_context=service_context, show_progress=True ) ``` diff --git a/docs/module_guides/loading/connector/root.md b/docs/module_guides/loading/connector/root.md index 3fdeb93ec39d4db7ce8b2d1d947d58c176dcf62a..048e5cfd067b6a06ffb829a33a1351791a59386b 100644 --- a/docs/module_guides/loading/connector/root.md +++ b/docs/module_guides/loading/connector/root.md @@ -22,7 +22,7 @@ Get started with: ```python from llama_index import download_loader -GoogleDocsReader = download_loader('GoogleDocsReader') +GoogleDocsReader = download_loader("GoogleDocsReader") loader = GoogleDocsReader() documents = loader.load_data(document_ids=[...]) ``` diff --git a/docs/module_guides/loading/connector/usage_pattern.md b/docs/module_guides/loading/connector/usage_pattern.md index 5b50f40343c3d7835a0984ee7e697199b6d7b041..d9a45b75ba53d43ba3465e7964ed96130c2672ed 100644 --- a/docs/module_guides/loading/connector/usage_pattern.md +++ b/docs/module_guides/loading/connector/usage_pattern.md @@ -10,12 +10,12 @@ Example usage: ```python from llama_index import VectorStoreIndex, download_loader -GoogleDocsReader = download_loader('GoogleDocsReader') +GoogleDocsReader = download_loader("GoogleDocsReader") -gdoc_ids = ['1wf-y2pd9C878Oh-FmLH7Q_BQkljdm6TQal-c1pUfrec'] +gdoc_ids = ["1wf-y2pd9C878Oh-FmLH7Q_BQkljdm6TQal-c1pUfrec"] loader = GoogleDocsReader() documents = loader.load_data(document_ids=gdoc_ids) index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() -query_engine.query('Where did the author go to school?') +query_engine.query("Where did the author go to school?") ``` diff --git 
a/docs/module_guides/loading/documents_and_nodes/root.md b/docs/module_guides/loading/documents_and_nodes/root.md index 04a0f54687f5bbfb3aab4b6480069376d08494b6..9f69c1c4a27deece9623f7de448b1309474d1638 100644 --- a/docs/module_guides/loading/documents_and_nodes/root.md +++ b/docs/module_guides/loading/documents_and_nodes/root.md @@ -29,13 +29,11 @@ documents = [Document(text=t) for t in text_list] # build index index = VectorStoreIndex.from_documents(documents) - ``` #### Nodes ```python - from llama_index.node_parser import SimpleNodeParser # load documents @@ -47,5 +45,4 @@ nodes = parser.get_nodes_from_documents(documents) # build index index = VectorStoreIndex(nodes) - ``` diff --git a/docs/module_guides/loading/documents_and_nodes/usage_documents.md b/docs/module_guides/loading/documents_and_nodes/usage_documents.md index 94b6686127651b1d1833f652fe5eaf258037302c..41195db50412b547fa966ae2922fe0c496b5663c 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_documents.md +++ b/docs/module_guides/loading/documents_and_nodes/usage_documents.md @@ -9,7 +9,7 @@ By default, all of our [data loaders](/module_guides/loading/connector/root.md) ```python from llama_index import SimpleDirectoryReader -documents = SimpleDirectoryReader('./data').load_data() +documents = SimpleDirectoryReader("./data").load_data() ``` You can also choose to construct documents manually. LlamaIndex exposes the `Document` struct. @@ -43,28 +43,25 @@ There are a few ways to set up this dictionary: ```python document = Document( - text='text', - metadata={ - 'filename': '<doc_file_name>', - 'category': '<category>' - } + text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"} ) ``` 2. After the document is created: ```python -document.metadata = {'filename': '<doc_file_name>'} +document.metadata = {"filename": "<doc_file_name>"} ``` 3. Set the filename automatically using the `SimpleDirectoryReader` and `file_metadata` hook. This will automatically run the hook on each document to set the `metadata` field: ```python from llama_index import SimpleDirectoryReader -filename_fn = lambda filename: {'file_name': filename} + +filename_fn = lambda filename: {"file_name": filename} # automatically sets the metadata of each document according to filename_fn -documents = SimpleDirectoryReader('./data', file_metadata=filename_fn).load_data() +documents = SimpleDirectoryReader("./data", file_metadata=filename_fn).load_data() ``` ### Customizing the id @@ -97,13 +94,14 @@ Typically, a document might have many metadata keys, but you might not want all We can exclude it like so: ```python -document.excluded_llm_metadata_keys = ['file_name'] +document.excluded_llm_metadata_keys = ["file_name"] ``` Then, we can test what the LLM will actually end up reading using the `get_content()` function and specifying `MetadataMode.LLM`: ```python from llama_index.schema import MetadataMode + print(document.get_content(metadata_mode=MetadataMode.LLM)) ``` @@ -112,13 +110,14 @@ print(document.get_content(metadata_mode=MetadataMode.LLM)) Similar to customing the metadata visible to the LLM, we can also customize the metadata visible to embeddings. In this case, you can specifically exclude metadata visible to the embedding model, in case you DON'T want particular text to bias the embeddings. 
```python -document.excluded_embed_metadata_keys = ['file_name'] +document.excluded_embed_metadata_keys = ["file_name"] ``` Then, we can test what the embedding model will actually end up reading using the `get_content()` function and specifying `MetadataMode.EMBED`: ```python from llama_index.schema import MetadataMode + print(document.get_content(metadata_mode=MetadataMode.EMBED)) ``` @@ -151,16 +150,19 @@ document = Document( metadata={ "file_name": "super_secret_document.txt", "category": "finance", - "author": "LlamaIndex" + "author": "LlamaIndex", }, - excluded_llm_metadata_keys=['file_name'], + excluded_llm_metadata_keys=["file_name"], metadata_seperator="::", metadata_template="{key}=>{value}", text_template="Metadata: {metadata_str}\n-----\nContent: {content}", ) print("The LLM sees this: \n", document.get_content(metadata_mode=MetadataMode.LLM)) -print("The Embedding model sees this: \n", document.get_content(metadata_mode=MetadataMode.EMBED)) +print( + "The Embedding model sees this: \n", + document.get_content(metadata_mode=MetadataMode.EMBED), +) ``` ### Advanced - Automatic Metadata Extraction diff --git a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md index e02cd7a91fb2cfd68b4237ed37bc8a2e990672d6..9bb6cb7c85ede8b0a1e8bb984ea44b5d3973c024 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md +++ b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md @@ -15,7 +15,7 @@ You can use these feature extractors within our overall `MetadataExtractor` clas from llama_index.node_parser.extractors import ( MetadataExtractor, TitleExtractor, - QuestionsAnsweredExtractor + QuestionsAnsweredExtractor, ) from llama_index.text_splitter import TokenTextSplitter diff --git a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md index 2b2ed48a8765a1ba93870fb1b2377c267c789393..6bebdce04ed147a37cd7008d8425cdcc48a37185 100644 --- a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md +++ b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md @@ -31,7 +31,9 @@ nodes = [node1, node2] The `RelatedNodeInfo` class can also store additional `metadata` if needed: ```python -node2.relationships[NodeRelationship.PARENT] = RelatedNodeInfo(node_id=node1.node_id, metadata={"key": "val"}) +node2.relationships[NodeRelationship.PARENT] = RelatedNodeInfo( + node_id=node1.node_id, metadata={"key": "val"} +) ``` ### Customizing the ID @@ -45,5 +47,4 @@ You can also get and set the `node_id` of any `TextNode` directly. ```python print(node.node_id) node.node_id = "My new node_id!" 
- ``` diff --git a/docs/module_guides/loading/node_parsers/root.md b/docs/module_guides/loading/node_parsers/root.md index 84fffc7f98aa5ba520909aa36f20b36c0bed9313..6a04db518c629d0fdce8190cd70c3f240d5a460c 100644 --- a/docs/module_guides/loading/node_parsers/root.md +++ b/docs/module_guides/loading/node_parsers/root.md @@ -26,7 +26,9 @@ from llama_index.node_parser import SimpleNodeParser node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20) -nodes = node_parser.get_nodes_from_documents([Document(text="long text")], show_progress=False) +nodes = node_parser.get_nodes_from_documents( + [Document(text="long text")], show_progress=False +) ``` Or set inside a `ServiceContext` to be used automatically when an index is constructed using `.from_documents()`: @@ -71,12 +73,12 @@ import tiktoken from llama_index.text_splitter import SentenceSplitter text_splitter = SentenceSplitter( - separator=" ", - chunk_size=1024, - chunk_overlap=20, - paragraph_separator="\n\n\n", - secondary_chunking_regex="[^,.;。]+[,.;。]?", - tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode + separator=" ", + chunk_size=1024, + chunk_overlap=20, + paragraph_separator="\n\n\n", + secondary_chunking_regex="[^,.;。]+[,.;。]?", + tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode, ) node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) @@ -89,11 +91,11 @@ import tiktoken from llama_index.text_splitter import TokenTextSplitter text_splitter = TokenTextSplitter( - separator=" ", - chunk_size=1024, - chunk_overlap=20, - backup_separators=["\n"], - tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode + separator=" ", + chunk_size=1024, + chunk_overlap=20, + backup_separators=["\n"], + tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode, ) node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) @@ -105,10 +107,10 @@ node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) from llama_index.text_splitter import CodeSplitter text_splitter = CodeSplitter( - language="python", - chunk_lines=40, - chunk_lines_overlap=15, - max_chars=1500, + language="python", + chunk_lines=40, + chunk_lines_overlap=15, + max_chars=1500, ) node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter) @@ -127,12 +129,12 @@ import nltk from llama_index.node_parser import SentenceWindowNodeParser node_parser = SentenceWindowNodeParser.from_defaults( - # how many sentences on either side to capture - window_size=3, - # the metadata key that holds the window of surrounding sentences - window_metadata_key="window", - # the metadata key that holds the original sentence - original_text_metadata_key="original_sentence" + # how many sentences on either side to capture + window_size=3, + # the metadata key that holds the window of surrounding sentences + window_metadata_key="window", + # the metadata key that holds the original sentence + original_text_metadata_key="original_sentence", ) ``` diff --git a/docs/module_guides/models/embeddings.md b/docs/module_guides/models/embeddings.md index 8e3cae80923825cca6b61301b25e9b5658d8d692..dcafee735f67ffb93973817bb08642e3ff11e905 100644 --- a/docs/module_guides/models/embeddings.md +++ b/docs/module_guides/models/embeddings.md @@ -26,6 +26,7 @@ To save costs, you may want to use a local model. 
```python from llama_index import ServiceContext + service_context = ServiceContext.from_defaults(embed_model="local") ``` @@ -48,6 +49,7 @@ service_context = ServiceContext.from_defaults(embed_model=embed_model) # optionally set a global service context to avoid passing it into other objects every time from llama_index import set_global_service_context + set_global_service_context(service_context) documents = SimpleDirectoryReader("./data").load_data() @@ -80,6 +82,7 @@ The easiest way to use a local model is: ```python from llama_index import ServiceContext + service_context = ServiceContext.from_defaults(embed_model="local") ``` @@ -88,9 +91,7 @@ To configure the model used (from Hugging Face hub), add the model name separate ```python from llama_index import ServiceContext -service_context = ServiceContext.from_defaults( - embed_model="local:BAAI/bge-large-en" -) +service_context = ServiceContext.from_defaults(embed_model="local:BAAI/bge-large-en") ``` ### HuggingFace Optimum ONNX Embeddings @@ -115,9 +116,7 @@ And then usage: ```python embed_model = OptimumEmbedding(folder_name="./bge_onnx") -service_context = ServiceContext.from_defaults( - embed_model=embed_model -) +service_context = ServiceContext.from_defaults(embed_model=embed_model) ``` ### LangChain Integrations @@ -148,28 +147,31 @@ from typing import Any, List from InstructorEmbedding import INSTRUCTOR from llama_index.embeddings.base import BaseEmbedding + class InstructorEmbeddings(BaseEmbedding): - def __init__( - self, - instructor_model_name: str = "hkunlp/instructor-large", - instruction: str = "Represent the Computer Science documentation or question:", - **kwargs: Any, - ) -> None: - self._model = INSTRUCTOR(instructor_model_name) - self._instruction = instruction - super().__init__(**kwargs) - - def _get_query_embedding(self, query: str) -> List[float]: - embeddings = self._model.encode([[self._instruction, query]]) - return embeddings[0] - - def _get_text_embedding(self, text: str) -> List[float]: - embeddings = self._model.encode([[self._instruction, text]]) - return embeddings[0] - - def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: - embeddings = self._model.encode([[self._instruction, text] for text in texts]) - return embeddings + def __init__( + self, + instructor_model_name: str = "hkunlp/instructor-large", + instruction: str = "Represent the Computer Science documentation or question:", + **kwargs: Any, + ) -> None: + self._model = INSTRUCTOR(instructor_model_name) + self._instruction = instruction + super().__init__(**kwargs) + + def _get_query_embedding(self, query: str) -> List[float]: + embeddings = self._model.encode([[self._instruction, query]]) + return embeddings[0] + + def _get_text_embedding(self, text: str) -> List[float]: + embeddings = self._model.encode([[self._instruction, text]]) + return embeddings[0] + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + embeddings = self._model.encode( + [[self._instruction, text] for text in texts] + ) + return embeddings ``` ## Standalone Usage diff --git a/docs/module_guides/models/llms.md b/docs/module_guides/models/llms.md index c8f9a87cf5ab59862def9e0d0bab91bb66ea0a38..7114f092aae38bc5b2c3a35fd4c7f6c28cb0350b 100644 --- a/docs/module_guides/models/llms.md +++ b/docs/module_guides/models/llms.md @@ -21,7 +21,7 @@ The following code snippet shows how you can get started using LLMs. 
from llama_index.llms import OpenAI # non-streaming -resp = OpenAI().complete('Paul Graham is ') +resp = OpenAI().complete("Paul Graham is ") print(resp) ``` diff --git a/docs/module_guides/models/llms/usage_custom.md b/docs/module_guides/models/llms/usage_custom.md index 979e13daaf9bf63e9c408ea37e5ac3504ae432bc..50437883201029f08a1bb34f0dcfe164e94b3339 100644 --- a/docs/module_guides/models/llms/usage_custom.md +++ b/docs/module_guides/models/llms/usage_custom.md @@ -21,18 +21,18 @@ you may also plug in any LLM shown on Langchain's [LLM](https://python.langchain.com/docs/integrations/llms/) page. ```python - from llama_index import ( KeywordTableIndex, SimpleDirectoryReader, LLMPredictor, - ServiceContext + ServiceContext, ) from llama_index.llms import OpenAI + # alternatively # from langchain.llms import ... -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() # define LLM llm = OpenAI(temperature=0.1, model="gpt-4") @@ -44,7 +44,6 @@ index = KeywordTableIndex.from_documents(documents, service_context=service_cont # get response from query query_engine = index.as_query_engine() response = query_engine.query("What did the author do after his time at Y Combinator?") - ``` ## Example: Changing the number of output tokens (for OpenAI, Cohere, AI21) @@ -56,20 +55,14 @@ For OpenAI, Cohere, AI21, you just need to set the `max_tokens` parameter (or maxTokens for AI21). We will handle text chunking/calculations under the hood. ```python - -from llama_index import ( - KeywordTableIndex, - SimpleDirectoryReader, - ServiceContext -) +from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext from llama_index.llms import OpenAI -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() # define LLM llm = OpenAI(temperature=0, model="text-davinci-002", max_tokens=512) service_context = ServiceContext.from_defaults(llm=llm) - ``` ## Example: Explicitly configure `context_window` and `num_output` @@ -77,17 +70,13 @@ service_context = ServiceContext.from_defaults(llm=llm) If you are using other LLM classes from langchain, you may need to explicitly configure the `context_window` and `num_output` via the `ServiceContext` since the information is not available by default. ```python - -from llama_index import ( - KeywordTableIndex, - SimpleDirectoryReader, - ServiceContext -) +from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext from llama_index.llms import OpenAI + # alternatively # from langchain.llms import ... -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() # set context window @@ -107,7 +96,6 @@ service_context = ServiceContext.from_defaults( context_window=context_window, num_output=num_output, ) - ``` ## Example: Using a HuggingFace LLM @@ -133,6 +121,7 @@ query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>") import torch from llama_index.llms import HuggingFaceLLM + llm = HuggingFaceLLM( context_window=4096, max_new_tokens=256, @@ -157,7 +146,7 @@ Some models will raise errors if all the keys from the tokenizer are passed to t ```python HuggingFaceLLM( - ... + # ... 
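    # keys listed here are dropped from the tokenizer output before generation,
    # e.g. some tokenizers emit token_type_ids that the model cannot accept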
tokenizer_outputs_to_remove=["token_type_ids"] ) ``` @@ -183,11 +172,7 @@ import torch from transformers import pipeline from typing import Optional, List, Mapping, Any -from llama_index import ( - ServiceContext, - SimpleDirectoryReader, - SummaryIndex -) +from llama_index import ServiceContext, SimpleDirectoryReader, SummaryIndex from llama_index.callbacks import CallbackManager from llama_index.llms import ( CustomLLM, @@ -205,17 +190,20 @@ num_output = 256 # store the pipeline/model outside of the LLM class to avoid memory issues model_name = "facebook/opt-iml-max-30b" -pipeline = pipeline("text-generation", model=model_name, device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16}) +pipeline = pipeline( + "text-generation", + model=model_name, + device="cuda:0", + model_kwargs={"torch_dtype": torch.bfloat16}, +) -class OurLLM(CustomLLM): +class OurLLM(CustomLLM): @property def metadata(self) -> LLMMetadata: """Get LLM metadata.""" return LLMMetadata( - context_window=context_window, - num_output=num_output, - model_name=model_name + context_window=context_window, num_output=num_output, model_name=model_name ) @llm_completion_callback() @@ -231,6 +219,7 @@ class OurLLM(CustomLLM): def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: raise NotImplementedError() + # define our LLM llm = OurLLM() @@ -238,11 +227,11 @@ service_context = ServiceContext.from_defaults( llm=llm, embed_model="local:BAAI/bge-base-en-v1.5", context_window=context_window, - num_output=num_output + num_output=num_output, ) # Load the your data -documents = SimpleDirectoryReader('./data').load_data() +documents = SimpleDirectoryReader("./data").load_data() index = SummaryIndex.from_documents(documents, service_context=service_context) # Query and print response diff --git a/docs/module_guides/models/llms/usage_standalone.md b/docs/module_guides/models/llms/usage_standalone.md index 3beb398c11263f63cc6e311727ca48d9a658e036..9f07b3348aced3e8c6365de935e41ae0c1228edd 100644 --- a/docs/module_guides/models/llms/usage_standalone.md +++ b/docs/module_guides/models/llms/usage_standalone.md @@ -8,15 +8,16 @@ You can use our LLM modules on their own. from llama_index.llms import OpenAI # non-streaming -resp = OpenAI().complete('Paul Graham is ') +resp = OpenAI().complete("Paul Graham is ") print(resp) # using streaming endpoint from llama_index.llms import OpenAI + llm = OpenAI() -resp = llm.stream_complete('Paul Graham is ') +resp = llm.stream_complete("Paul Graham is ") for delta in resp: - print(delta, end='') + print(delta, end="") ``` ## Chat Example diff --git a/docs/module_guides/models/prompts/usage_pattern.md b/docs/module_guides/models/prompts/usage_pattern.md index 29efbdf776afa56ebafb8a60ed09e540397967dd..56f85e3762d9acb67f93e97253ea15c04c1fdfbc 100644 --- a/docs/module_guides/models/prompts/usage_pattern.md +++ b/docs/module_guides/models/prompts/usage_pattern.md @@ -71,7 +71,6 @@ For instance, take a look at the following snippet. query_engine = index.as_query_engine(response_mode="compact") prompts_dict = query_engine.get_prompts() print(list(prompts_dict.keys())) - ``` You might get back the following keys: @@ -90,7 +89,6 @@ obtained through `get_prompts`. e.g. regarding the example above, we might do the following ```python - # shakespeare! 
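# i.e. swap in a text QA template that asks the model to answer in the style of a Shakespeare play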
qa_prompt_tmpl_str = ( "Context information is below.\n" @@ -104,10 +102,7 @@ qa_prompt_tmpl_str = ( ) qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str) -query_engine.update_prompts( - {"response_synthesizer:text_qa_template": qa_prompt_tmpl} -) - +query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl}) ``` #### Modify prompts used in query engine @@ -120,8 +115,7 @@ There are also two equivalent ways to override the prompts: ```python query_engine = index.as_query_engine( - text_qa_template=<custom_qa_prompt>, - refine_template=<custom_refine_prompt> + text_qa_template=custom_qa_prompt, refine_template=custom_refine_prompt ) ``` @@ -130,8 +124,7 @@ query_engine = index.as_query_engine( ```python retriever = index.as_retriever() synth = get_response_synthesizer( - text_qa_template=<custom_qa_prompt>, - refine_template=<custom_refine_prompt> + text_qa_template=custom_qa_prompt, refine_template=custom_refine_prompt ) query_engine = RetrieverQueryEngine(retriever, response_synthesizer) ``` @@ -156,13 +149,13 @@ There are two equivalent ways to override the prompts: 1. via the default nodes constructor ```python -index = TreeIndex(nodes, summary_template=<custom_prompt>) +index = TreeIndex(nodes, summary_template=custom_prompt) ``` 2. via the documents constructor. ```python -index = TreeIndex.from_documents(docs, summary_template=<custom_prompt>) +index = TreeIndex.from_documents(docs, summary_template=custom_prompt) ``` For more details on which index uses which prompts, please visit @@ -189,7 +182,6 @@ prompt_tmpl = PromptTemplate(prompt_tmpl_str) partial_prompt_tmpl = prompt_tmpl.partial_format(foo="abc") fmt_str = partial_prompt_tmpl.format(bar="def") - ``` #### Template Variable Mappings @@ -201,13 +193,11 @@ But if you're trying to adapt a string template for use with LlamaIndex, it can Instead, define `template_var_mappings`: ```python - template_var_mappings = {"context_str": "my_context", "query_str": "my_query"} prompt_tmpl = PromptTemplate( qa_prompt_tmpl_str, template_var_mappings=template_var_mappings ) - ``` #### Function Mappings @@ -225,10 +215,10 @@ def format_context_fn(**kwargs): fmtted_context = "\n\n".join([f"- {c}" for c in context_list]) return fmtted_context + prompt_tmpl = PromptTemplate( qa_prompt_tmpl_str, function_mappings={"context_str": format_context_fn} ) -prompt_tmpl.format(context_str="<context>", query_str="<query>") - +prompt_tmpl.format(context_str="context", query_str="query") ``` diff --git a/docs/module_guides/observability/callbacks/token_counting_migration.md b/docs/module_guides/observability/callbacks/token_counting_migration.md index fc354dd6b54897795a2cec19f2b8ac99f104ff36..d2b03abb02821185298bd4cec3a2d67ff4d96f5a 100644 --- a/docs/module_guides/observability/callbacks/token_counting_migration.md +++ b/docs/module_guides/observability/callbacks/token_counting_migration.md @@ -19,8 +19,8 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext # to the same tokenizer that was used previously for token counting # NOTE: The tokenizer should be a function that takes in text and returns a list of tokens token_counter = TokenCountingHandler( - tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode - verbose=False # set to true to see usage printed to the console + tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode, + verbose=False, # set to true to see usage printed to the console ) callback_manager = CallbackManager([token_counter]) @@ -40,8 +40,17 @@ 
token_counter.reset_counts() # also track prompt, completion, and total LLM tokens, in addition to embeddings response = index.as_query_engine().query("What did the author do growing up?") -print('Embedding Tokens: ', token_counter.total_embedding_token_count, '\n', - 'LLM Prompt Tokens: ', token_counter.prompt_llm_token_count, '\n', - 'LLM Completion Tokens: ', token_counter.completion_llm_token_count, '\n', - 'Total LLM Token Count: ', token_counter.total_llm_token_count) +print( + "Embedding Tokens: ", + token_counter.total_embedding_token_count, + "\n", + "LLM Prompt Tokens: ", + token_counter.prompt_llm_token_count, + "\n", + "LLM Completion Tokens: ", + token_counter.completion_llm_token_count, + "\n", + "Total LLM Token Count: ", + token_counter.total_llm_token_count, +) ``` diff --git a/docs/module_guides/observability/observability.md b/docs/module_guides/observability/observability.md index c1d51099f6c353c930667a9b0693230a787de351..593be02f82d46a3cf5b635ad4969339f7616aba7 100644 --- a/docs/module_guides/observability/observability.md +++ b/docs/module_guides/observability/observability.md @@ -19,7 +19,6 @@ Each provider has similarities and differences. Take a look below for the full s To toggle, you will generally just need to do the following: ```python - from llama_index import set_global_handler # general usage @@ -27,7 +26,6 @@ set_global_handler("<handler_name>", **kwargs) # W&B example # set_global_handler("wandb", run_args={"project": "llamaindex"}) - ``` Note that all `kwargs` to `set_global_handler` are passed to the underlying callback handler. @@ -60,6 +58,7 @@ Prompts allows users to log/trace/inspect the execution flow of LlamaIndex durin ```python from llama_index import set_global_handler + set_global_handler("wandb", run_args={"project": "llamaindex"}) # NOTE: No need to do the following @@ -79,7 +78,6 @@ llama_index.global_handler.persist_index(graph, index_name="composable_graph") storage_context = llama_index.global_handler.load_storage_context( artifact_url="ayut/llamaindex/composable_graph:v0" ) - ```  @@ -106,12 +104,14 @@ Arize [Phoenix](https://github.com/Arize-ai/phoenix): LLMOps insights at lightni # Phoenix can display in real time the traces automatically # collected from your LlamaIndex application. import phoenix as px + # Look for a URL in the output to open the App in a browser. px.launch_app() # The App is initially empty, but as you proceed with the steps below, # traces will appear automatically as your LlamaIndex application runs. import llama_index + llama_index.set_global_handler("arize_phoenix") # Run all of your LlamaIndex applications as usual and traces @@ -181,11 +181,11 @@ TruLens allows users to instrument/evaluate LlamaIndex applications, through fea ```python # use trulens from trulens_eval import TruLlama + tru_query_engine = TruLlama(query_engine) # query tru_query_engine.query("What did the author do growing up?") - ```  @@ -209,6 +209,7 @@ HoneyHive allows users to trace the execution flow of any LLM pipeline. 
Users ca ```python from llama_index import set_global_handler + set_global_handler( "honeyhive", project="My HoneyHive Project", diff --git a/docs/module_guides/querying/node_postprocessors/node_postprocessors.md b/docs/module_guides/querying/node_postprocessors/node_postprocessors.md index 0f6db73613740ebdd2618580b481fc50905c8927..98551f134c06c9b639fd415e4a6935c4ddc78a6a 100644 --- a/docs/module_guides/querying/node_postprocessors/node_postprocessors.md +++ b/docs/module_guides/querying/node_postprocessors/node_postprocessors.md @@ -20,8 +20,7 @@ Used to ensure certain keywords are either excluded or included. from llama_index.indices.postprocessor import KeywordNodePostprocessor postprocessor = KeywordNodePostprocessor( - required_keywords=["word1", "word2"], - exclude_keywords=["word3", "word4"] + required_keywords=["word1", "word2"], exclude_keywords=["word3", "word4"] ) postprocessor.postprocess_nodes(nodes) @@ -35,7 +34,7 @@ Used to replace the node content with a field from the node metadata. If the fie from llama_index.indices.postprocessor import MetadataReplacementPostProcessor postprocessor = MetadataReplacementPostProcessor( - target_metadata_key="window", + target_metadata_key="window", ) postprocessor.postprocess_nodes(nodes) @@ -67,9 +66,9 @@ The threshold cutoff can be specified instead, which uses a raw similarity cutof from llama_index.indices.postprocessor import SentenceEmbeddingOptimizer postprocessor = SentenceEmbeddingOptimizer( - embed_model=service_context.embed_model, - percentile_cutoff=0.5, - # threshold_cutoff=0.7 + embed_model=service_context.embed_model, + percentile_cutoff=0.5, + # threshold_cutoff=0.7 ) postprocessor.postprocess_nodes(nodes) @@ -87,9 +86,7 @@ Uses the "Cohere ReRank" functionality to re-order nodes, and returns the top N from llama_index.indices import CohereRerank postprocessor = CohereRerank( - top_n=2 - model="rerank-english-v2.0", - api_key="YOUR COHERE API KEY" + top_n=2, model="rerank-english-v2.0", api_key="YOUR COHERE API KEY" ) postprocessor.postprocess_nodes(nodes) @@ -106,8 +103,7 @@ from llama_index.indices.postprocessor import SentenceTransformerRerank # We choose a model with relatively high speed and decent accuracy. postprocessor = SentenceTransformerRerank( - model="cross-encoder/ms-marco-MiniLM-L-2-v2", - top_n=3 + model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=3 ) postprocessor.postprocess_nodes(nodes) @@ -124,10 +120,7 @@ Uses a LLM to re-order nodes by asking the LLM to return the relevant documents ```python from llama_index.indices.postprocessor import LLMRerank -postprocessor = LLMRerank( - top_n=2 - service_context=service_context, -) +postprocessor = LLMRerank(top_n=2, service_context=service_context) postprocessor.postprocess_nodes(nodes) ``` @@ -142,8 +135,7 @@ This postproccesor returns the top K nodes sorted by date. 
This assumes there is from llama_index.indices.postprocessor import FixedRecencyPostprocessor postprocessor = FixedRecencyPostprocessor( - tok_k=1, - date_key="date" # the key in the metadata to find the date + tok_k=1, date_key="date" # the key in the metadata to find the date ) postprocessor.postprocess_nodes(nodes) @@ -161,9 +153,7 @@ This postproccesor returns the top K nodes after sorting by date and removing ol from llama_index.indices.postprocessor import EmbeddingRecencyPostprocessor postprocessor = EmbeddingRecencyPostprocessor( - service_context=service_context, - date_key="date", - similarity_cutoff=0.7 + service_context=service_context, date_key="date", similarity_cutoff=0.7 ) postprocessor.postprocess_nodes(nodes) @@ -178,10 +168,7 @@ This postproccesor returns the top K nodes applying a time-weighted rerank to ea ```python from llama_index.indices.postprocessor import TimeWeightedPostprocessor -postprocessor = TimeWeightedPostprocessor( - time_decay=0.99, - top_k=1 -) +postprocessor = TimeWeightedPostprocessor(time_decay=0.99, top_k=1) postprocessor.postprocess_nodes(nodes) ``` @@ -198,7 +185,7 @@ The PII (Personal Identifiable Information) postprocssor removes information tha from llama_index.indices.postprocessor import PIINodePostprocessor postprocessor = PIINodePostprocessor( - service_context=service_context, # this should be setup with an LLM you trust + service_context=service_context # this should be setup with an LLM you trust ) postprocessor.postprocess_nodes(nodes) @@ -228,9 +215,9 @@ This is useful when you know the relationships point to important data (either b from llama_index.indices.postprocessor import PrevNextNodePostprocessor postprocessor = PrevNextNodePostprocessor( - docstore=index.docstore, - num_nodes=1, # number of nodes to fetch when looking forawrds or backwards - mode="next" # can be either 'next', 'previous', or 'both' + docstore=index.docstore, + num_nodes=1, # number of nodes to fetch when looking forawrds or backwards + mode="next", # can be either 'next', 'previous', or 'both' ) postprocessor.postprocess_nodes(nodes) @@ -246,10 +233,10 @@ The same as PrevNextNodePostprocessor, but lets the LLM decide the mode (next, p from llama_index.indices.postprocessor import AutoPrevNextNodePostprocessor postprocessor = AutoPrevNextNodePostprocessor( - docstore=index.docstore, - service_context=service_context - num_nodes=1, # number of nodes to fetch when looking forawrds or backwards) - + docstore=index.docstore, + service_context=service_context, + num_nodes=1, # number of nodes to fetch when looking forawrds or backwards) +) postprocessor.postprocess_nodes(nodes) ``` diff --git a/docs/module_guides/querying/node_postprocessors/root.md b/docs/module_guides/querying/node_postprocessors/root.md index 637573f6766cde9ba387ccb33f7e91d25519a420..c0c40b79ee9016bc06314db1af727a805951ce49 100644 --- a/docs/module_guides/querying/node_postprocessors/root.md +++ b/docs/module_guides/querying/node_postprocessors/root.md @@ -21,8 +21,8 @@ from llama_index.indices.postprocessor import SimilarityPostprocessor from llama_index.schema import Node, NodeWithScore nodes = [ - NodeWithScore(node=Node(text="text"), score=0.7), - NodeWithScore(node=Node(text="text"), score=0.8) + NodeWithScore(node=Node(text="text"), score=0.7), + NodeWithScore(node=Node(text="text"), score=0.8), ] # filter nodes below 0.75 similarity score @@ -45,11 +45,9 @@ documents = SimpleDirectoryReader("./data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = 
index.as_query_engine( - node_postprocessors=[ - TimeWeightedPostprocessor( - time_decay=0.5, time_access_refresh=False, top_k=1 - ) - ] + node_postprocessors=[ + TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=1) + ] ) # all node post-processors will be applied during each query @@ -79,8 +77,8 @@ from llama_index.indices.postprocessor import SimilarityPostprocessor from llama_index.schema import Node, NodeWithScore nodes = [ - NodeWithScore(node=Node(text="text"), score=0.7), - NodeWithScore(node=Node(text="text"), score=0.8) + NodeWithScore(node=Node(text="text"), score=0.7), + NodeWithScore(node=Node(text="text"), score=0.8), ] # filter nodes below 0.75 similarity score @@ -110,12 +108,11 @@ from llama_index import QueryBundle from llama_index.indices.postprocessor.base import BaseNodePostprocessor from llama_index.schema import NodeWithScore -class DummyNodePostprocessor: +class DummyNodePostprocessor: def postprocess_nodes( self, nodes: List[NodeWithScore], query_bundle: Optional[QueryBundle] ) -> List[NodeWithScore]: - # subtracts 1 from the score for n in nodes: n.score -= 1 diff --git a/docs/module_guides/querying/output_parser.md b/docs/module_guides/querying/output_parser.md index c5549166959432a5f9a4efc1a2efab8924773d8e..5cc242a6a070a47a767e17ce001f2bb394429015 100644 --- a/docs/module_guides/querying/output_parser.md +++ b/docs/module_guides/querying/output_parser.md @@ -15,11 +15,14 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader from llama_index.output_parsers import GuardrailsOutputParser from llama_index.llm_predictor import StructuredLLMPredictor from llama_index.prompts import PromptTemplate -from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL +from llama_index.prompts.default_prompts import ( + DEFAULT_TEXT_QA_PROMPT_TMPL, + DEFAULT_REFINE_PROMPT_TMPL, +) # load documents, build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectorStoreIndex(documents, chunk_size=512) llm_predictor = StructuredLLMPredictor() @@ -28,7 +31,7 @@ llm_predictor = StructuredLLMPredictor() # this is a special LLMPredictor that allows for structured outputs # define query / output spec -rail_spec = (""" +rail_spec = """ <rail version="0.1"> <output> @@ -52,10 +55,12 @@ Query string here. 
@json_suffix_prompt_v2_wo_none </prompt> </rail> -""") +""" # define output parser -output_parser = GuardrailsOutputParser.from_rail_string(rail_spec, llm=llm_predictor.llm) +output_parser = GuardrailsOutputParser.from_rail_string( + rail_spec, llm=llm_predictor.llm +) # format each prompt with output parser instructions fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL) @@ -66,9 +71,7 @@ refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser) # obtain a structured response query_engine = index.as_query_engine( - service_context=ServiceContext.from_defaults( - llm_predictor=llm_predictor - ), + service_context=ServiceContext.from_defaults(llm_predictor=llm_predictor), text_qa_template=qa_prompt, refine_template=refine_prompt, ) @@ -76,7 +79,6 @@ response = query_engine.query( "What are the three items the author did growing up?", ) print(response) - ``` Output: @@ -94,19 +96,27 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader from llama_index.output_parsers import LangchainOutputParser from llama_index.llm_predictor import StructuredLLMPredictor from llama_index.prompts import PromptTemplate -from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL +from llama_index.prompts.default_prompts import ( + DEFAULT_TEXT_QA_PROMPT_TMPL, + DEFAULT_REFINE_PROMPT_TMPL, +) from langchain.output_parsers import StructuredOutputParser, ResponseSchema # load documents, build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectorStoreIndex.from_documents(documents) llm_predictor = StructuredLLMPredictor() # define output schema response_schemas = [ - ResponseSchema(name="Education", description="Describes the author's educational experience/background."), - ResponseSchema(name="Work", description="Describes the author's work experience/background.") + ResponseSchema( + name="Education", + description="Describes the author's educational experience/background.", + ), + ResponseSchema( + name="Work", description="Describes the author's work experience/background." 
+ ), ] # define output parser @@ -121,9 +131,7 @@ refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser) # query index query_engine = index.as_query_engine( - service_context=ServiceContext.from_defaults( - llm_predictor=llm_predictor - ), + service_context=ServiceContext.from_defaults(llm_predictor=llm_predictor), text_qa_template=qa_prompt, refine_template=refine_prompt, ) diff --git a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md index d72be2d9c5511e9f28f256fd8d871906426f0160..66275a3d94bd489d038bf1999b8869472182d370 100644 --- a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md +++ b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md @@ -20,26 +20,26 @@ from llama_index.schema import Node, NodeWithScore from llama_index import get_response_synthesizer response_synthesizer = get_response_synthesizer( - response_mode="refine", - service_context=service_context, - text_qa_template=text_qa_template, - refine_template=refine_template, - use_async=False, - streaming=False + response_mode="refine", + service_context=service_context, + text_qa_template=text_qa_template, + refine_template=refine_template, + use_async=False, + streaming=False, ) # synchronous response = response_synthesizer.synthesize( - "query string", - nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..], - additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..], + "query string", + nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], + additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], ) # asynchronous response = await response_synthesizer.asynthesize( - "query string", - nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..], - additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..], + "query string", + nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], + additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...], ) ``` @@ -47,8 +47,7 @@ You can also directly return a string, using the lower-level `get_response` and ```python response_str = response_synthesizer.get_response( - "query string", - text_chunks=["text1", "text2", ...] + "query string", text_chunks=["text1", "text2", ...] ) ``` diff --git a/docs/module_guides/querying/response_synthesizers/root.md b/docs/module_guides/querying/response_synthesizers/root.md index 928fe0b18bf5ceb9e2821518c0657fa521f22bed..d777736320c94d58d529499a7fd3475166c5b76f 100644 --- a/docs/module_guides/querying/response_synthesizers/root.md +++ b/docs/module_guides/querying/response_synthesizers/root.md @@ -44,11 +44,10 @@ Configuring the response synthesizer for a query engine using `response_mode`: from llama_index.schema import Node, NodeWithScore from llama_index.response_synthesizers import get_response_synthesizer -response_synthesizer = get_response_synthesizer(response_mode='compact') +response_synthesizer = get_response_synthesizer(response_mode="compact") response = response_synthesizer.synthesize( - "query text", - nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..] + "query text", nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...] ) ``` @@ -185,7 +184,6 @@ You can specify these additional variables in the `**kwargs` for `get_response`. 
For example, ```python - from llama_index import PromptTemplate from llama_index.response_synthesizers import TreeSummarize @@ -207,7 +205,9 @@ qa_prompt = PromptTemplate(qa_prompt_tmpl) summarizer = TreeSummarize(verbose=True, summary_template=qa_prompt) # get response -response = summarizer.get_response("who is Paul Graham?", [text], tone_name="a Shakespeare play") +response = summarizer.get_response( + "who is Paul Graham?", [text], tone_name="a Shakespeare play" +) ``` ## Modules diff --git a/docs/module_guides/querying/retriever/root.md b/docs/module_guides/querying/retriever/root.md index da40168d2b3460736bf48897fbc315b27c355a0c..6a3e6812a60bbc5ffb57e3e55576f8bf3041adb4 100644 --- a/docs/module_guides/querying/retriever/root.md +++ b/docs/module_guides/querying/retriever/root.md @@ -31,7 +31,7 @@ retriever = index.as_retriever() Retrieve relevant context for a question: ```python -nodes = retriever.retrieve('Who is Paul Graham?') +nodes = retriever.retrieve("Who is Paul Graham?") ``` > Note: To learn how to build an index, see [Indexing](/module_guides/indexing/indexing.md) @@ -45,7 +45,7 @@ For example, with a `SummaryIndex`: ```python retriever = summary_index.as_retriever( - retriever_mode='llm', + retriever_mode="llm", ) ``` @@ -72,10 +72,9 @@ For example, if we selected the "llm" retriever mode, we might do the following: ```python retriever = summary_index.as_retriever( - retriever_mode='llm', + retriever_mode="llm", choice_batch_size=5, ) - ``` ## Low-Level Composition API diff --git a/docs/module_guides/querying/router/root.md b/docs/module_guides/querying/router/root.md index d8c5dbe443755eb01d492d334103569abb241d07..689b6ccb5df70fc9f03b4fa5b75f6c64aacd48f9 100644 --- a/docs/module_guides/querying/router/root.md +++ b/docs/module_guides/querying/router/root.md @@ -80,7 +80,6 @@ selector = PydanticMultiSelector.from_defaults() selector = LLMSingleSelector.from_defaults() # multi selector (LLM) selector = LLMMultiSelector.from_defaults() - ``` ## Using as a Query Engine @@ -118,7 +117,6 @@ query_engine = RouterQueryEngine( ], ) query_engine.query("<query>") - ``` ## Using as a Retriever @@ -155,7 +153,6 @@ retriever = RouterRetriever( vector_tool, ], ) - ``` ## Using selector as a standalone module @@ -177,9 +174,10 @@ choices = [ choices = ["choice 1 - description for choice 1", "choice 2: description for choice 2"] selector = LLMSingleSelector.from_defaults() -selector_result = selector.select(choices, query="What's revenue growth for IBM in 2007?") +selector_result = selector.select( + choices, query="What's revenue growth for IBM in 2007?" 
+)
 print(selector_result.selections)
-
 ```

 ```{toctree}
diff --git a/docs/module_guides/storing/customization.md b/docs/module_guides/storing/customization.md
index a1154a9b784d740e2b30d1114aff45c480ab88b4..3ff9c7a8d818795e742223863cb1466145a3872d 100644
--- a/docs/module_guides/storing/customization.md
+++ b/docs/module_guides/storing/customization.md
@@ -5,7 +5,7 @@ By default, LlamaIndex hides away the complexities and let you query your data i
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine()
 response = query_engine.query("Summarize the documents.")
@@ -57,19 +57,20 @@ index.storage_context.persist(persist_dir="<persist_dir>")
 
 # to load index later, make sure you setup the storage context
 # this will load the persisted stores from persist_dir
-storage_context = StorageContext.from_defaults(
-    persist_dir="<persist_dir>"
-)
+storage_context = StorageContext.from_defaults(persist_dir="<persist_dir>")
 
 # then load the index object
 from llama_index import load_index_from_storage
+
 loaded_index = load_index_from_storage(storage_context)
 
 # if loading an index from a persist_dir containing multiple indexes
 loaded_index = load_index_from_storage(storage_context, index_id="<index_id>")
 
 # if loading multiple indexes from a persist dir
-loaded_indicies = load_index_from_storage(storage_context, index_ids=["<index_id>", ...])
+loaded_indices = load_index_from_storage(
+    storage_context, index_ids=["<index_id>", ...]
+)
 ```

 You can customize the underlying storage with a one-line change to instantiate different document stores, index stores, and vector stores.
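For orientation, a fully customized setup might look roughly like the sketch below (not part of the diff above; the in-memory `Simple*` stores are stand-ins for Redis-, MongoDB-, or Pinecone-backed implementations, and `./data` is a placeholder path):

```python
from llama_index import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.storage.index_store import SimpleIndexStore
from llama_index.vector_stores import SimpleVectorStore

# each store can be swapped independently for a hosted backend
storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore(),
    index_store=SimpleIndexStore(),
    vector_store=SimpleVectorStore(),
)

documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
```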
@@ -110,12 +111,7 @@ from llama_index.vector_stores import PineconeVectorStore # Creating a Pinecone index api_key = "api_key" pinecone.init(api_key=api_key, environment="us-west1-gcp") -pinecone.create_index( - "quickstart", - dimension=1536, - metric="euclidean", - pod_type="p1" -) +pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1") index = pinecone.Index("quickstart") # construct vector store diff --git a/docs/module_guides/storing/docstores.md b/docs/module_guides/storing/docstores.md index 04e5e581b2c0d675169d56f21b378c4980b150f8..2ca2075df4328c1c912c1fb26791a1239dcab977 100644 --- a/docs/module_guides/storing/docstores.md +++ b/docs/module_guides/storing/docstores.md @@ -59,9 +59,7 @@ nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes docstore = RedisDocumentStore.from_host_and_port( - host="127.0.0.1", - port="6379", - namespace='llama_index' + host="127.0.0.1", port="6379", namespace="llama_index" ) docstore.add_documents(nodes) @@ -94,8 +92,8 @@ nodes = parser.get_nodes_from_documents(documents) # create (or load) docstore and add nodes docstore = FirestoreDocumentStore.from_dataabse( - project="project-id", - database="(default)", + project="project-id", + database="(default)", ) docstore.add_documents(nodes) diff --git a/docs/module_guides/storing/index_stores.md b/docs/module_guides/storing/index_stores.md index 1b44500b3bff5dbb03249c1178d0d2548471cca3..d13fa69fcdd0040a0bd0dd592fc58237b97f0e7f 100644 --- a/docs/module_guides/storing/index_stores.md +++ b/docs/module_guides/storing/index_stores.md @@ -28,6 +28,7 @@ index = VectorStoreIndex(nodes, storage_context=storage_context) # or alternatively, load index from llama_index import load_index_from_storage + index = load_index_from_storage(storage_context) ``` @@ -52,9 +53,7 @@ from llama_index import VectorStoreIndex # create (or load) docstore and add nodes index_store = RedisIndexStore.from_host_and_port( - host="127.0.0.1", - port="6379", - namespace='llama_index' + host="127.0.0.1", port="6379", namespace="llama_index" ) # create storage context @@ -65,6 +64,7 @@ index = VectorStoreIndex(nodes, storage_context=storage_context) # or alternatively, load index from llama_index import load_index_from_storage + index = load_index_from_storage(storage_context) ``` diff --git a/docs/module_guides/storing/save_load.md b/docs/module_guides/storing/save_load.md index d7871e3e2089c12784772f69865d6c28e1453d1f..da5c6f23b2300289645b1e6c679bd55222049cbd 100644 --- a/docs/module_guides/storing/save_load.md +++ b/docs/module_guides/storing/save_load.md @@ -30,7 +30,11 @@ storage_context = StorageContext.from_defaults( We can then load specific indices from the `StorageContext` through some convenience functions below. 
```python -from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage +from llama_index import ( + load_index_from_storage, + load_indices_from_storage, + load_graph_from_storage, +) # load a single index # need to specify index_id if multiple indexes are persisted to the same directory @@ -40,11 +44,15 @@ index = load_index_from_storage(storage_context, index_id="<index_id>") index = load_index_from_storage(storage_context) # load multiple indices -indices = load_indices_from_storage(storage_context) # loads all indices -indices = load_indices_from_storage(storage_context, index_ids=[index_id1, ...]) # loads specific indices +indices = load_indices_from_storage(storage_context) # loads all indices +indices = load_indices_from_storage( + storage_context, index_ids=[index_id1, ...] +) # loads specific indices # load composable graph -graph = load_graph_from_storage(storage_context, root_id="<root_id>") # loads graph with the specified root_id +graph = load_graph_from_storage( + storage_context, root_id="<root_id>" +) # loads graph with the specified root_id ``` Here's the full [API Reference on saving and loading](/api_reference/storage/indices_save_load.rst). @@ -59,10 +67,13 @@ Here's a simple example, instantiating a vector store: import dotenv import s3fs import os + dotenv.load_dotenv("../../../.env") # load documents -documents = SimpleDirectoryReader('../../../examples/paul_graham_essay/data/').load_data() +documents = SimpleDirectoryReader( + "../../../examples/paul_graham_essay/data/" +).load_data() print(len(documents)) index = VectorStoreIndex.from_documents(documents) ``` @@ -71,17 +82,17 @@ At this point, everything has been the same. Now - let's instantiate a S3 filesy ```python # set up s3fs -AWS_KEY = os.environ['AWS_ACCESS_KEY_ID'] -AWS_SECRET = os.environ['AWS_SECRET_ACCESS_KEY'] -R2_ACCOUNT_ID = os.environ['R2_ACCOUNT_ID'] +AWS_KEY = os.environ["AWS_ACCESS_KEY_ID"] +AWS_SECRET = os.environ["AWS_SECRET_ACCESS_KEY"] +R2_ACCOUNT_ID = os.environ["R2_ACCOUNT_ID"] assert AWS_KEY is not None and AWS_KEY != "" s3 = s3fs.S3FileSystem( - key=AWS_KEY, - secret=AWS_SECRET, - endpoint_url=f'https://{R2_ACCOUNT_ID}.r2.cloudflarestorage.com', - s3_additional_kwargs={'ACL': 'public-read'} + key=AWS_KEY, + secret=AWS_SECRET, + endpoint_url=f"https://{R2_ACCOUNT_ID}.r2.cloudflarestorage.com", + s3_additional_kwargs={"ACL": "public-read"}, ) # If you're using 2+ indexes with the same StorageContext, @@ -89,13 +100,13 @@ s3 = s3fs.S3FileSystem( index.set_index_id("vector_index") # persist index to s3 -s3_bucket_name = 'llama-index/storage_demo' # {bucket_name}/{index_name} +s3_bucket_name = "llama-index/storage_demo" # {bucket_name}/{index_name} index.storage_context.persist(persist_dir=s3_bucket_name, fs=s3) # load index from s3 index_from_s3 = load_index_from_storage( StorageContext.from_defaults(persist_dir=s3_bucket_name, fs=s3), - index_id='vector_index' + index_id="vector_index", ) ``` diff --git a/docs/module_guides/storing/storing.md b/docs/module_guides/storing/storing.md index a75f89abde43dc8a530370c44fa757a9b600a506..32275d18d7fbf65a873607eb161f2d36b2b3d7e3 100644 --- a/docs/module_guides/storing/storing.md +++ b/docs/module_guides/storing/storing.md @@ -27,15 +27,13 @@ We have confirmed support for the following storage backends: Many vector stores (except FAISS) will store both the data as well as the index (embeddings). This means that you will not need to use a separate document store or index store. 
This _also_ means that you will not need to explicitly persist this data - this happens automatically. Usage would look something like the following to build a new index / reload an existing one. ```python - ## build a new index from llama_index import VectorStoreIndex, StorageContext from llama_index.vector_stores import DeepLakeVectorStore + # construct vector store and customize storage context vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>") -storage_context = StorageContext.from_defaults( - vector_store = vector_store -) +storage_context = StorageContext.from_defaults(vector_store=vector_store) # Load documents and build index index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) diff --git a/docs/module_guides/supporting_modules/service_context.md b/docs/module_guides/supporting_modules/service_context.md index 8fa4a07f1706fae2b4d2dace53d71218d543affb..6017f52ff6cdbbca16b4670229c906ccde37b493 100644 --- a/docs/module_guides/supporting_modules/service_context.md +++ b/docs/module_guides/supporting_modules/service_context.md @@ -11,7 +11,7 @@ You can use it to set the [global configuration](#setting-global-configuration), The `ServiceContext` is a simple python dataclass that you can directly construct by passing in the desired components. -```python +``` @dataclass class ServiceContext: # The LLM used to generate natural language responses to queries. @@ -72,23 +72,20 @@ from llama_index.llms import OpenAI from llama_index.text_splitter import TokenTextSplitter from llama_index.node_parser import SimpleNodeParser -llm = OpenAI(model='text-davinci-003', temperature=0, max_tokens=256) +llm = OpenAI(model="text-davinci-003", temperature=0, max_tokens=256) embed_model = OpenAIEmbedding() node_parser = SimpleNodeParser.from_defaults( - text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20) + text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20) ) prompt_helper = PromptHelper( - context_window=4096, - num_output=256, - chunk_overlap_ratio=0.1, - chunk_size_limit=None + context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None ) service_context = ServiceContext.from_defaults( - llm=llm, - embed_model=embed_model, - node_parser=node_parser, - prompt_helper=prompt_helper + llm=llm, + embed_model=embed_model, + node_parser=node_parser, + prompt_helper=prompt_helper, ) ``` @@ -98,6 +95,7 @@ You can set a service context as the global default that applies to the entire L ```python from llama_index import set_global_service_context + set_global_service_context(service_context) ``` diff --git a/docs/optimizing/advanced_retrieval/query_transformations.md b/docs/optimizing/advanced_retrieval/query_transformations.md index 5e8b9ce8957a0b80bb9e58b6b413df3e7cc3ba6e..68e6512d4ef0671d0f9771af662fe22680b2d1fc 100644 --- a/docs/optimizing/advanced_retrieval/query_transformations.md +++ b/docs/optimizing/advanced_retrieval/query_transformations.md @@ -31,7 +31,7 @@ from llama_index.indices.query.query_transform.base import HyDEQueryTransform from llama_index.query_engine.transform_query_engine import TransformQueryEngine # load documents, build index -documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data() +documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data() index = VectorStoreIndex(documents) # run query with HyDE query transform @@ -41,7 +41,6 @@ query_engine = index.as_query_engine() query_engine = TransformQueryEngine(query_engine, query_transform=hyde) response = 
query_engine.query(query_str) print(response) - ``` Check out our [example notebook](https://github.com/jerryjliu/llama_index/blob/main/docs/examples/query_transformations/HyDEQueryTransformDemo.ipynb) for a full walkthrough. @@ -64,13 +63,11 @@ An example image is shown below. Here's a corresponding example code snippet over a composed graph. ```python - # Setting: a summary index composed over multiple vector indices # llm_predictor_chatgpt corresponds to the ChatGPT LLM interface from llama_index.indices.query.query_transform.base import DecomposeQueryTransform -decompose_transform = DecomposeQueryTransform( - llm_predictor_chatgpt, verbose=True -) + +decompose_transform = DecomposeQueryTransform(llm_predictor_chatgpt, verbose=True) # initialize indexes and graph ... @@ -80,17 +77,13 @@ decompose_transform = DecomposeQueryTransform( vector_query_engine = vector_index.as_query_engine() vector_query_engine = TransformQueryEngine( vector_query_engine, - query_transform=decompose_transform - transform_extra_info={'index_summary': vector_index.index_struct.summary} + query_transform=decompose_transform, + transform_extra_info={"index_summary": vector_index.index_struct.summary}, ) -custom_query_engines = { - vector_index.index_id: vector_query_engine -} +custom_query_engines = {vector_index.index_id: vector_query_engine} # query -query_str = ( - "Compare and contrast the airports in Seattle, Houston, and Toronto. " -) +query_str = "Compare and contrast the airports in Seattle, Houston, and Toronto. " query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines) response = query_engine.query(query_str) ``` @@ -112,19 +105,19 @@ Here's a corresponding example code snippet. ```python from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform + # gpt-4 -step_decompose_transform = StepDecomposeQueryTransform( - llm_predictor, verbose=True -) +step_decompose_transform = StepDecomposeQueryTransform(llm_predictor, verbose=True) query_engine = index.as_query_engine() -query_engine = MultiStepQueryEngine(query_engine, query_transform=step_decompose_transform) +query_engine = MultiStepQueryEngine( + query_engine, query_transform=step_decompose_transform +) response = query_engine.query( "Who was in the first batch of the accelerator program the author started?", ) print(str(response)) - ``` Check out our [example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/vector_indices/SimpleIndexDemo-multistep.ipynb) for a full walkthrough. diff --git a/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md b/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md index 6b1fc435702137d1e942374b945c574033cac643..9d9b6cc7d2ef2b85ca82398012aa05f443b0253e 100644 --- a/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md +++ b/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md @@ -20,6 +20,7 @@ First, you need to define the object you want to extract. from typing import List from pydantic import BaseModel + class Biography(BaseModel): """Data model for a biography.""" @@ -31,7 +32,9 @@ class Biography(BaseModel): Then, you create your query engine. ```python -query_engine = index.as_query_engine(response_mode="tree_summarize", output_cls=Biography) +query_engine = index.as_query_engine( + response_mode="tree_summarize", output_cls=Biography +) ``` Lastly, you can get a response and inspect the output. @@ -40,11 +43,11 @@ Lastly, you can get a response and inspect the output. 
response = query_engine.query("Who is Paul Graham?") print(response.name) -> 'Paul Graham' +# > 'Paul Graham' print(response.best_known_for) -> ['working on Bel', 'co-founding Viaweb', 'creating the programming language Arc'] +# > ['working on Bel', 'co-founding Viaweb', 'creating the programming language Arc'] print(response.extra_info) -> "Paul Graham is a computer scientist, entrepreneur, and writer. He is best known for ..." +# > "Paul Graham is a computer scientist, entrepreneur, and writer. He is best known for ..." ``` ## Modules diff --git a/docs/optimizing/basic_strategies/basic_strategies.md b/docs/optimizing/basic_strategies/basic_strategies.md index 0588524c5103add70529bbf098f53afe7cc7001a..c73b23963c84fd4275059ad6ae0c9e335790ebb1 100644 --- a/docs/optimizing/basic_strategies/basic_strategies.md +++ b/docs/optimizing/basic_strategies/basic_strategies.md @@ -116,12 +116,10 @@ from llama_index.vector_stores import MetadataFilters, ExactMatchFilter documents = [ Document(text="text", metadata={"author": "LlamaIndex"}), - Document(text="text", metadata={"author": "John Doe"}) + Document(text="text", metadata={"author": "John Doe"}), ] -filters = MetadataFilters(filters=[ - ExactMatchFilter(key="author", value="John Doe") -]) +filters = MetadataFilters(filters=[ExactMatchFilter(key="author", value="John Doe")]) index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine(filters=filters) diff --git a/docs/understanding/evaluating/cost_analysis/usage_pattern.md b/docs/understanding/evaluating/cost_analysis/usage_pattern.md index f72d070130d1cabf4932eb879cb98bfce26dfde5..4abc5d09b8fe909f8d598241dacb07ff1f31df5e 100644 --- a/docs/understanding/evaluating/cost_analysis/usage_pattern.md +++ b/docs/understanding/evaluating/cost_analysis/usage_pattern.md @@ -34,9 +34,7 @@ from llama_index import ServiceContext, set_global_service_context set_global_service_context( ServiceContext.from_defaults( - llm=llm, - embed_model=embed_model, - callback_manager=callback_manager + llm=llm, embed_model=embed_model, callback_manager=callback_manager ) ) ``` diff --git a/docs/understanding/loading/llamahub.md b/docs/understanding/loading/llamahub.md index 680666dc622c5096adee6b4ee8108364841d8e9d..623b356157c2980c741e6d249cd812d3f84ec156 100644 --- a/docs/understanding/loading/llamahub.md +++ b/docs/understanding/loading/llamahub.md @@ -12,7 +12,7 @@ Get started with: ```python from llama_index import download_loader -GoogleDocsReader = download_loader('GoogleDocsReader') +GoogleDocsReader = download_loader("GoogleDocsReader") loader = GoogleDocsReader() documents = loader.load_data(document_ids=[...]) ``` @@ -24,7 +24,7 @@ documents = loader.load_data(document_ids=[...]) ```python from llama_index import SimpleDirectoryReader -documents = SimpleDirectoryReader('./data').load_data() +documents = SimpleDirectoryReader("./data").load_data() ``` ## Available connectors diff --git a/docs/understanding/loading/loading.md b/docs/understanding/loading/loading.md index 6197475eeea29cf1e1af8d73e6dd157eec9cef70..4d9ce537b7e998b5c6c77c261bb4e6bf6f107d37 100644 --- a/docs/understanding/loading/loading.md +++ b/docs/understanding/loading/loading.md @@ -9,7 +9,7 @@ The easiest reader to use is our SimpleDirectoryReader, which creates documents ```python from llama_index import SimpleDirectoryReader -documents = SimpleDirectoryReader('./data').load_data() +documents = SimpleDirectoryReader("./data").load_data() ``` ## Using Readers from LlamaHub @@ -21,15 +21,15 @@ In this example LlamaIndex 
downloads and installs the connector called [Database ```python from llama_index import download_loader -DatabaseReader = download_loader('DatabaseReader') +DatabaseReader = download_loader("DatabaseReader") reader = DatabaseReader( - scheme = os.getenv('DB_SCHEME'), - host = os.getenv('DB_HOST'), - port = os.getenv('DB_PORT'), - user = os.getenv('DB_USER'), - password = os.getenv('DB_PASS'), - dbname = os.getenv('DB_NAME') + scheme=os.getenv("DB_SCHEME"), + host=os.getenv("DB_HOST"), + port=os.getenv("DB_PORT"), + user=os.getenv("DB_USER"), + password=os.getenv("DB_PASS"), + dbname=os.getenv("DB_NAME"), ) query = "SELECT * FROM users" @@ -78,7 +78,7 @@ from llama_index.schema import TextNode node1 = TextNode(text="<text_chunk>", id_="<node_id>") node2 = TextNode(text="<text_chunk>", id_="<node_id>") -index = VectorStoreIndex([node1,node2]) +index = VectorStoreIndex([node1, node2]) ``` ## Customizing Documents @@ -87,11 +87,7 @@ When creating documents, you can also attach useful metadata that can be used at ```python document = Document( - text='text', - metadata={ - 'filename': '<doc_file_name>', - 'category': '<category>' - } + text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"} ) ``` diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md index fd3e7a607b1d06ea84e027f99c11aadf887ba744..f5a899b14943298a04194f2ac82d5160a17f0c04 100644 --- a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md +++ b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md @@ -25,10 +25,12 @@ from flask import Flask app = Flask(__name__) + @app.route("/") def home(): return "Hello World!" + if __name__ == "__main__": app.run(host="0.0.0.0", port=5601) ``` @@ -50,10 +52,11 @@ import os from llama_index import SimpleDirectoryReader, VectorStoreIndex, StorageContext # NOTE: for local testing only, do NOT deploy with your key hardcoded -os.environ['OPENAI_API_KEY'] = "your key here" +os.environ["OPENAI_API_KEY"] = "your key here" index = None + def initialize_index(): global index storage_context = StorageContext.from_defaults() @@ -61,7 +64,9 @@ def initialize_index(): index = load_index_from_storage(storage_context) else: documents = SimpleDirectoryReader("./documents").load_data() - index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) + index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context + ) storage_context.persist(index_dir) ``` @@ -72,15 +77,16 @@ Our query endpoint will accept `GET` requests with the query text as a parameter ```python from flask import request + @app.route("/query", methods=["GET"]) def query_index(): - global index - query_text = request.args.get("text", None) - if query_text is None: - return "No text found, please include a ?text=blah parameter in the URL", 400 - query_engine = index.as_query_engine() - response = query_engine.query(query_text) - return str(response), 200 + global index + query_text = request.args.get("text", None) + if query_text is None: + return "No text found, please include a ?text=blah parameter in the URL", 400 + query_engine = index.as_query_engine() + response = query_engine.query(query_text) + return str(response), 200 ``` Now, we've introduced a few new concepts to our server: @@ -113,22 +119,26 @@ from multiprocessing.managers import BaseManager from llama_index import SimpleDirectoryReader, VectorStoreIndex, Document # NOTE: for 
local testing only, do NOT deploy with your key hardcoded -os.environ['OPENAI_API_KEY'] = "your key here" +os.environ["OPENAI_API_KEY"] = "your key here" index = None lock = Lock() + def initialize_index(): - global index + global index + + with lock: + # same as before ... + pass - with lock: - # same as before ... def query_index(query_text): - global index - query_engine = index.as_query_engine() - response = query_engine.query(query_text) - return str(response) + global index + query_engine = index.as_query_engine() + response = query_engine.query(query_text) + return str(response) + if __name__ == "__main__": # init the global index @@ -137,8 +147,8 @@ if __name__ == "__main__": # setup server # NOTE: you might want to handle the password in a less hardcoded way - manager = BaseManager(('', 5602), b'password') - manager.register('query_index', query_index) + manager = BaseManager(("", 5602), b"password") + manager.register("query_index", query_index) server = manager.get_server() print("starting server...") @@ -157,26 +167,28 @@ from flask import Flask, request # initialize manager connection # NOTE: you might want to handle the password in a less hardcoded way -manager = BaseManager(('', 5602), b'password') -manager.register('query_index') +manager = BaseManager(("", 5602), b"password") +manager.register("query_index") manager.connect() + @app.route("/query", methods=["GET"]) def query_index(): - global index - query_text = request.args.get("text", None) - if query_text is None: - return "No text found, please include a ?text=blah parameter in the URL", 400 - response = manager.query_index(query_text)._getvalue() - return str(response), 200 + global index + query_text = request.args.get("text", None) + if query_text is None: + return "No text found, please include a ?text=blah parameter in the URL", 400 + response = manager.query_index(query_text)._getvalue() + return str(response), 200 + @app.route("/") def home(): return "Hello World!" + if __name__ == "__main__": app.run(host="0.0.0.0", port=5601) - ``` _flask_demo.py_ @@ -189,20 +201,21 @@ If we allow users to upload their own documents, we should probably remove the P ```python ... -manager.register('insert_into_index') +manager.register("insert_into_index") ... + @app.route("/uploadFile", methods=["POST"]) def upload_file(): global manager - if 'file' not in request.files: + if "file" not in request.files: return "Please send a POST request with a file", 400 filepath = None try: uploaded_file = request.files["file"] filename = secure_filename(uploaded_file.filename) - filepath = os.path.join('documents', os.path.basename(filename)) + filepath = os.path.join("documents", os.path.basename(filename)) uploaded_file.save(filepath) if request.form.get("filename_as_doc_id", None) is not None: @@ -239,8 +252,9 @@ def insert_into_index(doc_text, doc_id=None): index.insert(document) index.storage_context.persist() + ... -manager.register('insert_into_index', insert_into_index) +manager.register("insert_into_index", insert_into_index) ... 
``` diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md index c7d6456df7b01f3bc2b5ef92f6c0b2d58d7ba50b..2b8f1c6ad51a37a4d453454b9195b91898e1dacd 100644 --- a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md +++ b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md @@ -116,10 +116,12 @@ in the `endpoints.py` file: ```python @collections_router.post("/create") -async def create_collection(request, - title: str = Form(...), - description: str = Form(...), - files: list[UploadedFile] = File(...), ): +async def create_collection( + request, + title: str = Form(...), + description: str = Form(...), + files: list[UploadedFile] = File(...), +): key = None if getattr(request, "auth", None) is None else request.auth if key is not None: key = await key @@ -141,9 +143,7 @@ async def create_collection(request, create_index.si(collection_instance.id).apply_async() - return await sync_to_async(CollectionModelSchema)( - ... - ) + return await sync_to_async(CollectionModelSchema)(...) ``` 3. `/collections/query` — a POST endpoint to query a document collection using the LLM. Accepts a JSON payload @@ -152,9 +152,11 @@ async def create_collection(request, to this REST endpoint to query a specific collection. ```python -@collections_router.post("/query", - response=CollectionQueryOutput, - summary="Ask a question of a document collection", ) +@collections_router.post( + "/query", + response=CollectionQueryOutput, + summary="Ask a question of a document collection", +) def query_collection_view(request: HttpRequest, query_input: CollectionQueryInput): collection_id = query_input.collection_id query_str = query_input.query_str @@ -166,9 +168,11 @@ def query_collection_view(request: HttpRequest, query_input: CollectionQueryInpu output is serialized using the `CollectionModelSchema`. ```python -@collections_router.get("/available", - response=list[CollectionModelSchema], - summary="Get a list of all of the collections created with my api_key", ) +@collections_router.get( + "/available", + response=list[CollectionModelSchema], + summary="Get a list of all of the collections created with my api_key", +) async def get_my_collections_view(request: HttpRequest): key = None if getattr(request, "auth", None) is None else request.auth if key is not None: @@ -176,12 +180,7 @@ async def get_my_collections_view(request: HttpRequest): collections = Collection.objects.filter(api_key=key) - return [ - { - ... - } - async for collection in collections - ] + return [{...} async for collection in collections] ``` 5. `/collections/{collection_id}/add_file`: A POST endpoint to add a file to an existing collection. Accepts @@ -189,11 +188,15 @@ async def get_my_collections_view(request: HttpRequest): instance associated with the specified collection. 
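Before looking at the endpoint implementation below, here is a rough sketch of how a client might exercise this add-file endpoint. Everything in it is hypothetical: the host, URL prefix, auth header, collection id, and file name are placeholders to adapt to your own Delphic deployment.

```python
import requests  # hypothetical client-side sketch; not part of Delphic itself

API_BASE = "http://localhost:8000/api"  # placeholder; depends on how the API is mounted
COLLECTION_ID = 1  # placeholder collection id

with open("report.pdf", "rb") as f:
    resp = requests.post(
        f"{API_BASE}/collections/{COLLECTION_ID}/add_file",
        # auth scheme is deployment-specific; shown here only as an assumption
        headers={"Authorization": "Bearer YOUR_API_KEY"},
        files={"file": f},  # multipart file field, matching the endpoint's File(...) parameter
        data={"description": "Quarterly report PDF"},  # matches the Form(...) parameter
    )

print(resp.status_code, resp.text)
```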
```python -@collections_router.post("/{collection_id}/add_file", summary="Add a file to a collection") -async def add_file_to_collection(request, - collection_id: int, - file: UploadedFile = File(...), - description: str = Form(...), ): +@collections_router.post( + "/{collection_id}/add_file", summary="Add a file to a collection" +) +async def add_file_to_collection( + request, + collection_id: int, + file: UploadedFile = File(...), + description: str = Form(...), +): collection = await sync_to_async(Collection.objects.get)(id=collection_id) ``` @@ -312,7 +315,9 @@ async def receive(self, text_data): await self.send(json.dumps({"response": formatted_response}, indent=4)) else: - await self.send(json.dumps({"error": "No index loaded for this connection."}, indent=4)) + await self.send( + json.dumps({"error": "No index loaded for this connection."}, indent=4) + ) ``` To load the collection model, the `load_collection_model` function is used, which can be found diff --git a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md index da85608473f648e8e43e620f0c01359c101eb877..3e1ac4c2da11b7e12f970009b07a88081a5506be 100644 --- a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md +++ b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md @@ -28,7 +28,7 @@ nest_asyncio.apply() Let's first download the raw 10-k files, from 2019-2022. -```python +``` # NOTE: the code examples assume you're operating within a Jupyter notebook. # download files !mkdir data @@ -40,7 +40,7 @@ To parse the HTML files into formatted text, we use the [Unstructured](https://g First we install the necessary packages: -```python +``` !pip install llama-hub unstructured ``` diff --git a/docs/understanding/putting_it_all_together/q_and_a.md b/docs/understanding/putting_it_all_together/q_and_a.md index 3544b2f7c41b737c73b34d4f58fa2973544cba0d..8413aefb6a64f34d403f8c3a322011e629014ce6 100644 --- a/docs/understanding/putting_it_all_together/q_and_a.md +++ b/docs/understanding/putting_it_all_together/q_and_a.md @@ -6,12 +6,12 @@ The most basic example usage of LlamaIndex is through semantic search. 
We provid ```python from llama_index import VectorStoreIndex, SimpleDirectoryReader -documents = SimpleDirectoryReader('data').load_data() + +documents = SimpleDirectoryReader("data").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("What did the author do growing up?") print(response) - ``` **Tutorials** @@ -38,9 +38,7 @@ Empirically, setting `response_mode="tree_summarize"` also leads to better summa ```python index = SummaryIndex.from_documents(documents) -query_engine = index.as_query_engine( - response_mode="tree_summarize" -) +query_engine = index.as_query_engine(response_mode="tree_summarize") response = query_engine.query("<summarization_query>") ``` @@ -72,10 +70,11 @@ from llama_index.indices.composability import ComposableGraph index1 = VectorStoreIndex.from_documents(notion_docs) index2 = VectorStoreIndex.from_documents(slack_docs) -graph = ComposableGraph.from_indices(SummaryIndex, [index1, index2], index_summaries=["summary1", "summary2"]) +graph = ComposableGraph.from_indices( + SummaryIndex, [index1, index2], index_summaries=["summary1", "summary2"] +) query_engine = graph.as_query_engine() response = query_engine.query("<query_str>") - ``` **Guides** @@ -117,14 +116,9 @@ By default, this uses a `LLMSingleSelector` as the router, which uses the LLM to ```python from llama_index.query_engine import RouterQueryEngine -query_engine = RouterQueryEngine.from_defaults( - query_engine_tools=[tool1, tool2] -) - -response = query_engine.query( - "In Notion, give me a summary of the product roadmap." -) +query_engine = RouterQueryEngine.from_defaults(query_engine_tools=[tool1, tool2]) +response = query_engine.query("In Notion, give me a summary of the product roadmap.") ``` **Guides** @@ -138,6 +132,7 @@ You can explicitly perform compare/contrast queries with a **query transformatio ```python from llama_index.indices.query.query_transform.base import DecomposeQueryTransform + decompose_transform = DecomposeQueryTransform( service_context.llm_predictor, verbose=True ) @@ -166,15 +161,24 @@ from llama_index.tools import QueryEngineTool, ToolMetadata query_engine_tools = [ QueryEngineTool( query_engine=sept_engine, - metadata=ToolMetadata(name='sept_22', description='Provides information about Uber quarterly financials ending September 2022') + metadata=ToolMetadata( + name="sept_22", + description="Provides information about Uber quarterly financials ending September 2022", + ), ), QueryEngineTool( query_engine=june_engine, - metadata=ToolMetadata(name='june_22', description='Provides information about Uber quarterly financials ending June 2022') + metadata=ToolMetadata( + name="june_22", + description="Provides information about Uber quarterly financials ending June 2022", + ), ), QueryEngineTool( query_engine=march_engine, - metadata=ToolMetadata(name='march_22', description='Provides information about Uber quarterly financials ending March 2022') + metadata=ToolMetadata( + name="march_22", + description="Provides information about Uber quarterly financials ending March 2022", + ), ), ] ``` @@ -184,8 +188,9 @@ Then, we define a `SubQuestionQueryEngine` over these tools: ```python from llama_index.query_engine import SubQuestionQueryEngine -query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools) - +query_engine = SubQuestionQueryEngine.from_defaults( + query_engine_tools=query_engine_tools +) ``` This query engine can execute any number of sub-queries against any 
subset of query engine tools before synthesizing the final answer. diff --git a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md index 5e3d049696e0be715a62fa37229811425da73b12..a5e7570009502659b3068e56db4627a8d6dfb42a 100644 --- a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md +++ b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md @@ -25,7 +25,7 @@ st.title("🦙 Llama Index Term Extractor 🦙") document_text = st.text_area("Or enter raw text") if st.button("Extract Terms and Definitions") and document_text: with st.spinner("Extracting..."): - extracted_terms = document text # this is a placeholder! + extracted_terms = document_text # this is a placeholder! st.write(extracted_terms) ``` @@ -53,16 +53,22 @@ setup_tab, upload_tab = st.tabs(["Setup", "Upload/Extract Terms"]) with setup_tab: st.subheader("LLM Setup") api_key = st.text_input("Enter your OpenAI API key here", type="password") - llm_name = st.selectbox('Which LLM?', ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]) - model_temperature = st.slider("LLM Temperature", min_value=0.0, max_value=1.0, step=0.1) - term_extract_str = st.text_area("The query to extract terms and definitions with.", value=DEFAULT_TERM_STR) + llm_name = st.selectbox( + "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"] + ) + model_temperature = st.slider( + "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1 + ) + term_extract_str = st.text_area( + "The query to extract terms and definitions with.", value=DEFAULT_TERM_STR + ) with upload_tab: st.subheader("Extract and Query Definitions") document_text = st.text_area("Or enter raw text") if st.button("Extract Terms and Definitions") and document_text: with st.spinner("Extracting..."): - extracted_terms = document text # this is a placeholder! + extracted_terms = document_text # this is a placeholder! st.write(extracted_terms) ``` @@ -77,25 +83,42 @@ Now that we are able to define LLM settings and upload text, we can try using Ll We can add the following functions to both initialize our LLM, as well as use it to extract terms from the input text. 
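To make the parsing step in `extract_terms` (shown below) concrete, here is a minimal sketch of the kind of text the extraction query is expected to return and how it collapses into a dict. The sample output string is invented purely for illustration; real output depends on your prompt and model.

```python
# Invented sample of the "Term: ... Definition: ..." lines the extraction query asks for.
sample_output = (
    "Term: LlamaIndex Definition: A data framework for connecting custom data to LLMs.\n"
    "Term: vector index Definition: An index that stores embeddings for semantic lookup.\n"
)

# Same line-by-line parsing strategy used in extract_terms below.
lines = [x for x in sample_output.split("\n") if "Term:" in x and "Definition:" in x]
terms_to_definition = {
    x.split("Definition:")[0].split("Term:")[-1].strip(): x.split("Definition:")[-1].strip()
    for x in lines
}
print(terms_to_definition)
# {'LlamaIndex': 'A data framework for connecting custom data to LLMs.',
#  'vector index': 'An index that stores embeddings for semantic lookup.'}
```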
```python -from llama_index import Document, SummaryIndex, LLMPredictor, ServiceContext, load_index_from_storage +from llama_index import ( + Document, + SummaryIndex, + LLMPredictor, + ServiceContext, + load_index_from_storage, +) from llama_index.llms import OpenAI + def get_llm(llm_name, model_temperature, api_key, max_tokens=256): - os.environ['OPENAI_API_KEY'] = api_key + os.environ["OPENAI_API_KEY"] = api_key return OpenAI(temperature=model_temperature, model=llm_name, max_tokens=max_tokens) + def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_key): llm = get_llm(llm_name, model_temperature, api_key, max_tokens=1024) - service_context = ServiceContext.from_defaults(llm=llm, - chunk_size=1024) + service_context = ServiceContext.from_defaults(llm=llm, chunk_size=1024) temp_index = SummaryIndex.from_documents(documents, service_context=service_context) query_engine = temp_index.as_query_engine(response_mode="tree_summarize") terms_definitions = str(query_engine.query(term_extract_str)) - terms_definitions = [x for x in terms_definitions.split("\n") if x and 'Term:' in x and 'Definition:' in x] + terms_definitions = [ + x + for x in terms_definitions.split("\n") + if x and "Term:" in x and "Definition:" in x + ] # parse the text into a dict - terms_to_definition = {x.split("Definition:")[0].split("Term:")[-1].strip(): x.split("Definition:")[-1].strip() for x in terms_definitions} + terms_to_definition = { + x.split("Definition:")[0] + .split("Term:")[-1] + .strip(): x.split("Definition:")[-1] + .strip() + for x in terms_definitions + } return terms_to_definition ``` @@ -108,9 +131,13 @@ with upload_tab: document_text = st.text_area("Or enter raw text") if st.button("Extract Terms and Definitions") and document_text: with st.spinner("Extracting..."): - extracted_terms = extract_terms([Document(text=document_text)], - term_extract_str, llm_name, - model_temperature, api_key) + extracted_terms = extract_terms( + [Document(text=document_text)], + term_extract_str, + llm_name, + model_temperature, + api_key, + ) st.write(extracted_terms) ``` @@ -132,14 +159,16 @@ First things first though, let's add a feature to initialize a global vector ind ```python ... -if 'all_terms' not in st.session_state: - st.session_state['all_terms'] = DEFAULT_TERMS +if "all_terms" not in st.session_state: + st.session_state["all_terms"] = DEFAULT_TERMS ... + def insert_terms(terms_to_definition): for term, definition in terms_to_definition.items(): doc = Document(text=f"Term: {term}\nDefinition: {definition}") - st.session_state['llama_index'].insert(doc) + st.session_state["llama_index"].insert(doc) + @st.cache_resource def initialize_index(llm_name, model_temperature, api_key): @@ -152,33 +181,48 @@ def initialize_index(llm_name, model_temperature, api_key): return index + ... with upload_tab: st.subheader("Extract and Query Definitions") if st.button("Initialize Index and Reset Terms"): - st.session_state['llama_index'] = initialize_index(llm_name, model_temperature, api_key) - st.session_state['all_terms'] = {} + st.session_state["llama_index"] = initialize_index( + llm_name, model_temperature, api_key + ) + st.session_state["all_terms"] = {} if "llama_index" in st.session_state: - st.markdown("Either upload an image/screenshot of a document, or enter the text manually.") + st.markdown( + "Either upload an image/screenshot of a document, or enter the text manually." 
+ ) document_text = st.text_area("Or enter raw text") - if st.button("Extract Terms and Definitions") and (uploaded_file or document_text): - st.session_state['terms'] = {} + if st.button("Extract Terms and Definitions") and ( + uploaded_file or document_text + ): + st.session_state["terms"] = {} terms_docs = {} with st.spinner("Extracting..."): - terms_docs.update(extract_terms([Document(text=document_text)], term_extract_str, llm_name, model_temperature, api_key)) - st.session_state['terms'].update(terms_docs) + terms_docs.update( + extract_terms( + [Document(text=document_text)], + term_extract_str, + llm_name, + model_temperature, + api_key, + ) + ) + st.session_state["terms"].update(terms_docs) - if "terms" in st.session_state and st.session_state["terms"]:: + if "terms" in st.session_state and st.session_state["terms"]: st.markdown("Extracted terms") - st.json(st.session_state['terms']) + st.json(st.session_state["terms"]) if st.button("Insert terms?"): with st.spinner("Inserting terms"): - insert_terms(st.session_state['terms']) - st.session_state['all_terms'].update(st.session_state['terms']) - st.session_state['terms'] = {} + insert_terms(st.session_state["terms"]) + st.session_state["all_terms"].update(st.session_state["terms"]) + st.session_state["terms"] = {} st.experimental_rerun() ``` @@ -196,8 +240,8 @@ setup_tab, terms_tab, upload_tab, query_tab = st.tabs( ... with terms_tab: with terms_tab: - st.subheader("Current Extracted Terms and Definitions") - st.json(st.session_state["all_terms"]) + st.subheader("Current Extracted Terms and Definitions") + st.json(st.session_state["all_terms"]) ... with query_tab: st.subheader("Query for Terms/Definitions!") @@ -216,7 +260,10 @@ with query_tab: if "llama_index" in st.session_state: query_text = st.text_input("Ask about a term or definition:") if query_text: - query_text = query_text + "\nIf you can't find the answer, answer the query with the best of your knowledge." + query_text = ( + query_text + + "\nIf you can't find the answer, answer the query with the best of your knowledge." + ) with st.spinner("Generating answer..."): response = st.session_state["llama_index"].query( query_text, similarity_top_k=5, response_mode="compact" @@ -254,9 +301,9 @@ With our base app working, it might feel like a lot of work to build up a useful def insert_terms(terms_to_definition): for term, definition in terms_to_definition.items(): doc = Document(text=f"Term: {term}\nDefinition: {definition}") - st.session_state['llama_index'].insert(doc) + st.session_state["llama_index"].insert(doc) # TEMPORARY - save to disk - st.session_state['llama_index'].storage_context.persist() + st.session_state["llama_index"].storage_context.persist() ``` Now, we need some document to extract from! The repository for this project used the wikipedia page on New York City, and you can find the text [here](https://github.com/jerryjliu/llama_index/blob/main/examples/test_wiki/data/nyc_text.txt). @@ -298,7 +345,11 @@ This is due to the concept of "refining" answers in Llama Index. Since we are qu So, the refine process seems to be messing with our results! Rather than appending extra instructions to the `query_str`, remove that, and Llama Index will let us provide our own custom prompts! Let's create those now, using the [default prompts](https://github.com/jerryjliu/llama_index/blob/main/llama_index/prompts/default_prompts.py) and [chat specific prompts](https://github.com/jerryjliu/llama_index/blob/main/llama_index/prompts/chat_prompts.py) as a guide. 
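As a warm-up for the templates defined in `constants.py` next, here is a minimal, illustrative sketch of the shape such a prompt template takes. The `{context_str}` and `{query_str}` variables are the ones LlamaIndex's default prompts use; the wording below is invented for illustration and is not the tutorial's actual template.

```python
from llama_index.prompts import PromptTemplate

# Illustrative only: a bare-bones QA template in the same shape as the defaults.
SKETCH_TEXT_QA_TEMPLATE = PromptTemplate(
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Using the context, define the term or answer the question: {query_str}\n"
    "If the context is not helpful, answer from your own knowledge instead.\n"
)
```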
Using a new file `constants.py`, let's create some new query templates: ```python -from llama_index.prompts import PromptTemplate, SelectorPromptTemplate, ChatPromptTemplate +from llama_index.prompts import ( + PromptTemplate, + SelectorPromptTemplate, + ChatPromptTemplate, +) from llama_index.prompts.utils import is_chat_model from llama_index.llms.base import ChatMessage, MessageRole @@ -359,17 +410,21 @@ So, now we can import these prompts into our app and use them during the query. ```python from constants import REFINE_TEMPLATE, TEXT_QA_TEMPLATE + ... - if "llama_index" in st.session_state: - query_text = st.text_input("Ask about a term or definition:") - if query_text: - query_text = query_text # Notice we removed the old instructions - with st.spinner("Generating answer..."): - response = st.session_state["llama_index"].query( - query_text, similarity_top_k=5, response_mode="compact", - text_qa_template=TEXT_QA_TEMPLATE, refine_template=REFINE_TEMPLATE - ) - st.markdown(str(response)) +if "llama_index" in st.session_state: + query_text = st.text_input("Ask about a term or definition:") + if query_text: + query_text = query_text # Notice we removed the old instructions + with st.spinner("Generating answer..."): + response = st.session_state["llama_index"].query( + query_text, + similarity_top_k=5, + response_mode="compact", + text_qa_template=TEXT_QA_TEMPLATE, + refine_template=REFINE_TEMPLATE, + ) + st.markdown(str(response)) ... ``` @@ -385,6 +440,7 @@ If you get an import error about PIL, install it using `pip install Pillow` firs from PIL import Image from llama_index.readers.file.base import DEFAULT_FILE_EXTRACTOR, ImageParser + @st.cache_resource def get_file_extractor(): image_parser = ImageParser(keep_image=True, parse_text=True) @@ -399,6 +455,7 @@ def get_file_extractor(): return file_extractor + file_extractor = get_file_extractor() ... with upload_tab: diff --git a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md index 82420fae6b58f2081a1d3db732e49baed0dfca1f..c1cd7896f0609c496a793bde8bdd224a6c8f3d3b 100644 --- a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md +++ b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md @@ -17,7 +17,6 @@ In this example, we will analyze Wikipedia articles of different cities: Boston, The below code snippet downloads the relevant data into files. ```python - from pathlib import Path import requests @@ -25,26 +24,25 @@ wiki_titles = ["Toronto", "Seattle", "Chicago", "Boston", "Houston"] for title in wiki_titles: response = requests.get( - 'https://en.wikipedia.org/w/api.php', + "https://en.wikipedia.org/w/api.php", params={ - 'action': 'query', - 'format': 'json', - 'titles': title, - 'prop': 'extracts', + "action": "query", + "format": "json", + "titles": title, + "prop": "extracts", # 'exintro': True, - 'explaintext': True, - } + "explaintext": True, + }, ).json() - page = next(iter(response['query']['pages'].values())) - wiki_text = page['extract'] + page = next(iter(response["query"]["pages"].values())) + wiki_text = page["extract"] - data_path = Path('data') + data_path = Path("data") if not data_path.exists(): Path.mkdir(data_path) - with open(data_path / f"{title}.txt", 'w') as fp: + with open(data_path / f"{title}.txt", "w") as fp: fp.write(wiki_text) - ``` The next snippet loads all files into Document objects. @@ -53,8 +51,9 @@ The next snippet loads all files into Document objects. 
# Load all wiki documents city_docs = {} for wiki_title in wiki_titles: - city_docs[wiki_title] = SimpleDirectoryReader(input_files=[f"data/{wiki_title}.txt"]).load_data() - + city_docs[wiki_title] = SimpleDirectoryReader( + input_files=[f"data/{wiki_title}.txt"] + ).load_data() ``` ### Defining the Set of Indexes @@ -70,9 +69,7 @@ from llama_index.llms import OpenAI # set service context llm_gpt4 = OpenAI(temperature=0, model="gpt-4") -service_context = ServiceContext.from_defaults( - llm=llm_gpt4, chunk_size=1024 -) +service_context = ServiceContext.from_defaults(llm=llm_gpt4, chunk_size=1024) # Build city document index vector_indices = {} @@ -87,15 +84,18 @@ for wiki_title in wiki_titles: # set id for vector index vector_indices[wiki_title].index_struct.index_id = wiki_title # persist to disk - storage_context.persist(persist_dir=f'./storage/{wiki_title}') + storage_context.persist(persist_dir=f"./storage/{wiki_title}") ``` Querying a vector index lets us easily perform semantic search over a given city's documents. ```python -response = vector_indices["Toronto"].as_query_engine().query("What are the sports teams in Toronto?") +response = ( + vector_indices["Toronto"] + .as_query_engine() + .query("What are the sports teams in Toronto?") +) print(str(response)) - ``` Example response: @@ -131,7 +131,7 @@ graph = ComposableGraph.from_indices( SimpleKeywordTableIndex, [index for _, index in vector_indices.items()], [summary for _, summary in index_summaries.items()], - max_keywords_per_chunk=50 + max_keywords_per_chunk=50, ) # get root index @@ -142,7 +142,6 @@ root_summary = ( "This index contains Wikipedia articles about multiple cities. " "Use this index if you want to compare multiple cities. " ) - ``` Querying this graph (with a query transform module), allows us to easily compare/contrast between different cities. @@ -153,24 +152,23 @@ An example is shown below. from llama_index import LLMPredictor from llama_index.indices.query.query_transform.base import DecomposeQueryTransform -decompose_transform = DecomposeQueryTransform( - LLMPredictor(llm=llm_gpt4), verbose=True -) +decompose_transform = DecomposeQueryTransform(LLMPredictor(llm=llm_gpt4), verbose=True) # define custom query engines from llama_index.query_engine.transform_query_engine import TransformQueryEngine + custom_query_engines = {} for index in vector_indices.values(): query_engine = index.as_query_engine(service_context=service_context) query_engine = TransformQueryEngine( query_engine, query_transform=decompose_transform, - transform_extra_info={'index_summary': index.index_struct.summary}, + transform_extra_info={"index_summary": index.index_struct.summary}, ) custom_query_engines[index.index_id] = query_engine custom_query_engines[graph.root_id] = graph.root_index.as_query_engine( - retriever_mode='simple', - response_mode='tree_summarize', + retriever_mode="simple", + response_mode="tree_summarize", service_context=service_context, ) @@ -178,9 +176,7 @@ custom_query_engines[graph.root_id] = graph.root_index.as_query_engine( query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines) # query the graph -query_str = ( - "Compare and contrast the arts and culture of Houston and Boston. " -) +query_str = "Compare and contrast the arts and culture of Houston and Boston. " response_chatgpt = query_engine.query(query_str) ``` @@ -222,7 +218,9 @@ graph_description = ( "This tool contains Wikipedia articles about multiple cities. " "Use this tool if you want to compare multiple cities. 
" ) -graph_tool = QueryEngineTool.from_defaults(graph_query_engine, description=graph_description) +graph_tool = QueryEngineTool.from_defaults( + graph_query_engine, description=graph_description +) query_engine_tools.append(graph_tool) ``` @@ -236,7 +234,7 @@ from llama_index.selectors.llm_selectors import LLMSingleSelector router_query_engine = RouterQueryEngine( selector=LLMSingleSelector.from_defaults(service_context=service_context), - query_engine_tools=query_engine_tools + query_engine_tools=query_engine_tools, ) ``` @@ -255,16 +253,14 @@ Let's take a look at a few examples! response = router_query_engine.query( "Compare and contrast the arts and culture of Houston and Boston.", ) -print(str(response) +print(str(response)) ``` **Asking Questions about specific Cities** ```python - response = router_query_engine.query("What are the sports teams in Toronto?") print(str(response)) - ``` This "outer" abstraction is able to handle different queries by routing to the right underlying abstractions. diff --git a/docs/understanding/putting_it_all_together/structured_data.md b/docs/understanding/putting_it_all_together/structured_data.md index 374f62b179fa3146126a37b4d3563afdab7f3156..0e38a4b78ca0cfb70e2ba6e59963533c1797290e 100644 --- a/docs/understanding/putting_it_all_together/structured_data.md +++ b/docs/understanding/putting_it_all_together/structured_data.md @@ -20,7 +20,16 @@ A notebook for this tutorial is [available here](../../examples/index_structs/st First, we use SQLAlchemy to setup a simple sqlite db: ```python -from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column +from sqlalchemy import ( + create_engine, + MetaData, + Table, + Column, + String, + Integer, + select, + column, +) engine = create_engine("sqlite:///:memory:") metadata_obj = MetaData() @@ -49,6 +58,7 @@ to directly populate this table: ```python from sqlalchemy import insert + rows = [ {"city_name": "Toronto", "population": 2731571, "country": "Canada"}, {"city_name": "Tokyo", "population": 13929286, "country": "Japan"}, @@ -85,9 +95,7 @@ query_engine = NLSQLTableQueryEngine( sql_database=sql_database, tables=["city_stats"], ) -query_str = ( - "Which city has the highest population?" -) +query_str = "Which city has the highest population?" response = query_engine.query(query_str) ``` @@ -109,7 +117,10 @@ into the ObjectIndex constructor. from llama_index.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema table_node_mapping = SQLTableNodeMapping(sql_database) -table_schema_objs = [(SQLTableSchema(table_name="city_stats")), ...] # one SQLTableSchema for each table +table_schema_objs = [ + (SQLTableSchema(table_name="city_stats")), + ..., +] # one SQLTableSchema for each table obj_index = ObjectIndex.from_objects( table_schema_objs, @@ -133,7 +144,9 @@ city_stats_text = ( ) table_node_mapping = SQLTableNodeMapping(sql_database) -table_schema_objs = [(SQLTableSchema(table_name="city_stats", context_str=city_stats_text))] +table_schema_objs = [ + (SQLTableSchema(table_name="city_stats", context_str=city_stats_text)) +] ``` ## Using natural language SQL queries diff --git a/docs/understanding/querying/querying.md b/docs/understanding/querying/querying.md index d60aef1a1a66a43f8ea3e6f77dd412d39787ee87..70ff0a2605fb5c28535bf293f24780dd7f14c6ac 100644 --- a/docs/understanding/querying/querying.md +++ b/docs/understanding/querying/querying.md @@ -10,7 +10,9 @@ The basis of all querying is the `QueryEngine`. 
The simplest way to get a QueryE ```python query_engine = index.as_query_engine() -response = query_engine.query("Write an email to the user given their background information.") +response = query_engine.query( + "Write an email to the user given their background information." +) print(response) ``` @@ -57,10 +59,7 @@ response_synthesizer = get_response_synthesizer() query_engine = RetrieverQueryEngine( retriever=retriever, response_synthesizer=response_synthesizer, - node_postprocessors=[ - SimilarityPostprocessor(similarity_cutoff=0.7) - ] - + node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)], ) # query @@ -103,8 +102,7 @@ To configure the desired node postprocessors: ```python node_postprocessors = [ KeywordNodePostprocessor( - required_keywords=["Combinator"], - exclude_keywords=["Italy"] + required_keywords=["Combinator"], exclude_keywords=["Italy"] ) ] query_engine = RetrieverQueryEngine.from_args( @@ -120,7 +118,7 @@ After a retriever fetches relevant nodes, a `BaseSynthesizer` synthesizes the fi You can configure it via ```python -query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=<response_mode>) +query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=response_mode) ``` Right now, we support the following options: diff --git a/docs/understanding/storing/storing.md b/docs/understanding/storing/storing.md index 77f530895fafe1db8c1e8636b4a7ce0636aae10e..69b870742c2f3e32204bb3cdded3bd30237e387c 100644 --- a/docs/understanding/storing/storing.md +++ b/docs/understanding/storing/storing.md @@ -42,7 +42,7 @@ LlamaIndex supports a [huge number of vector stores](/module_guides/storing/vect First you will need to install chroma: -```python +``` pip install chromadb ``` @@ -75,10 +75,7 @@ vector_store = ChromaVectorStore(chroma_collection=chroma_collection) storage_context = StorageContext.from_defaults(vector_store=vector_store) # create your index -index = VectorStoreIndex.from_documents( - documents, - storage_context=storage_context -) +index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) # create a query engine and query query_engine = index.as_query_engine() @@ -106,8 +103,7 @@ storage_context = StorageContext.from_defaults(vector_store=vector_store) # load your index from stored vectors index = VectorStoreIndex.from_vector_store( - vector_store, - storage_context=storage_context + vector_store, storage_context=storage_context ) # create a query engine diff --git a/docs/understanding/using_llms/using_llms.md b/docs/understanding/using_llms/using_llms.md index 5cb25d4e801e1d5f1b00e978e06040b4df6e94f2..a6d4343f631c1a300dc115ab9a550fb6ca34f5a9 100644 --- a/docs/understanding/using_llms/using_llms.md +++ b/docs/understanding/using_llms/using_llms.md @@ -18,7 +18,7 @@ LlamaIndex provides a single interface to a large number of different LLMs, allo ```python from llama_index.llms import OpenAI -response = OpenAI().complete('Paul Graham is ') +response = OpenAI().complete("Paul Graham is ") print(response) ``` @@ -31,8 +31,8 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext llm = OpenAI(temperature=0.1, model="gpt-4") service_context = ServiceContext.from_defaults(llm=llm) -documents = SimpleDirectoryReader('data').load_data() -index = VectorStoreIndex.from_documents(documents,service_context=service_context) +documents = SimpleDirectoryReader("data").load_data() +index = VectorStoreIndex.from_documents(documents, service_context=service_context) ``` In this case, you've 
instantiated OpenAI and customized it to use the `gpt-4` model instead of the default `gpt-3.5-turbo`, and also modified the `temperature`. The `VectorStoreIndex` will now use gpt-4 to encode or `embed` your documents for later querying. @@ -53,6 +53,7 @@ Once you have a local LLM such as Llama 2 installed, you can use it like this: ```python from llama_index import ServiceContext + service_context = ServiceContext.from_defaults(llm="local") ``` diff --git a/llama_index/embeddings/elasticsearch.py b/llama_index/embeddings/elasticsearch.py index f4c1cdaaf87953380b23b353285c5a3e8075b0fd..522ed2740725f7cd8649369cd4c6683e20aedd3a 100644 --- a/llama_index/embeddings/elasticsearch.py +++ b/llama_index/embeddings/elasticsearch.py @@ -74,9 +74,7 @@ class ElasticsearchEmbedding(BaseEmbedding): input_field = "your_input_field" # Create Elasticsearch connection - es_connection = Elasticsearch( - hosts=["localhost:9200"], basic_auth=("user", "password") - ) + es_connection = Elasticsearch(hosts=["localhost:9200"], basic_auth=("user", "password")) # Instantiate ElasticsearchEmbedding using the existing connection embeddings = ElasticsearchEmbedding.from_es_connection( diff --git a/llama_index/indices/keyword_table/README.md b/llama_index/indices/keyword_table/README.md index bbf4150d72493d81be7794bee7e494e58290f2aa..5d22b808dbb7c16841e02aed0492ebd95607dbc2 100644 --- a/llama_index/indices/keyword_table/README.md +++ b/llama_index/indices/keyword_table/README.md @@ -28,7 +28,7 @@ Use the popular RAKE keyword extractor. from llama_index import KeywordTableIndex, SimpleDirectoryReader # build index -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() index = KeywordTableIndex.from_documents(documents) # query query_engine = index.as_query_engine() diff --git a/llama_index/indices/list/README.md b/llama_index/indices/list/README.md index 81eca24700dd4ab0a369d5f03a56339d2e463e71..f75159166f056153fca06268982c67dd6e492688 100644 --- a/llama_index/indices/list/README.md +++ b/llama_index/indices/list/README.md @@ -14,10 +14,9 @@ During query-time, Summary Index constructs an answer using the _create and refi from llama_index import SummaryIndex, SimpleDirectoryReader # build index -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() index = SummaryIndex.from_documents(documents) # query query_engine = index.as_query_engine() response = query_engine.query("<question text>") - ``` diff --git a/llama_index/indices/tree/README.md b/llama_index/indices/tree/README.md index 3ff3954fb116d202f94e2534f8f2c9cd883ff340..4dca8f69875b6fd6ab12431c59e68ede4495d9bd 100644 --- a/llama_index/indices/tree/README.md +++ b/llama_index/indices/tree/README.md @@ -24,7 +24,7 @@ Simply use the root nodes as context to synthesize an answer to the query. 
This from llama_index import TreeIndex, SimpleDirectoryReader # build index -documents = SimpleDirectoryReader('data').load_data() +documents = SimpleDirectoryReader("data").load_data() index = TreeIndex.from_documents(documents) # query query_engine = index.as_query_engine() diff --git a/llama_index/readers/string_iterable.py b/llama_index/readers/string_iterable.py index 435ac873071fdce3bbb974ec1bc8ea6adb2da347..222ebd295ca4c10cbc753c350e70c10f65f56500 100644 --- a/llama_index/readers/string_iterable.py +++ b/llama_index/readers/string_iterable.py @@ -16,7 +16,8 @@ class StringIterableReader(BasePydanticReader): from llama_index import StringIterableReader, TreeIndex documents = StringIterableReader().load_data( - texts=["I went to the store", "I bought an apple"]) + texts=["I went to the store", "I bought an apple"] + ) index = TreeIndex.from_documents(documents) query_engine = index.as_query_engine() query_engine.query("what did I buy?") diff --git a/llama_index/storage/docstore/keyval_docstore.py b/llama_index/storage/docstore/keyval_docstore.py index 9b3302287a82b75112cddfcb21d87fa05a071ff5..e27198f60953d9683499447270e30ce78c290fde 100644 --- a/llama_index/storage/docstore/keyval_docstore.py +++ b/llama_index/storage/docstore/keyval_docstore.py @@ -28,10 +28,7 @@ class KVDocumentStore(BaseDocumentStore): summary_index = SummaryIndex(nodes, storage_context=storage_context) vector_index = VectorStoreIndex(nodes, storage_context=storage_context) - keyword_table_index = SimpleKeywordTableIndex( - nodes, - storage_context=storage_context - ) + keyword_table_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context) This will use the same docstore for multiple index structures. diff --git a/llama_index/tools/tool_spec/load_and_search/README.md b/llama_index/tools/tool_spec/load_and_search/README.md index 31a398b89a2c7b6762c279b390143c385d0d911f..094c8c9ed491e683b18ee92b0625e2d27cca198b 100644 --- a/llama_index/tools/tool_spec/load_and_search/README.md +++ b/llama_index/tools/tool_spec/load_and_search/README.md @@ -18,11 +18,10 @@ tool = wiki_spec.to_tool_list()[1] # Wrap the tool, splitting into a loader and a reader agent = OpenAIAgent.from_tools( - LoadAndSearchToolSpec.from_defaults( - tool - ).to_tool_list(), verbose=True) + LoadAndSearchToolSpec.from_defaults(tool).to_tool_list(), verbose=True +) -agent.chat('who is ben affleck married to') +agent.chat("who is ben affleck married to") ``` `load`: Calls the wrapped function and loads the data into an index