From 18b2ae6783e4cd64cd64d87fd643aaa8d816722a Mon Sep 17 00:00:00 2001
From: Aaron Jimenez <aaronjimv@gmail.com>
Date: Sun, 5 Nov 2023 12:44:35 -0800
Subject: [PATCH] =?UTF-8?q?Feature=20Request=20(#8262):=20adding=C2=A0blac?=
 =?UTF-8?q?ken-docs=C2=A0to=C2=A0pre-commit=20(#8682)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Adding blacken-docs to .pre-commit

* Applied blacken-docs to project files

* Added exclude to blacken-docs

* Tested deleting <...> in variables

* Fixed parse error

* Updated additional_dependencies: [black==23.10.1]

* Applied additional blacken-docs rewriting
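
To run the new hook locally, a typical invocation (a sketch, assuming the
pre-commit framework is already installed) is:

    # assumes pre-commit is installed, e.g. via `pip install pre-commit`
    pre-commit run blacken-docs --all-files

This reformats the Python code blocks in the Markdown/reST docs and in
docstrings using the pinned black==23.10.1.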
---
 .pre-commit-config.yaml                       |   5 +
 README.md                                     |  10 +-
 docs/community/faq/llms.md                    |   4 +-
 .../community/integrations/chatgpt_plugins.md |  16 +-
 docs/community/integrations/deepeval.md       |   3 +-
 docs/community/integrations/graphsignal.md    |   4 +-
 docs/community/integrations/guidance.md       |  22 ++-
 .../integrations/lmformatenforcer.md          |  14 +-
 .../community/integrations/managed_indices.md |  11 +-
 docs/community/integrations/trulens.md        |   1 -
 .../integrations/using_with_langchain.md      |   3 +-
 docs/community/integrations/vector_stores.md  |  79 ++++------
 docs/getting_started/customization.rst        |  20 +--
 docs/getting_started/starter_example.md       |  15 +-
 .../agents/tools/llamahub_tools_guide.md      |  10 +-
 .../deploying/agents/tools/root.md            |   1 -
 .../deploying/agents/tools/usage_pattern.md   |   7 +-
 .../deploying/agents/usage_pattern.md         |  11 +-
 .../deploying/chat_engines/usage_pattern.md   |  22 ++-
 .../deploying/query_engine/streaming.md       |   7 +-
 .../deploying/query_engine/usage_pattern.md   |   6 +-
 .../module_guides/evaluating/usage_pattern.md |  27 ++--
 .../evaluating/usage_pattern_retrieval.md     |  13 +-
 docs/module_guides/indexing/composability.md  |  19 +--
 .../indexing/document_management.md           |  18 ++-
 .../indexing/metadata_extraction.md           |   1 +
 docs/module_guides/indexing/usage_pattern.md  |   4 +-
 docs/module_guides/loading/connector/root.md  |   2 +-
 .../loading/connector/usage_pattern.md        |   6 +-
 .../loading/documents_and_nodes/root.md       |   3 -
 .../documents_and_nodes/usage_documents.md    |  30 ++--
 .../usage_metadata_extractor.md               |   2 +-
 .../documents_and_nodes/usage_nodes.md        |   5 +-
 .../loading/node_parsers/root.md              |  46 +++---
 docs/module_guides/models/embeddings.md       |  56 +++----
 docs/module_guides/models/llms.md             |   2 +-
 .../module_guides/models/llms/usage_custom.md |  55 +++----
 .../models/llms/usage_standalone.md           |   7 +-
 .../models/prompts/usage_pattern.md           |  24 +--
 .../callbacks/token_counting_migration.md     |  21 ++-
 .../observability/observability.md            |   9 +-
 .../node_postprocessors.md                    |  51 +++---
 .../querying/node_postprocessors/root.md      |  19 +--
 docs/module_guides/querying/output_parser.md  |  40 +++--
 .../response_synthesizers.md                  |  27 ++--
 .../querying/response_synthesizers/root.md    |  10 +-
 docs/module_guides/querying/retriever/root.md |   7 +-
 docs/module_guides/querying/router/root.md    |   8 +-
 docs/module_guides/storing/customization.md   |  18 +--
 docs/module_guides/storing/docstores.md       |   8 +-
 docs/module_guides/storing/index_stores.md    |   6 +-
 docs/module_guides/storing/save_load.md       |  39 +++--
 docs/module_guides/storing/storing.md         |   6 +-
 .../supporting_modules/service_context.md     |  20 ++-
 .../query_transformations.md                  |  31 ++--
 .../structured_outputs/query_engine.md        |  11 +-
 .../basic_strategies/basic_strategies.md      |   6 +-
 .../evaluating/cost_analysis/usage_pattern.md |   4 +-
 docs/understanding/loading/llamahub.md        |   4 +-
 docs/understanding/loading/loading.md         |  24 ++-
 .../apps/fullstack_app_guide.md               |  78 ++++++----
 .../apps/fullstack_with_delphic.md            |  55 ++++---
 .../chatbots/building_a_chatbot.md            |   4 +-
 .../putting_it_all_together/q_and_a.md        |  43 ++---
 .../q_and_a/terms_definitions_tutorial.md     | 147 ++++++++++++------
 .../q_and_a/unified_query.md                  |  70 ++++-----
 .../structured_data.md                        |  25 ++-
 docs/understanding/querying/querying.md       |  14 +-
 docs/understanding/storing/storing.md         |  10 +-
 docs/understanding/using_llms/using_llms.md   |   7 +-
 llama_index/embeddings/elasticsearch.py       |   4 +-
 llama_index/indices/keyword_table/README.md   |   2 +-
 llama_index/indices/list/README.md            |   3 +-
 llama_index/indices/tree/README.md            |   2 +-
 llama_index/readers/string_iterable.py        |   3 +-
 .../storage/docstore/keyval_docstore.py       |   5 +-
 .../tools/tool_spec/load_and_search/README.md |   7 +-
 77 files changed, 743 insertions(+), 696 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a1398b5c90..531933cf10 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -53,3 +53,8 @@ repos:
     hooks:
       - id: toml-sort-fix
         exclude: poetry.lock
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: "1.16.0"
+    hooks:
+      - id: blacken-docs
+        additional_dependencies: [black==23.10.1]
diff --git a/README.md b/README.md
index 7be75112df..a42ec47e76 100644
--- a/README.md
+++ b/README.md
@@ -70,9 +70,11 @@ To build a simple vector store index using OpenAI:
 
 ```python
 import os
+
 os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
 
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
+
 documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data()
 index = VectorStoreIndex.from_documents(documents)
 ```
@@ -81,22 +83,26 @@ To build a simple vector store index using non-OpenAI LLMs, e.g. Llama 2 hosted
 
 ```python
 import os
+
 os.environ["REPLICATE_API_TOKEN"] = "YOUR_REPLICATE_API_TOKEN"
 
 from llama_index.llms import Replicate
+
 llama2_7b_chat = "meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e"
 llm = Replicate(
     model=llama2_7b_chat,
     temperature=0.01,
-    additional_kwargs={"top_p": 1, "max_new_tokens":300}
+    additional_kwargs={"top_p": 1, "max_new_tokens": 300},
 )
 
 from llama_index.embeddings import HuggingFaceEmbedding
 from llama_index import ServiceContext
+
 embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
 
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
+
 documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data()
 index = VectorStoreIndex.from_documents(documents, service_context=service_context)
 ```
@@ -121,7 +127,7 @@ To reload from disk:
 from llama_index import StorageContext, load_index_from_storage
 
 # rebuild storage context
-storage_context = StorageContext.from_defaults(persist_dir='./storage')
+storage_context = StorageContext.from_defaults(persist_dir="./storage")
 # load index
 index = load_index_from_storage(storage_context)
 ```
diff --git a/docs/community/faq/llms.md b/docs/community/faq/llms.md
index 50810a8f7a..58aff0e0ba 100644
--- a/docs/community/faq/llms.md
+++ b/docs/community/faq/llms.md
@@ -53,7 +53,9 @@ llm_predictor = LLMPredictor(system_prompt="Always respond in Italian.")
 
 service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
 
-query_engine = load_index_from_storage(storage_context, service_context=service_context).as_query_engine()
+query_engine = load_index_from_storage(
+    storage_context, service_context=service_context
+).as_query_engine()
 ```
 
 ---
diff --git a/docs/community/integrations/chatgpt_plugins.md b/docs/community/integrations/chatgpt_plugins.md
index db410611cc..456a77d1ae 100644
--- a/docs/community/integrations/chatgpt_plugins.md
+++ b/docs/community/integrations/chatgpt_plugins.md
@@ -33,6 +33,7 @@ loader = SimpleWebPageReader(html_to_text=True)
 url = "http://www.paulgraham.com/worked.html"
 documents = loader.load_data(urls=[url])
 
+
 # Convert LlamaIndex Documents to JSON format
 def dump_docs_to_json(documents: List[Document], out_path: str) -> Dict:
     """Convert LlamaIndex Documents to JSON format and save it."""
@@ -51,8 +52,7 @@ def dump_docs_to_json(documents: List[Document], out_path: str) -> Dict:
         }
         result_json.append(cur_dict)
 
-    json.dump(result_json, open(out_path, 'w'))
-
+    json.dump(result_json, open(out_path, "w"))
 ```
 
 For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPT_Retrieval_Plugin_Upload.ipynb).
@@ -72,22 +72,19 @@ import os
 # load documents
 bearer_token = os.getenv("BEARER_TOKEN")
 reader = ChatGPTRetrievalPluginReader(
-    endpoint_url="http://localhost:8000",
-    bearer_token=bearer_token
+    endpoint_url="http://localhost:8000", bearer_token=bearer_token
 )
 documents = reader.load_data("What did the author do growing up?")
 
 # build and query index
 from llama_index import SummaryIndex
+
 index = SummaryIndex.from_documents(documents)
 # set Logging to DEBUG for more detailed outputs
-query_engine = vector_index.as_query_engine(
-    response_mode="compact"
-)
+query_engine = vector_index.as_query_engine(response_mode="compact")
 response = query_engine.query(
     "Summarize the retrieved content and describe what the author did growing up",
 )
-
 ```
 
 For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPTRetrievalPluginReaderDemo.ipynb).
@@ -107,7 +104,7 @@ from llama_index import SimpleDirectoryReader
 import os
 
 # load documents
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 
 # build index
 bearer_token = os.getenv("BEARER_TOKEN")
@@ -124,7 +121,6 @@ query_engine = vector_index.as_query_engine(
     response_mode="compact",
 )
 response = query_engine.query("What did the author do growing up?")
-
 ```
 
 For more details, check out the [full example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/chatgpt_plugin/ChatGPTRetrievalPluginIndexDemo.ipynb).
diff --git a/docs/community/integrations/deepeval.md b/docs/community/integrations/deepeval.md
index 6d03aa6008..d927efd6c4 100644
--- a/docs/community/integrations/deepeval.md
+++ b/docs/community/integrations/deepeval.md
@@ -49,7 +49,6 @@ You can more about the [DeepEval Framework](https://docs.confident-ai.com/docs/f
 DeepEval integrates nicely with LlamaIndex's `BaseEvaluator` class. Below is an example of the factual consistency documentation.
 
 ```python
-
 from llama_index.response.schema import Response
 from typing import List
 from llama_index.schema import Document
@@ -84,7 +83,7 @@ from llama_index import download_loader
 WikipediaReader = download_loader("WikipediaReader")
 
 loader = WikipediaReader()
-documents = loader.load_data(pages=['Tokyo'])
+documents = loader.load_data(pages=["Tokyo"])
 tree_index = TreeIndex.from_documents(documents=documents)
 vector_index = VectorStoreIndex.from_documents(
     documents, service_context=service_context_gpt4
diff --git a/docs/community/integrations/graphsignal.md b/docs/community/integrations/graphsignal.md
index 69745a987a..d6ad8ddd2a 100644
--- a/docs/community/integrations/graphsignal.md
+++ b/docs/community/integrations/graphsignal.md
@@ -18,7 +18,7 @@ pip install graphsignal
 import graphsignal
 
 # Provide an API key directly or via GRAPHSIGNAL_API_KEY environment variable
-graphsignal.configure(api_key='my-api-key', deployment='my-llama-index-app-prod')
+graphsignal.configure(api_key="my-api-key", deployment="my-llama-index-app-prod")
 ```
 
 You can get an API key [here](https://app.graphsignal.com/).
@@ -30,7 +30,7 @@ See the [Quick Start guide](https://graphsignal.com/docs/guides/quick-start/), [
 To additionally trace any function or code, you can use a decorator or a context manager:
 
 ```python
-with graphsignal.start_trace('load-external-data'):
+with graphsignal.start_trace("load-external-data"):
     reader.load_data()
 ```
 
diff --git a/docs/community/integrations/guidance.md b/docs/community/integrations/guidance.md
index 4b6f5e16b5..4004c8a56d 100644
--- a/docs/community/integrations/guidance.md
+++ b/docs/community/integrations/guidance.md
@@ -21,6 +21,7 @@ class Song(BaseModel):
     title: str
     length_seconds: int
 
+
 class Album(BaseModel):
     name: str
     artist: str
@@ -38,23 +39,30 @@ and supplying a suitable prompt template.
 program = GuidancePydanticProgram(
     output_cls=Album,
     prompt_template_str="Generate an example album, with an artist and a list of songs. Using the movie {{movie_name}} as inspiration",
-    guidance_llm=OpenAI('text-davinci-003'),
+    guidance_llm=OpenAI("text-davinci-003"),
     verbose=True,
 )
-
 ```
 
 Now we can run the program by calling it with additional user input.
 Here let's go for something spooky and create an album inspired by the Shining.
 
 ```python
-output = program(movie_name='The Shining')
+output = program(movie_name="The Shining")
 ```
 
 We have our pydantic object:
 
 ```python
-Album(name='The Shining', artist='Jack Torrance', songs=[Song(title='All Work and No Play', length_seconds=180), Song(title='The Overlook Hotel', length_seconds=240), Song(title='The Shining', length_seconds=210)])
+Album(
+    name="The Shining",
+    artist="Jack Torrance",
+    songs=[
+        Song(title="All Work and No Play", length_seconds=180),
+        Song(title="The Overlook Hotel", length_seconds=240),
+        Song(title="The Shining", length_seconds=210),
+    ],
+)
 ```
 
 You can play with [this notebook](/examples/output_parsing/guidance_pydantic_program.ipynb) for more details.
@@ -73,14 +81,16 @@ from llama_index.question_gen.guidance_generator import GuidanceQuestionGenerato
 from guidance.llms import OpenAI as GuidanceOpenAI
 
 # define guidance based question generator
-question_gen = GuidanceQuestionGenerator.from_defaults(guidance_llm=GuidanceOpenAI('text-davinci-003'), verbose=False)
+question_gen = GuidanceQuestionGenerator.from_defaults(
+    guidance_llm=GuidanceOpenAI("text-davinci-003"), verbose=False
+)
 
 # define query engine tools
 query_engine_tools = ...
 
 # construct sub-question query engine
 s_engine = SubQuestionQueryEngine.from_defaults(
-    question_gen=question_gen  # use guidance based question_gen defined above
+    question_gen=question_gen,  # use guidance based question_gen defined above
     query_engine_tools=query_engine_tools,
 )
 ```
diff --git a/docs/community/integrations/lmformatenforcer.md b/docs/community/integrations/lmformatenforcer.md
index a4d344a12e..24306b1b05 100644
--- a/docs/community/integrations/lmformatenforcer.md
+++ b/docs/community/integrations/lmformatenforcer.md
@@ -17,6 +17,7 @@ class Song(BaseModel):
     title: str
     length_seconds: int
 
+
 class Album(BaseModel):
     name: str
     artist: str
@@ -35,20 +36,27 @@ program = LMFormatEnforcerPydanticProgram(
     llm=LlamaCPP(),
     verbose=True,
 )
-
 ```
 
 Now we can run the program by calling it with additional user input.
 Here let's go for something spooky and create an album inspired by the Shining.
 
 ```python
-output = program(movie_name='The Shining')
+output = program(movie_name="The Shining")
 ```
 
 We have our pydantic object:
 
 ```python
-Album(name='The Shining: A Musical Journey Through the Haunted Halls of the Overlook Hotel', artist='The Shining Choir', songs=[Song(title='Redrum', length_seconds=300), Song(title='All Work and No Play Makes Jack a Dull Boy', length_seconds=240), Song(title="Heeeeere's Johnny!", length_seconds=180)])
+Album(
+    name="The Shining: A Musical Journey Through the Haunted Halls of the Overlook Hotel",
+    artist="The Shining Choir",
+    songs=[
+        Song(title="Redrum", length_seconds=300),
+        Song(title="All Work and No Play Makes Jack a Dull Boy", length_seconds=240),
+        Song(title="Heeeeere's Johnny!", length_seconds=180),
+    ],
+)
 ```
 
 You can play with [this notebook](/examples/output_parsing/lmformatenforcer_pydantic_program.ipynb) for more details.
diff --git a/docs/community/integrations/managed_indices.md b/docs/community/integrations/managed_indices.md
index 83bc5cd409..c8769da348 100644
--- a/docs/community/integrations/managed_indices.md
+++ b/docs/community/integrations/managed_indices.md
@@ -27,8 +27,13 @@ from llama_index.managed import VectaraIndex
 vectara_customer_id = os.environ.get("VECTARA_CUSTOMER_ID")
 vectara_corpus_id = os.environ.get("VECTARA_CORPUS_ID")
 vectara_api_key = os.environ.get("VECTARA_API_KEY")
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
-index = VectaraIndex.from_documents(documents, vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key)
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
+index = VectaraIndex.from_documents(
+    documents,
+    vectara_customer_id=vectara_customer_id,
+    vectara_corpus_id=vectara_corpus_id,
+    vectara_api_key=vectara_api_key,
+)
 
 # Query index
 query_engine = index.as_query_engine()
@@ -42,7 +47,7 @@ from llama_index import ManagedIndex, SimpleDirectoryReade
 from llama_index.managed import VectaraIndex
 
 # Load documents and build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectaraIndex.from_documents(documents)
 
 # Query index
diff --git a/docs/community/integrations/trulens.md b/docs/community/integrations/trulens.md
index 07c435727f..037320cea1 100644
--- a/docs/community/integrations/trulens.md
+++ b/docs/community/integrations/trulens.md
@@ -20,7 +20,6 @@ pip install trulens-eval
 
 ```python
 from trulens_eval import TruLlama
-
 ```
 
 ## Try it out!
diff --git a/docs/community/integrations/using_with_langchain.md b/docs/community/integrations/using_with_langchain.md
index 93957e34e0..11ce40d7f6 100644
--- a/docs/community/integrations/using_with_langchain.md
+++ b/docs/community/integrations/using_with_langchain.md
@@ -27,11 +27,10 @@ tool_config = IndexToolConfig(
     query_engine=query_engine,
     name=f"Vector Index",
     description=f"useful for when you want to answer queries about X",
-    tool_kwargs={"return_direct": True}
+    tool_kwargs={"return_direct": True},
 )
 
 tool = LlamaIndexTool.from_tool_config(tool_config)
-
 ```
 
 ### Llama Demo Notebook: Tool + Memory module
diff --git a/docs/community/integrations/vector_stores.md b/docs/community/integrations/vector_stores.md
index 77e5462aae..47df4538e8 100644
--- a/docs/community/integrations/vector_stores.md
+++ b/docs/community/integrations/vector_stores.md
@@ -51,13 +51,12 @@ that's initialized as part of the default storage context.
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
 # Load documents and build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 
 # Query index
 query_engine = index.as_query_engine()
 response = query_engine.query("What did the author do growing up?")
-
 ```
 
 **Custom Vector Store Index Construction/Querying**
@@ -70,11 +69,11 @@ from llama_index.vector_stores import DeepLakeVectorStore
 
 # construct vector store and customize storage context
 storage_context = StorageContext.from_defaults(
-    vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>")
+    vector_store=DeepLakeVectorStore(dataset_path="<dataset_path>")
 )
 
 # Load documents and build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
 # Query index
@@ -103,6 +102,7 @@ Then connect and use Elasticsearch as a vector database with LlamaIndex
 
 ```python
 from llama_index.vector_stores import ElasticsearchStore
+
 vector_store = ElasticsearchStore(
     index_name="llm-project",
     es_url="http://localhost:9200",
@@ -127,10 +127,9 @@ Then connect and use Redis as a vector database with LlamaIndex
 
 ```python
 from llama_index.vector_stores import RedisVectorStore
+
 vector_store = RedisVectorStore(
-    index_name="llm-project",
-    redis_url="redis://localhost:6379",
-    overwrite=True
+    index_name="llm-project", redis_url="redis://localhost:6379", overwrite=True
 )
 ```
 
@@ -196,14 +195,13 @@ vector_store = WeaviateVectorStore(weaviate_client=client)
 Zep stores texts, metadata, and embeddings. All are returned in search results.
 
 ```python
-
 from llama_index.vector_stores.zep import ZepVectorStore
 
 vector_store = ZepVectorStore(
     api_url="<api_url>",
     api_key="<api_key>",
     collection_name="<unique_collection_name>",  # Can either be an existing collection or a new one
-    embedding_dimensions=1536 # Optional, required if creating a new collection
+    embedding_dimensions=1536,  # Optional, required if creating a new collection
 )
 
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
@@ -225,12 +223,7 @@ from llama_index.vector_stores import PineconeVectorStore
 # Creating a Pinecone index
 api_key = "api_key"
 pinecone.init(api_key=api_key, environment="us-west1-gcp")
-pinecone.create_index(
-    "quickstart",
-    dimension=1536,
-    metric="euclidean",
-    pod_type="p1"
-)
+pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1")
 index = pinecone.Index("quickstart")
 
 # can define filters specific to this vector index (so you can
@@ -239,8 +232,7 @@ metadata_filters = {"title": "paul_graham_essay"}
 
 # construct vector store
 vector_store = PineconeVectorStore(
-    pinecone_index=index,
-    metadata_filters=metadata_filters
+    pinecone_index=index, metadata_filters=metadata_filters
 )
 ```
 
@@ -252,9 +244,7 @@ from llama_index.vector_stores import QdrantVectorStore
 
 # Creating a Qdrant vector store
 client = qdrant_client.QdrantClient(
-    host="<qdrant-host>",
-    api_key="<qdrant-api-key>",
-    https=True
+    host="<qdrant-host>", api_key="<qdrant-api-key>", https=True
 )
 collection_name = "paul_graham"
 
@@ -276,6 +266,7 @@ cassio.init(database_id="1234abcd-...", token="AstraCS:...")
 
 # For a Cassandra cluster:
 from cassandra.cluster import Cluster
+
 cluster = Cluster(["127.0.0.1"])
 cassio.init(session=cluster.connect(), keyspace="my_keyspace")
 
@@ -328,11 +319,7 @@ import pymilvus
 from llama_index.vector_stores import MilvusVectorStore
 
 # construct vector store
-vector_store = MilvusVectorStore(
-    uri='https://localhost:19530',
-    overwrite='True'
-)
-
+vector_store = MilvusVectorStore(uri="https://localhost:19530", overwrite="True")
 ```
 
 **Note**: `MilvusVectorStore` depends on the `pymilvus` library.
@@ -352,9 +339,7 @@ from llama_index.vector_stores import MilvusVectorStore
 
 # construct vector store
 vector_store = MilvusVectorStore(
-    uri='foo.vectordb.zillizcloud.com',
-    token="your_token_here",
-    overwrite='True'
+    uri="foo.vectordb.zillizcloud.com", token="your_token_here", overwrite="True"
 )
 ```
 
@@ -372,17 +357,15 @@ from llama_index.vector_stores import MyScaleVectorStore
 
 # Creating a MyScale client
 client = clickhouse_connect.get_client(
-    host='YOUR_CLUSTER_HOST',
+    host="YOUR_CLUSTER_HOST",
     port=8443,
-    username='YOUR_USERNAME',
-    password='YOUR_CLUSTER_PASSWORD'
+    username="YOUR_USERNAME",
+    password="YOUR_CLUSTER_PASSWORD",
 )
 
 
 # construct vector store
-vector_store = MyScaleVectorStore(
-    myscale_client=client
-)
+vector_store = MyScaleVectorStore(myscale_client=client)
 ```
 
 **Timescale**
@@ -391,7 +374,7 @@ vector_store = MyScaleVectorStore(
 from llama_index.vector_stores import TimescaleVectorStore
 
 vector_store = TimescaleVectorStore.from_params(
-    service_url='YOUR TIMESCALE SERVICE URL',
+    service_url="YOUR TIMESCALE SERVICE URL",
     table_name="paul_graham_essay",
 )
 ```
@@ -423,7 +406,7 @@ from llama_index.vector_stores import (
 )
 
 # construct vector store
-vector_store = DocArrayHnswVectorStore(work_dir='hnsw_index')
+vector_store = DocArrayHnswVectorStore(work_dir="hnsw_index")
 
 # alternatively, construct the in-memory vector store
 vector_store = DocArrayInMemoryVectorStore()
@@ -464,9 +447,8 @@ neo4j_vector = Neo4jVectorStore(
     username="neo4j",
     password="pleaseletmein",
     url="bolt://localhost:7687",
-    embed_dim=1536
+    embed_dim=1536,
 )
-
 ```
 
 **Azure Cognitive Search**
@@ -504,7 +486,7 @@ import dashvector
 from llama_index.vector_stores import DashVectorStore
 
 # init dashvector client
-client = dashvector.Client(api_key='your-dashvector-api-key')
+client = dashvector.Client(api_key="your-dashvector-api-key")
 
 # creating a DashVector collection
 client.create("quickstart", dimension=1536)
@@ -523,7 +505,6 @@ LlamaIndex supports loading data from a huge number of sources. See [Data Connec
 Chroma stores both documents and vectors. This is an example of how to use Chroma:
 
 ```python
-
 from llama_index.readers.chroma import ChromaReader
 from llama_index.indices import SummaryIndex
 
@@ -531,10 +512,10 @@ from llama_index.indices import SummaryIndex
 # This requires a collection name and a persist directory.
 reader = ChromaReader(
     collection_name="chroma_collection",
-    persist_directory="examples/data_connectors/chroma_collection"
+    persist_directory="examples/data_connectors/chroma_collection",
 )
 
-query_vector=[n1, n2, n3, ...]
+query_vector = [n1, n2, n3, ...]
 
 documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5)
 index = SummaryIndex.from_documents(documents)
@@ -547,7 +528,6 @@ display(Markdown(f"<b>{response}</b>"))
 Qdrant also stores both documents and vectors. This is an example of how to use Qdrant:
 
 ```python
-
 from llama_index.readers.qdrant import QdrantReader
 
 reader = QdrantReader(host="localhost")
@@ -563,7 +543,6 @@ query_vector = [n1, n2, n3, ...]
 # for more details
 
 documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5)
-
 ```
 
 NOTE: Since Weaviate can store a hybrid of document and vector objects, the user may either choose to explicitly specify `class_name` and `properties` in order to query documents, or they may choose to specify a raw GraphQL query. See below for usage.
@@ -575,7 +554,7 @@ NOTE: Since Weaviate can store a hybrid of document and vector objects, the user
 documents = reader.load_data(
     class_name="<class_name>",
     properties=["property1", "property2", "..."],
-    separate_documents=True
+    separate_documents=True,
 )
 
 # 2) example GraphQL query
@@ -598,7 +577,6 @@ NOTE: Both Pinecone and Faiss data loaders assume that the respective data sourc
 For instance, this is an example usage of the Pinecone data loader `PineconeReader`:
 
 ```python
-
 from llama_index.readers.pinecone import PineconeReader
 
 reader = PineconeReader(api_key=api_key, environment="us-west1-gcp")
@@ -608,12 +586,15 @@ id_to_text_map = {
     "id2": "text blob 2",
 }
 
-query_vector=[n1, n2, n3, ..]
+query_vector = [n1, n2, n3, ...]
 
 documents = reader.load_data(
-    index_name="quickstart", id_to_text_map=id_to_text_map, top_k=3, vector=query_vector, separate_documents=True
+    index_name="quickstart",
+    id_to_text_map=id_to_text_map,
+    top_k=3,
+    vector=query_vector,
+    separate_documents=True,
 )
-
 ```
 
 [Example notebooks can be found here](https://github.com/jerryjliu/llama_index/tree/main/docs/examples/data_connectors).
diff --git a/docs/getting_started/customization.rst b/docs/getting_started/customization.rst
index b0f7ea8723..f0a152d2b1 100644
--- a/docs/getting_started/customization.rst
+++ b/docs/getting_started/customization.rst
@@ -9,7 +9,7 @@ In this tutorial, we start with the code you wrote for the `starter example <sta
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     query_engine = index.as_query_engine()
     response = query_engine.query("What did the author do growing up?")
@@ -22,6 +22,7 @@ In this tutorial, we start with the code you wrote for the `starter example <sta
 .. code-block:: python
 
     from llama_index import ServiceContext
+
     service_context = ServiceContext.from_defaults(chunk_size=1000)
 
 The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ is a bundle of services and configurations used across a LlamaIndex pipeline.
@@ -31,7 +32,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
     query_engine = index.as_query_engine()
     response = query_engine.query("What did the author do growing up?")
@@ -59,7 +60,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
     query_engine = index.as_query_engine()
     response = query_engine.query("What did the author do growing up?")
@@ -74,7 +75,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     query_engine = index.as_query_engine(similarity_top_k=5)
     response = query_engine.query("What did the author do growing up?")
@@ -90,6 +91,7 @@ The `ServiceContext </module_guides/supporting_modules/service_context.html>`_ i
 
     from llama_index import ServiceContext
     from llama_index.llms import PaLM
+
     service_context = ServiceContext.from_defaults(llm=PaLM())
 
 You can learn more about `customizing LLMs </module_guides/models/llms.html>`_.
@@ -99,7 +101,7 @@ You can learn more about `customizing LLMs </module_guides/models/llms.html>`_.
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     query_engine = index.as_query_engine(service_context=service_context)
     response = query_engine.query("What did the author do growing up?")
@@ -115,9 +117,9 @@ You can learn more about `customizing LLMs </module_guides/models/llms.html>`_.
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
-    query_engine = index.as_query_engine(response_mode='tree_summarize')
+    query_engine = index.as_query_engine(response_mode="tree_summarize")
     response = query_engine.query("What did the author do growing up?")
     print(response)
 
@@ -133,7 +135,7 @@ You can learn more about `query engines <../core_modules/query_modules/query_eng
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     query_engine = index.as_query_engine(streaming=True)
     response = query_engine.query("What did the author do growing up?")
@@ -150,7 +152,7 @@ You can learn more about `streaming responses </module_guides/putting_it_all_tog
 
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     query_engine = index.as_chat_engine()
     response = query_engine.chat("What did the author do growing up?")
diff --git a/docs/getting_started/starter_example.md b/docs/getting_started/starter_example.md
index 10f15ff0c8..656421d1c9 100644
--- a/docs/getting_started/starter_example.md
+++ b/docs/getting_started/starter_example.md
@@ -33,7 +33,7 @@ In the same folder where you created the `data` folder, create a file called `st
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 ```
 
@@ -87,18 +87,23 @@ Of course, you don't get the benefits of persisting unless you load the data. So
 
 ```python
 import os.path
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
+from llama_index import (
+    VectorStoreIndex,
+    SimpleDirectoryReader,
+    StorageContext,
+    load_index_from_storage,
+)
 
 # check if storage already exists
-if (not os.path.exists('./storage')):
+if not os.path.exists("./storage"):
     # load the documents and create the index
-    documents = SimpleDirectoryReader('data').load_data()
+    documents = SimpleDirectoryReader("data").load_data()
     index = VectorStoreIndex.from_documents(documents)
     # store it for later
     index.storage_context.persist()
 else:
     # load the existing index
-    storage_context = StorageContext.from_defaults(persist_dir='./storage')
+    storage_context = StorageContext.from_defaults(persist_dir="./storage")
     index = load_index_from_storage(storage_context)
 
 # either way we can now query the index
diff --git a/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md b/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md
index 815b1eb9e9..1d9f4573fc 100644
--- a/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md
+++ b/docs/module_guides/deploying/agents/tools/llamahub_tools_guide.md
@@ -32,9 +32,9 @@ from llama_hub.wikipedia.base import WikipediaReader
 from llama_index.tools.ondemand_loader_tool import OnDemandLoaderTool
 
 tool = OnDemandLoaderTool.from_defaults(
-	reader,
-	name="Wikipedia Tool",
-	description="A tool for loading data and querying articles from Wikipedia"
+    reader,
+    name="Wikipedia Tool",
+    description="A tool for loading data and querying articles from Wikipedia",
 )
 ```
 
@@ -58,8 +58,6 @@ tool = wiki_spec.to_tool_list()[1]
 
 # Create the Agent with load/search tools
 agent = OpenAIAgent.from_tools(
- LoadAndSearchToolSpec.from_defaults(
-    tool
- ).to_tool_list(), verbose=True
+    LoadAndSearchToolSpec.from_defaults(tool).to_tool_list(), verbose=True
 )
 ```
diff --git a/docs/module_guides/deploying/agents/tools/root.md b/docs/module_guides/deploying/agents/tools/root.md
index 70b8e35df3..0d4ade35d6 100644
--- a/docs/module_guides/deploying/agents/tools/root.md
+++ b/docs/module_guides/deploying/agents/tools/root.md
@@ -31,7 +31,6 @@ from llama_hub.tools.gmail.base import GmailToolSpec
 
 tool_spec = GmailToolSpec()
 agent = OpenAIAgent.from_tools(tool_spec.to_tool_list(), verbose=True)
-
 ```
 
 See our Usage Pattern Guide for more details.
diff --git a/docs/module_guides/deploying/agents/tools/usage_pattern.md b/docs/module_guides/deploying/agents/tools/usage_pattern.md
index 904dadd47e..a7f687848a 100644
--- a/docs/module_guides/deploying/agents/tools/usage_pattern.md
+++ b/docs/module_guides/deploying/agents/tools/usage_pattern.md
@@ -14,6 +14,7 @@ from llama_index.tools.function_tool import FunctionTool
 # Use a tool spec from Llama-Hub
 tool_spec = GmailToolSpec()
 
+
 # Create a custom tool. Type annotations and docstring are used for the
 # tool definition sent to the Function calling API.
 def add_numbers(x: int, y: int) -> int:
@@ -22,13 +23,16 @@ def add_numbers(x: int, y: int) -> int:
     """
     return x + y
 
+
 function_tool = FunctionTool.from_defaults(fn=add_numbers)
 
 tools = tool_spec.to_tool_list() + [function_tool]
 agent = OpenAIAgent.from_tools(tools, verbose=True)
 
 # use agent
-agent.chat("Can you create a new email to helpdesk and support @example.com about a service outage")
+agent.chat(
+    "Can you create a new email to helpdesk and support @example.com about a service outage"
+)
 ```
 
 Full Tool details can be found on our [LlamaHub](https://llamahub.ai) page. Each tool contains a "Usage" section showing how that tool can be used.
@@ -46,5 +50,4 @@ from langchain.agents import initialize_agent
 agent_executor = initialize_agent(
     langchain_tools, llm, agent="conversational-react-description", memory=memory
 )
-
 ```
diff --git a/docs/module_guides/deploying/agents/usage_pattern.md b/docs/module_guides/deploying/agents/usage_pattern.md
index 4122171d11..3b77b84770 100644
--- a/docs/module_guides/deploying/agents/usage_pattern.md
+++ b/docs/module_guides/deploying/agents/usage_pattern.md
@@ -10,11 +10,13 @@ from llama_index.tools import FunctionTool
 from llama_index.llms import OpenAI
 from llama_index.agent import ReActAgent
 
+
 # define sample Tool
 def multiply(a: int, b: int) -> int:
     """Multiple two integers and returns the result integer"""
     return a * b
 
+
 multiply_tool = FunctionTool.from_defaults(fn=multiply)
 
 # initialize llm
@@ -37,7 +39,6 @@ agent.chat("What is 2123 * 215123")
 It is easy to wrap query engines as tools for an agent as well. Simply do the following:
 
 ```python
-
 from llama_index.agent import ReActAgent
 from llama_index.tools import QueryEngineTool
 
@@ -66,7 +67,6 @@ query_engine_tools = [
 
 # initialize ReAct agent
 agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True)
-
 ```
 
 ## Use other agents as Tools
@@ -81,15 +81,13 @@ query_engine_tools = [
     QueryEngineTool(
         query_engine=sql_agent,
         metadata=ToolMetadata(
-            name="sql_agent",
-            description="Agent that can execute SQL queries."
+            name="sql_agent", description="Agent that can execute SQL queries."
         ),
     ),
     QueryEngineTool(
         query_engine=gmail_agent,
         metadata=ToolMetadata(
-            name="gmail_agent",
-            description="Tool that can send emails on Gmail."
+            name="gmail_agent", description="Tool that can send emails on Gmail."
         ),
     ),
 ]
@@ -189,5 +187,4 @@ agent = OpenAIAgent.from_tools(
 
 # should output a query plan to call march, june, and september tools
 response = agent.query("Analyze Uber revenue growth in March, June, and September")
-
 ```
diff --git a/docs/module_guides/deploying/chat_engines/usage_pattern.md b/docs/module_guides/deploying/chat_engines/usage_pattern.md
index b04e3fc313..29f3f4ec02 100644
--- a/docs/module_guides/deploying/chat_engines/usage_pattern.md
+++ b/docs/module_guides/deploying/chat_engines/usage_pattern.md
@@ -39,10 +39,7 @@ Configuring a chat engine is very similar to configuring a query engine.
 You can directly build and configure a chat engine from an index in 1 line of code:
 
 ```python
-chat_engine = index.as_chat_engine(
-    chat_mode='condense_question',
-    verbose=True
-)
+chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
 ```
 
 > Note: you can access different chat engines by specifying the `chat_mode` as a kwarg. `condense_question` corresponds to `CondenseQuestionChatEngine`, `react` corresponds to `ReActChatEngine`, `context` corresponds to a `ContextChatEngine`.
@@ -72,11 +69,12 @@ Here's an example where we configure the following:
 - print verbose debug message.
 
 ```python
-from llama_index.prompts  import PromptTemplate
+from llama_index.prompts import PromptTemplate
 from llama_index.llms import ChatMessage, MessageRole
 from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
 
-custom_prompt = PromptTemplate("""\
+custom_prompt = PromptTemplate(
+    """\
 Given a conversation (between Human and Assistant) and a follow up message from Human, \
 rewrite the message to be a standalone question that captures all relevant context \
 from the conversation.
@@ -88,18 +86,16 @@ from the conversation.
 {question}
 
 <Standalone question>
-""")
+"""
+)
 
 # list of `ChatMessage` objects
 custom_chat_history = [
     ChatMessage(
         role=MessageRole.USER,
-        content='Hello assistant, we are having a insightful discussion about Paul Graham today.'
+        content="Hello assistant, we are having a insightful discussion about Paul Graham today.",
     ),
-    ChatMessage(
-        role=MessageRole.ASSISTANT,
-        content='Okay, sounds good.'
-    )
+    ChatMessage(role=MessageRole.ASSISTANT, content="Okay, sounds good."),
 ]
 
 query_engine = index.as_query_engine()
@@ -107,7 +103,7 @@ chat_engine = CondenseQuestionChatEngine.from_defaults(
     query_engine=query_engine,
     condense_question_prompt=custom_prompt,
     chat_history=custom_chat_history,
-    verbose=True
+    verbose=True,
 )
 ```
 
diff --git a/docs/module_guides/deploying/query_engine/streaming.md b/docs/module_guides/deploying/query_engine/streaming.md
index 20a9974a9b..dfa5218fc7 100644
--- a/docs/module_guides/deploying/query_engine/streaming.md
+++ b/docs/module_guides/deploying/query_engine/streaming.md
@@ -14,10 +14,7 @@ Configure query engine to use streaming:
 If you are using the high-level API, set `streaming=True` when building a query engine.
 
 ```python
-query_engine = index.as_query_engine(
-    streaming=True,
-    similarity_top_k=1
-)
+query_engine = index.as_query_engine(streaming=True, similarity_top_k=1)
 ```
 
 If you are using the low-level API to compose the query engine,
@@ -25,6 +22,7 @@ pass `streaming=True` when constructing the `Response Synthesizer`:
 
 ```python
 from llama_index import get_response_synthesizer
+
 synth = get_response_synthesizer(streaming=True, ...)
 query_engine = RetrieverQueryEngine(response_synthesizer=synth, ...)
 ```
@@ -49,6 +47,7 @@ You can obtain a `Generator` from the streaming response and iterate over the to
 ```python
 for text in streaming_response.response_gen:
     # do something with text as they arrive.
+    pass
 ```
 
 Alternatively, if you just want to print the text as they arrive:
diff --git a/docs/module_guides/deploying/query_engine/usage_pattern.md b/docs/module_guides/deploying/query_engine/usage_pattern.md
index f97fc648dc..a106e74987 100644
--- a/docs/module_guides/deploying/query_engine/usage_pattern.md
+++ b/docs/module_guides/deploying/query_engine/usage_pattern.md
@@ -15,7 +15,7 @@ To learn how to build an index, see [Indexing](/module_guides/indexing/indexing.
 Ask a question over your data
 
 ```python
-response = query_engine.query('Who is Paul Graham?')
+response = query_engine.query("Who is Paul Graham?")
 ```
 
 ## Configuring a Query Engine
@@ -26,7 +26,7 @@ You can directly build and configure a query engine from an index in 1 line of c
 
 ```python
 query_engine = index.as_query_engine(
-    response_mode='tree_summarize',
+    response_mode="tree_summarize",
     verbose=True,
 )
 ```
@@ -110,6 +110,7 @@ from llama_index.query_engine import CustomQueryEngine
 from llama_index.retrievers import BaseRetriever
 from llama_index.response_synthesizers import get_response_synthesizer, BaseSynthesizer
 
+
 class RAGQueryEngine(CustomQueryEngine):
     """RAG Query Engine."""
 
@@ -120,7 +121,6 @@ class RAGQueryEngine(CustomQueryEngine):
         nodes = self.retriever.retrieve(query_str)
         response_obj = self.response_synthesizer.synthesize(query_str, nodes)
         return response_obj
-
 ```
 
 See the [Custom Query Engine guide](/examples/query_engine/custom_query_engine.ipynb) for more details.
diff --git a/docs/module_guides/evaluating/usage_pattern.md b/docs/module_guides/evaluating/usage_pattern.md
index 23ea2414dc..3c031af42f 100644
--- a/docs/module_guides/evaluating/usage_pattern.md
+++ b/docs/module_guides/evaluating/usage_pattern.md
@@ -6,7 +6,7 @@ All of the evaluation modules in LlamaIndex implement the `BaseEvaluator` class,
 
 1. The `evaluate` method takes in `query`, `contexts`, `response`, and additional keyword arguments.
 
-```python
+```
     def evaluate(
         self,
         query: Optional[str] = None,
@@ -18,7 +18,7 @@ All of the evaluation modules in LlamaIndex implement the `BaseEvaluator` class,
 
 2. The `evaluate_response` method provide an alternative interface that takes in a llamaindex `Response` object (which contains response string and source nodes) instead of separate `contexts` and `response`.
 
-```python
+```
 def evaluate_response(
     self,
     query: Optional[str] = None,
@@ -63,7 +63,9 @@ evaluator = FaithfulnessEvaluator(service_context=service_context)
 
 # query index
 query_engine = vector_index.as_query_engine()
-response = query_engine.query("What battles took place in New York City in the American Revolution?")
+response = query_engine.query(
+    "What battles took place in New York City in the American Revolution?"
+)
 eval_result = evaluator.evaluate_response(response=response)
 print(str(eval_result.passing))
 ```
@@ -89,12 +91,15 @@ evaluator = FaithfulnessEvaluator(service_context=service_context)
 
 # query index
 query_engine = vector_index.as_query_engine()
-response = query_engine.query("What battles took place in New York City in the American Revolution?")
+response = query_engine.query(
+    "What battles took place in New York City in the American Revolution?"
+)
 response_str = response.response
 for source_node in response.source_nodes:
-    eval_result = evaluator.evaluate(response=response_str, contexts=[source_node.get_content()])
+    eval_result = evaluator.evaluate(
+        response=response_str, contexts=[source_node.get_content()]
+    )
     print(str(eval_result.passing))
-
 ```
 
 You'll get back a list of results, corresponding to each source node in `response.source_nodes`.
@@ -126,7 +131,6 @@ query = "What battles took place in New York City in the American Revolution?"
 response = query_engine.query(query)
 eval_result = evaluator.evaluate_response(query=query, response=response)
 print(str(eval_result))
-
 ```
 
 ![](/_static/evaluation/eval_query_response_context.png)
@@ -154,7 +158,9 @@ query = "What battles took place in New York City in the American Revolution?"
 response = query_engine.query(query)
 response_str = response.response
 for source_node in response.source_nodes:
-    eval_result = evaluator.evaluate(query=query, response=response_str, contexts=[source_node.get_content()])
+    eval_result = evaluator.evaluate(
+        query=query, response=response_str, contexts=[source_node.get_content()]
+    )
     print(str(eval_result.passing))
 ```
 
@@ -190,10 +196,7 @@ We also provide a batch evaluation runner for running a set of evaluators across
 from llama_index.evaluation import BatchEvalRunner
 
 runner = BatchEvalRunner(
-    {
-        "faithfulness": faithfulness_evaluator, "
-        "relevancy": relevancy_evaluator
-    },
+    {"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator},
     workers=8,
 )
 
diff --git a/docs/module_guides/evaluating/usage_pattern_retrieval.md b/docs/module_guides/evaluating/usage_pattern_retrieval.md
index 653aead5dc..6600f6d489 100644
--- a/docs/module_guides/evaluating/usage_pattern_retrieval.md
+++ b/docs/module_guides/evaluating/usage_pattern_retrieval.md
@@ -17,10 +17,7 @@ retriever_evaluator = RetrieverEvaluator.from_metric_names(
     ["mrr", "hit_rate"], retriever=retriever
 )
 
-retriever_evaluator.evaluate(
-    query="query",
-    expected_ids=["node_id1", "node_id2"]
-)
+retriever_evaluator.evaluate(query="query", expected_ids=["node_id1", "node_id2"])
 ```
 
 ## Building an Evaluation Dataset
@@ -30,12 +27,7 @@ You can manually curate a retrieval evaluation dataset of questions + node id's.
 ```python
 from llama_index.evaluation import generate_question_context_pairs
 
-qa_dataset = generate_question_context_pairs(
-    nodes,
-    llm=llm,
-    num_questions_per_chunk=2
-)
-
+qa_dataset = generate_question_context_pairs(nodes, llm=llm, num_questions_per_chunk=2)
 ```
 
 The returned result is a `EmbeddingQAFinetuneDataset` object (containing `queries`, `relevant_docs`, and `corpus`).
@@ -46,7 +38,6 @@ We offer a convenience function to run a `RetrieverEvaluator` over a dataset in
 
 ```python
 eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)
-
 ```
 
 This should run much faster than you trying to call `.evaluate` on each query separately.
diff --git a/docs/module_guides/indexing/composability.md b/docs/module_guides/indexing/composability.md
index e0c6cb878d..3f2afe255c 100644
--- a/docs/module_guides/indexing/composability.md
+++ b/docs/module_guides/indexing/composability.md
@@ -11,9 +11,9 @@ To see how this works, imagine you have 3 documents: `doc1`, `doc2`, and `doc3`.
 ```python
 from llama_index import SimpleDirectoryReader
 
-doc1 = SimpleDirectoryReader('data1').load_data()
-doc2 = SimpleDirectoryReader('data2').load_data()
-doc3 = SimpleDirectoryReader('data3').load_data()
+doc1 = SimpleDirectoryReader("data1").load_data()
+doc2 = SimpleDirectoryReader("data2").load_data()
+doc3 = SimpleDirectoryReader("data3").load_data()
 ```
 
 ![](/_static/composability/diagram_b0.png)
@@ -49,9 +49,7 @@ You may choose to manually specify the summary text, or use LlamaIndex itself to
 a summary, for instance with the following:
 
 ```python
-summary = index1.query(
-    "What is a summary of this document?", retriever_mode="all_leaf"
-)
+summary = index1.query("What is a summary of this document?", retriever_mode="all_leaf")
 index1_summary = str(summary)
 ```
 
@@ -71,7 +69,6 @@ graph = ComposableGraph.from_indices(
     index_summaries=[index1_summary, index2_summary, index3_summary],
     storage_context=storage_context,
 )
-
 ```
 
 ![](/_static/composability/diagram.png)
@@ -88,14 +85,10 @@ More detail on how to configure `ComposableGraphQueryEngine` can be found [here]
 ```python
 # set custom retrievers. An example is provided below
 custom_query_engines = {
-    index.index_id: index.as_query_engine(
-        child_branch_factor=2
-    )
+    index.index_id: index.as_query_engine(child_branch_factor=2)
     for index in [index1, index2, index3]
 }
-query_engine = graph.as_query_engine(
-    custom_query_engines=custom_query_engines
-)
+query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
 response = query_engine.query("Where did the author grow up?")
 ```
 
diff --git a/docs/module_guides/indexing/document_management.md b/docs/module_guides/indexing/document_management.md
index fca27c8c0d..0abfc8898a 100644
--- a/docs/module_guides/indexing/document_management.md
+++ b/docs/module_guides/indexing/document_management.md
@@ -18,7 +18,7 @@ An example code snippet is given below:
 from llama_index import SummaryIndex, Document
 
 index = SummaryIndex([])
-text_chunks = ['text_chunk_1', 'text_chunk_2', 'text_chunk_3']
+text_chunks = ["text_chunk_1", "text_chunk_2", "text_chunk_3"]
 
 doc_chunks = []
 for i, text in enumerate(text_chunks):
@@ -48,8 +48,7 @@ If a Document is already present within an index, you can "update" a Document wi
 # NOTE: the document has a `doc_id` specified
 doc_chunks[0].text = "Brand new document text"
 index.update_ref_doc(
-    doc_chunks[0],
-    update_kwargs={"delete_kwargs": {'delete_from_docstore': True}}
+    doc_chunks[0], update_kwargs={"delete_kwargs": {"delete_from_docstore": True}}
 )
 ```
 
@@ -65,15 +64,16 @@ The `refresh()` function will only update documents who have the same doc `id_`,
 
 ```python
 # modify first document, with the same doc_id
-doc_chunks[0] = Document(text='Super new document text', id_="doc_id_0")
+doc_chunks[0] = Document(text="Super new document text", id_="doc_id_0")
 
 # add a new document
-doc_chunks.append(Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3"))
+doc_chunks.append(
+    Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3")
+)
 
 # refresh the index
 refreshed_docs = index.refresh_ref_docs(
-    doc_chunks,
-    update_kwargs={"delete_kwargs": {'delete_from_docstore': True}}
+    doc_chunks, update_kwargs={"delete_kwargs": {"delete_from_docstore": True}}
 )
 
 # refreshed_docs[0] and refreshed_docs[-1] should be true
@@ -85,7 +85,7 @@ If you `print()` the output of `refresh()`, you would see which input documents
 
 ```python
 print(refreshed_docs)
-> [True, False, False, True]
+# > [True, False, False, True]
 ```
 
 This is most useful when you are reading from a directory that is constantly updating with new information.
@@ -98,10 +98,12 @@ Any index that uses the docstore (i.e. all indexes except for most vector store
 
 ```python
 print(index.ref_doc_info)
+"""
 > {'doc_id_1': RefDocInfo(node_ids=['071a66a8-3c47-49ad-84fa-7010c6277479'], metadata={}),
    'doc_id_2': RefDocInfo(node_ids=['9563e84b-f934-41c3-acfd-22e88492c869'], metadata={}),
    'doc_id_0': RefDocInfo(node_ids=['b53e6c2f-16f7-4024-af4c-42890e945f36'], metadata={}),
    'doc_id_3': RefDocInfo(node_ids=['6bedb29f-15db-4c7c-9885-7490e10aa33f'], metadata={})}
+"""
 ```
 
 Each entry in the output shows the ingested doc `id_`s as keys, and their associated `node_ids` of the nodes they were split into.
diff --git a/docs/module_guides/indexing/metadata_extraction.md b/docs/module_guides/indexing/metadata_extraction.md
index 533af27438..7c6a99c7b7 100644
--- a/docs/module_guides/indexing/metadata_extraction.md
+++ b/docs/module_guides/indexing/metadata_extraction.md
@@ -59,6 +59,7 @@ If the provided extractors do not fit your needs, you can also define a custom e
 ```python
 from llama_index.node_parser.extractors import MetadataFeatureExtractor
 
+
 class CustomExtractor(MetadataFeatureExtractor):
     def extract(self, nodes) -> List[Dict]:
         metadata_list = [
diff --git a/docs/module_guides/indexing/usage_pattern.md b/docs/module_guides/indexing/usage_pattern.md
index 9babb05d64..570b7b4983 100644
--- a/docs/module_guides/indexing/usage_pattern.md
+++ b/docs/module_guides/indexing/usage_pattern.md
@@ -36,9 +36,7 @@ from llama_index import ServiceContext, VectorStoreIndex
 
 service_context = ServiceContext.from_defaults(chunk_size=512)
 index = VectorStoreIndex.from_documents(
-    docs,
-    service_context=service_context,
-    show_progress=True
+    docs, service_context=service_context, show_progress=True
 )
 ```
 
diff --git a/docs/module_guides/loading/connector/root.md b/docs/module_guides/loading/connector/root.md
index 3fdeb93ec3..048e5cfd06 100644
--- a/docs/module_guides/loading/connector/root.md
+++ b/docs/module_guides/loading/connector/root.md
@@ -22,7 +22,7 @@ Get started with:
 ```python
 from llama_index import download_loader
 
-GoogleDocsReader = download_loader('GoogleDocsReader')
+GoogleDocsReader = download_loader("GoogleDocsReader")
 loader = GoogleDocsReader()
 documents = loader.load_data(document_ids=[...])
 ```
diff --git a/docs/module_guides/loading/connector/usage_pattern.md b/docs/module_guides/loading/connector/usage_pattern.md
index 5b50f40343..d9a45b75ba 100644
--- a/docs/module_guides/loading/connector/usage_pattern.md
+++ b/docs/module_guides/loading/connector/usage_pattern.md
@@ -10,12 +10,12 @@ Example usage:
 ```python
 from llama_index import VectorStoreIndex, download_loader
 
-GoogleDocsReader = download_loader('GoogleDocsReader')
+GoogleDocsReader = download_loader("GoogleDocsReader")
 
-gdoc_ids = ['1wf-y2pd9C878Oh-FmLH7Q_BQkljdm6TQal-c1pUfrec']
+gdoc_ids = ["1wf-y2pd9C878Oh-FmLH7Q_BQkljdm6TQal-c1pUfrec"]
 loader = GoogleDocsReader()
 documents = loader.load_data(document_ids=gdoc_ids)
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine()
-query_engine.query('Where did the author go to school?')
+query_engine.query("Where did the author go to school?")
 ```
diff --git a/docs/module_guides/loading/documents_and_nodes/root.md b/docs/module_guides/loading/documents_and_nodes/root.md
index 04a0f54687..9f69c1c4a2 100644
--- a/docs/module_guides/loading/documents_and_nodes/root.md
+++ b/docs/module_guides/loading/documents_and_nodes/root.md
@@ -29,13 +29,11 @@ documents = [Document(text=t) for t in text_list]
 
 # build index
 index = VectorStoreIndex.from_documents(documents)
-
 ```
 
 #### Nodes
 
 ```python
-
 from llama_index.node_parser import SimpleNodeParser
 
 # load documents
@@ -47,5 +45,4 @@ nodes = parser.get_nodes_from_documents(documents)
 
 # build index
 index = VectorStoreIndex(nodes)
-
 ```
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_documents.md b/docs/module_guides/loading/documents_and_nodes/usage_documents.md
index 94b6686127..41195db504 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_documents.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_documents.md
@@ -9,7 +9,7 @@ By default, all of our [data loaders](/module_guides/loading/connector/root.md)
 ```python
 from llama_index import SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('./data').load_data()
+documents = SimpleDirectoryReader("./data").load_data()
 ```
 
 You can also choose to construct documents manually. LlamaIndex exposes the `Document` struct.
@@ -43,28 +43,25 @@ There are a few ways to set up this dictionary:
 
 ```python
 document = Document(
-    text='text',
-    metadata={
-        'filename': '<doc_file_name>',
-        'category': '<category>'
-    }
+    text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"}
 )
 ```
 
 2. After the document is created:
 
 ```python
-document.metadata = {'filename': '<doc_file_name>'}
+document.metadata = {"filename": "<doc_file_name>"}
 ```
 
 3. Set the filename automatically using the `SimpleDirectoryReader` and `file_metadata` hook. This will automatically run the hook on each document to set the `metadata` field:
 
 ```python
 from llama_index import SimpleDirectoryReader
-filename_fn = lambda filename: {'file_name': filename}
+
+filename_fn = lambda filename: {"file_name": filename}
 
 # automatically sets the metadata of each document according to filename_fn
-documents = SimpleDirectoryReader('./data', file_metadata=filename_fn).load_data()
+documents = SimpleDirectoryReader("./data", file_metadata=filename_fn).load_data()
 ```
 
 ### Customizing the id
@@ -97,13 +94,14 @@ Typically, a document might have many metadata keys, but you might not want all
 We can exclude it like so:
 
 ```python
-document.excluded_llm_metadata_keys = ['file_name']
+document.excluded_llm_metadata_keys = ["file_name"]
 ```
 
 Then, we can test what the LLM will actually end up reading using the `get_content()` function and specifying `MetadataMode.LLM`:
 
 ```python
 from llama_index.schema import MetadataMode
+
 print(document.get_content(metadata_mode=MetadataMode.LLM))
 ```
 
@@ -112,13 +110,14 @@ print(document.get_content(metadata_mode=MetadataMode.LLM))
 Similar to customing the metadata visible to the LLM, we can also customize the metadata visible to embeddings. In this case, you can specifically exclude metadata visible to the embedding model, in case you DON'T want particular text to bias the embeddings.
 
 ```python
-document.excluded_embed_metadata_keys = ['file_name']
+document.excluded_embed_metadata_keys = ["file_name"]
 ```
 
 Then, we can test what the embedding model will actually end up reading using the `get_content()` function and specifying `MetadataMode.EMBED`:
 
 ```python
 from llama_index.schema import MetadataMode
+
 print(document.get_content(metadata_mode=MetadataMode.EMBED))
 ```
 
@@ -151,16 +150,19 @@ document = Document(
     metadata={
         "file_name": "super_secret_document.txt",
         "category": "finance",
-        "author": "LlamaIndex"
+        "author": "LlamaIndex",
     },
-    excluded_llm_metadata_keys=['file_name'],
+    excluded_llm_metadata_keys=["file_name"],
     metadata_seperator="::",
     metadata_template="{key}=>{value}",
     text_template="Metadata: {metadata_str}\n-----\nContent: {content}",
 )
 
 print("The LLM sees this: \n", document.get_content(metadata_mode=MetadataMode.LLM))
-print("The Embedding model sees this: \n", document.get_content(metadata_mode=MetadataMode.EMBED))
+print(
+    "The Embedding model sees this: \n",
+    document.get_content(metadata_mode=MetadataMode.EMBED),
+)
 ```
 
 ### Advanced - Automatic Metadata Extraction
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
index e02cd7a91f..9bb6cb7c85 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
@@ -15,7 +15,7 @@ You can use these feature extractors within our overall `MetadataExtractor` clas
 from llama_index.node_parser.extractors import (
     MetadataExtractor,
     TitleExtractor,
-    QuestionsAnsweredExtractor
+    QuestionsAnsweredExtractor,
 )
 from llama_index.text_splitter import TokenTextSplitter
 
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
index 2b2ed48a87..6bebdce04e 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
@@ -31,7 +31,9 @@ nodes = [node1, node2]
 The `RelatedNodeInfo` class can also store additional `metadata` if needed:
 
 ```python
-node2.relationships[NodeRelationship.PARENT] = RelatedNodeInfo(node_id=node1.node_id, metadata={"key": "val"})
+node2.relationships[NodeRelationship.PARENT] = RelatedNodeInfo(
+    node_id=node1.node_id, metadata={"key": "val"}
+)
 ```
 
 ### Customizing the ID
@@ -45,5 +47,4 @@ You can also get and set the `node_id` of any `TextNode` directly.
 ```python
 print(node.node_id)
 node.node_id = "My new node_id!"
-
 ```
diff --git a/docs/module_guides/loading/node_parsers/root.md b/docs/module_guides/loading/node_parsers/root.md
index 84fffc7f98..6a04db518c 100644
--- a/docs/module_guides/loading/node_parsers/root.md
+++ b/docs/module_guides/loading/node_parsers/root.md
@@ -26,7 +26,9 @@ from llama_index.node_parser import SimpleNodeParser
 
 node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20)
 
-nodes = node_parser.get_nodes_from_documents([Document(text="long text")], show_progress=False)
+nodes = node_parser.get_nodes_from_documents(
+    [Document(text="long text")], show_progress=False
+)
 ```
 
 Or set inside a `ServiceContext` to be used automatically when an index is constructed using `.from_documents()`:
@@ -71,12 +73,12 @@ import tiktoken
 from llama_index.text_splitter import SentenceSplitter
 
 text_splitter = SentenceSplitter(
-  separator=" ",
-  chunk_size=1024,
-  chunk_overlap=20,
-  paragraph_separator="\n\n\n",
-  secondary_chunking_regex="[^,.;。]+[,.;。]?",
-  tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
+    separator=" ",
+    chunk_size=1024,
+    chunk_overlap=20,
+    paragraph_separator="\n\n\n",
+    secondary_chunking_regex="[^,.;。]+[,.;。]?",
+    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
 )
 
 node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)
@@ -89,11 +91,11 @@ import tiktoken
 from llama_index.text_splitter import TokenTextSplitter
 
 text_splitter = TokenTextSplitter(
-  separator=" ",
-  chunk_size=1024,
-  chunk_overlap=20,
-  backup_separators=["\n"],
-  tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
+    separator=" ",
+    chunk_size=1024,
+    chunk_overlap=20,
+    backup_separators=["\n"],
+    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
 )
 
 node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)
@@ -105,10 +107,10 @@ node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)
 from llama_index.text_splitter import CodeSplitter
 
 text_splitter = CodeSplitter(
-  language="python",
-  chunk_lines=40,
-  chunk_lines_overlap=15,
-  max_chars=1500,
+    language="python",
+    chunk_lines=40,
+    chunk_lines_overlap=15,
+    max_chars=1500,
 )
 
 node_parser = SimpleNodeParser.from_defaults(text_splitter=text_splitter)
@@ -127,12 +129,12 @@ import nltk
 from llama_index.node_parser import SentenceWindowNodeParser
 
 node_parser = SentenceWindowNodeParser.from_defaults(
-  # how many sentences on either side to capture
-  window_size=3,
-  # the metadata key that holds the window of surrounding sentences
-  window_metadata_key="window",
-  # the metadata key that holds the original sentence
-  original_text_metadata_key="original_sentence"
+    # how many sentences on either side to capture
+    window_size=3,
+    # the metadata key that holds the window of surrounding sentences
+    window_metadata_key="window",
+    # the metadata key that holds the original sentence
+    original_text_metadata_key="original_sentence",
 )
 ```
 
diff --git a/docs/module_guides/models/embeddings.md b/docs/module_guides/models/embeddings.md
index 8e3cae8092..dcafee735f 100644
--- a/docs/module_guides/models/embeddings.md
+++ b/docs/module_guides/models/embeddings.md
@@ -26,6 +26,7 @@ To save costs, you may want to use a local model.
 
 ```python
 from llama_index import ServiceContext
+
 service_context = ServiceContext.from_defaults(embed_model="local")
 ```
 
@@ -48,6 +49,7 @@ service_context = ServiceContext.from_defaults(embed_model=embed_model)
 
 # optionally set a global service context to avoid passing it into other objects every time
 from llama_index import set_global_service_context
+
 set_global_service_context(service_context)
 
 documents = SimpleDirectoryReader("./data").load_data()
@@ -80,6 +82,7 @@ The easiest way to use a local model is:
 
 ```python
 from llama_index import ServiceContext
+
 service_context = ServiceContext.from_defaults(embed_model="local")
 ```
 
@@ -88,9 +91,7 @@ To configure the model used (from Hugging Face hub), add the model name separate
 ```python
 from llama_index import ServiceContext
 
-service_context = ServiceContext.from_defaults(
-  embed_model="local:BAAI/bge-large-en"
-)
+service_context = ServiceContext.from_defaults(embed_model="local:BAAI/bge-large-en")
 ```
 
 ### HuggingFace Optimum ONNX Embeddings
@@ -115,9 +116,7 @@ And then usage:
 
 ```python
 embed_model = OptimumEmbedding(folder_name="./bge_onnx")
-service_context = ServiceContext.from_defaults(
-  embed_model=embed_model
-)
+service_context = ServiceContext.from_defaults(embed_model=embed_model)
 ```
 
 ### LangChain Integrations
@@ -148,28 +147,31 @@ from typing import Any, List
 from InstructorEmbedding import INSTRUCTOR
 from llama_index.embeddings.base import BaseEmbedding
 
+
 class InstructorEmbeddings(BaseEmbedding):
-  def __init__(
-    self,
-    instructor_model_name: str = "hkunlp/instructor-large",
-    instruction: str = "Represent the Computer Science documentation or question:",
-    **kwargs: Any,
-  ) -> None:
-    self._model = INSTRUCTOR(instructor_model_name)
-    self._instruction = instruction
-    super().__init__(**kwargs)
-
-    def _get_query_embedding(self, query: str) -> List[float]:
-      embeddings = self._model.encode([[self._instruction, query]])
-      return embeddings[0]
-
-    def _get_text_embedding(self, text: str) -> List[float]:
-      embeddings = self._model.encode([[self._instruction, text]])
-      return embeddings[0]
-
-    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
-      embeddings = self._model.encode([[self._instruction, text] for text in texts])
-      return embeddings
+    def __init__(
+        self,
+        instructor_model_name: str = "hkunlp/instructor-large",
+        instruction: str = "Represent the Computer Science documentation or question:",
+        **kwargs: Any,
+    ) -> None:
+        self._model = INSTRUCTOR(instructor_model_name)
+        self._instruction = instruction
+        super().__init__(**kwargs)
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        embeddings = self._model.encode([[self._instruction, query]])
+        return embeddings[0]
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        embeddings = self._model.encode([[self._instruction, text]])
+        return embeddings[0]
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        embeddings = self._model.encode(
+            [[self._instruction, text] for text in texts]
+        )
+        return embeddings
 ```
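A hedged usage sketch for the custom embedding class above (the `./data` path is illustrative):

```python
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex

embed_model = InstructorEmbeddings()
service_context = ServiceContext.from_defaults(embed_model=embed_model)

documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
```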
 
 ## Standalone Usage
diff --git a/docs/module_guides/models/llms.md b/docs/module_guides/models/llms.md
index c8f9a87cf5..7114f092aa 100644
--- a/docs/module_guides/models/llms.md
+++ b/docs/module_guides/models/llms.md
@@ -21,7 +21,7 @@ The following code snippet shows how you can get started using LLMs.
 from llama_index.llms import OpenAI
 
 # non-streaming
-resp = OpenAI().complete('Paul Graham is ')
+resp = OpenAI().complete("Paul Graham is ")
 print(resp)
 ```
 
diff --git a/docs/module_guides/models/llms/usage_custom.md b/docs/module_guides/models/llms/usage_custom.md
index 979e13daaf..5043788320 100644
--- a/docs/module_guides/models/llms/usage_custom.md
+++ b/docs/module_guides/models/llms/usage_custom.md
@@ -21,18 +21,18 @@ you may also plug in any LLM shown on Langchain's
 [LLM](https://python.langchain.com/docs/integrations/llms/) page.
 
 ```python
-
 from llama_index import (
     KeywordTableIndex,
     SimpleDirectoryReader,
     LLMPredictor,
-    ServiceContext
+    ServiceContext,
 )
 from llama_index.llms import OpenAI
+
 # alternatively
 # from langchain.llms import ...
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 
 # define LLM
 llm = OpenAI(temperature=0.1, model="gpt-4")
@@ -44,7 +44,6 @@ index = KeywordTableIndex.from_documents(documents, service_context=service_cont
 # get response from query
 query_engine = index.as_query_engine()
 response = query_engine.query("What did the author do after his time at Y Combinator?")
-
 ```
 
 ## Example: Changing the number of output tokens (for OpenAI, Cohere, AI21)
@@ -56,20 +55,14 @@ For OpenAI, Cohere, AI21, you just need to set the `max_tokens` parameter
 (or maxTokens for AI21). We will handle text chunking/calculations under the hood.
 
 ```python
-
-from llama_index import (
-    KeywordTableIndex,
-    SimpleDirectoryReader,
-    ServiceContext
-)
+from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext
 from llama_index.llms import OpenAI
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 
 # define LLM
 llm = OpenAI(temperature=0, model="text-davinci-002", max_tokens=512)
 service_context = ServiceContext.from_defaults(llm=llm)
-
 ```
 
 ## Example: Explicitly configure `context_window` and `num_output`
@@ -77,17 +70,13 @@ service_context = ServiceContext.from_defaults(llm=llm)
 If you are using other LLM classes from langchain, you may need to explicitly configure the `context_window` and `num_output` via the `ServiceContext` since the information is not available by default.
 
 ```python
-
-from llama_index import (
-    KeywordTableIndex,
-    SimpleDirectoryReader,
-    ServiceContext
-)
+from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext
 from llama_index.llms import OpenAI
+
 # alternatively
 # from langchain.llms import ...
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 
 
 # set context window
@@ -107,7 +96,6 @@ service_context = ServiceContext.from_defaults(
     context_window=context_window,
     num_output=num_output,
 )
-
 ```
 
 ## Example: Using a HuggingFace LLM
@@ -133,6 +121,7 @@ query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")
 
 import torch
 from llama_index.llms import HuggingFaceLLM
+
 llm = HuggingFaceLLM(
     context_window=4096,
     max_new_tokens=256,
@@ -157,7 +146,7 @@ Some models will raise errors if all the keys from the tokenizer are passed to t
 
 ```python
 HuggingFaceLLM(
-    ...
+    # ...
     tokenizer_outputs_to_remove=["token_type_ids"]
 )
 ```
@@ -183,11 +172,7 @@ import torch
 from transformers import pipeline
 from typing import Optional, List, Mapping, Any
 
-from llama_index import (
-    ServiceContext,
-    SimpleDirectoryReader,
-    SummaryIndex
-)
+from llama_index import ServiceContext, SimpleDirectoryReader, SummaryIndex
 from llama_index.callbacks import CallbackManager
 from llama_index.llms import (
     CustomLLM,
@@ -205,17 +190,20 @@ num_output = 256
 
 # store the pipeline/model outside of the LLM class to avoid memory issues
 model_name = "facebook/opt-iml-max-30b"
-pipeline = pipeline("text-generation", model=model_name, device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
+pipeline = pipeline(
+    "text-generation",
+    model=model_name,
+    device="cuda:0",
+    model_kwargs={"torch_dtype": torch.bfloat16},
+)
 
-class OurLLM(CustomLLM):
 
+class OurLLM(CustomLLM):
     @property
     def metadata(self) -> LLMMetadata:
         """Get LLM metadata."""
         return LLMMetadata(
-            context_window=context_window,
-            num_output=num_output,
-            model_name=model_name
+            context_window=context_window, num_output=num_output, model_name=model_name
         )
 
     @llm_completion_callback()
@@ -231,6 +219,7 @@ class OurLLM(CustomLLM):
     def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         raise NotImplementedError()
 
+
 # define our LLM
 llm = OurLLM()
 
@@ -238,11 +227,11 @@ service_context = ServiceContext.from_defaults(
     llm=llm,
     embed_model="local:BAAI/bge-base-en-v1.5",
     context_window=context_window,
-    num_output=num_output
+    num_output=num_output,
 )
 
 # Load the your data
-documents = SimpleDirectoryReader('./data').load_data()
+documents = SimpleDirectoryReader("./data").load_data()
 index = SummaryIndex.from_documents(documents, service_context=service_context)
 
 # Query and print response
diff --git a/docs/module_guides/models/llms/usage_standalone.md b/docs/module_guides/models/llms/usage_standalone.md
index 3beb398c11..9f07b3348a 100644
--- a/docs/module_guides/models/llms/usage_standalone.md
+++ b/docs/module_guides/models/llms/usage_standalone.md
@@ -8,15 +8,16 @@ You can use our LLM modules on their own.
 from llama_index.llms import OpenAI
 
 # non-streaming
-resp = OpenAI().complete('Paul Graham is ')
+resp = OpenAI().complete("Paul Graham is ")
 print(resp)
 
 # using streaming endpoint
 from llama_index.llms import OpenAI
+
 llm = OpenAI()
-resp = llm.stream_complete('Paul Graham is ')
+resp = llm.stream_complete("Paul Graham is ")
 for delta in resp:
-    print(delta, end='')
+    print(delta, end="")
 ```
 
 ## Chat Example
diff --git a/docs/module_guides/models/prompts/usage_pattern.md b/docs/module_guides/models/prompts/usage_pattern.md
index 29efbdf776..56f85e3762 100644
--- a/docs/module_guides/models/prompts/usage_pattern.md
+++ b/docs/module_guides/models/prompts/usage_pattern.md
@@ -71,7 +71,6 @@ For instance, take a look at the following snippet.
 query_engine = index.as_query_engine(response_mode="compact")
 prompts_dict = query_engine.get_prompts()
 print(list(prompts_dict.keys()))
-
 ```
 
 You might get back the following keys:
@@ -90,7 +89,6 @@ obtained through `get_prompts`.
 e.g. regarding the example above, we might do the following
 
 ```python
-
 # shakespeare!
 qa_prompt_tmpl_str = (
     "Context information is below.\n"
@@ -104,10 +102,7 @@ qa_prompt_tmpl_str = (
 )
 qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
 
-query_engine.update_prompts(
-    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
-)
-
+query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})
 ```
 
 #### Modify prompts used in query engine
@@ -120,8 +115,7 @@ There are also two equivalent ways to override the prompts:
 
 ```python
 query_engine = index.as_query_engine(
-    text_qa_template=<custom_qa_prompt>,
-    refine_template=<custom_refine_prompt>
+    text_qa_template=custom_qa_prompt, refine_template=custom_refine_prompt
 )
 ```
 
@@ -130,8 +124,7 @@ query_engine = index.as_query_engine(
 ```python
 retriever = index.as_retriever()
 synth = get_response_synthesizer(
-    text_qa_template=<custom_qa_prompt>,
-    refine_template=<custom_refine_prompt>
+    text_qa_template=custom_qa_prompt, refine_template=custom_refine_prompt
 )
 query_engine = RetrieverQueryEngine(retriever, response_synthesizer)
 ```
@@ -156,13 +149,13 @@ There are two equivalent ways to override the prompts:
 1. via the default nodes constructor
 
 ```python
-index = TreeIndex(nodes, summary_template=<custom_prompt>)
+index = TreeIndex(nodes, summary_template=custom_prompt)
 ```
 
 2. via the documents constructor.
 
 ```python
-index = TreeIndex.from_documents(docs, summary_template=<custom_prompt>)
+index = TreeIndex.from_documents(docs, summary_template=custom_prompt)
 ```
 
 For more details on which index uses which prompts, please visit
@@ -189,7 +182,6 @@ prompt_tmpl = PromptTemplate(prompt_tmpl_str)
 partial_prompt_tmpl = prompt_tmpl.partial_format(foo="abc")
 
 fmt_str = partial_prompt_tmpl.format(bar="def")
-
 ```
 
 #### Template Variable Mappings
@@ -201,13 +193,11 @@ But if you're trying to adapt a string template for use with LlamaIndex, it can
 Instead, define `template_var_mappings`:
 
 ```python
-
 template_var_mappings = {"context_str": "my_context", "query_str": "my_query"}
 
 prompt_tmpl = PromptTemplate(
     qa_prompt_tmpl_str, template_var_mappings=template_var_mappings
 )
-
 ```
 
 #### Function Mappings
@@ -225,10 +215,10 @@ def format_context_fn(**kwargs):
     fmtted_context = "\n\n".join([f"- {c}" for c in context_list])
     return fmtted_context
 
+
 prompt_tmpl = PromptTemplate(
     qa_prompt_tmpl_str, function_mappings={"context_str": format_context_fn}
 )
 
-prompt_tmpl.format(context_str="<context>", query_str="<query>")
-
+prompt_tmpl.format(context_str="context", query_str="query")
 ```
diff --git a/docs/module_guides/observability/callbacks/token_counting_migration.md b/docs/module_guides/observability/callbacks/token_counting_migration.md
index fc354dd6b5..d2b03abb02 100644
--- a/docs/module_guides/observability/callbacks/token_counting_migration.md
+++ b/docs/module_guides/observability/callbacks/token_counting_migration.md
@@ -19,8 +19,8 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
 # to the same tokenizer that was used previously for token counting
 # NOTE: The tokenizer should be a function that takes in text and returns a list of tokens
 token_counter = TokenCountingHandler(
-    tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode
-    verbose=False  # set to true to see usage printed to the console
+    tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
+    verbose=False,  # set to true to see usage printed to the console
 )
 
 callback_manager = CallbackManager([token_counter])
@@ -40,8 +40,17 @@ token_counter.reset_counts()
 
 # also track prompt, completion, and total LLM tokens, in addition to embeddings
 response = index.as_query_engine().query("What did the author do growing up?")
-print('Embedding Tokens: ', token_counter.total_embedding_token_count, '\n',
-      'LLM Prompt Tokens: ', token_counter.prompt_llm_token_count, '\n',
-      'LLM Completion Tokens: ', token_counter.completion_llm_token_count, '\n',
-      'Total LLM Token Count: ', token_counter.total_llm_token_count)
+print(
+    "Embedding Tokens: ",
+    token_counter.total_embedding_token_count,
+    "\n",
+    "LLM Prompt Tokens: ",
+    token_counter.prompt_llm_token_count,
+    "\n",
+    "LLM Completion Tokens: ",
+    token_counter.completion_llm_token_count,
+    "\n",
+    "Total LLM Token Count: ",
+    token_counter.total_llm_token_count,
+)
 ```
diff --git a/docs/module_guides/observability/observability.md b/docs/module_guides/observability/observability.md
index c1d51099f6..593be02f82 100644
--- a/docs/module_guides/observability/observability.md
+++ b/docs/module_guides/observability/observability.md
@@ -19,7 +19,6 @@ Each provider has similarities and differences. Take a look below for the full s
 To toggle, you will generally just need to do the following:
 
 ```python
-
 from llama_index import set_global_handler
 
 # general usage
@@ -27,7 +26,6 @@ set_global_handler("<handler_name>", **kwargs)
 
 # W&B example
 # set_global_handler("wandb", run_args={"project": "llamaindex"})
-
 ```
 
 Note that all `kwargs` to `set_global_handler` are passed to the underlying callback handler.
@@ -60,6 +58,7 @@ Prompts allows users to log/trace/inspect the execution flow of LlamaIndex durin
 
 ```python
 from llama_index import set_global_handler
+
 set_global_handler("wandb", run_args={"project": "llamaindex"})
 
 # NOTE: No need to do the following
@@ -79,7 +78,6 @@ llama_index.global_handler.persist_index(graph, index_name="composable_graph")
 storage_context = llama_index.global_handler.load_storage_context(
     artifact_url="ayut/llamaindex/composable_graph:v0"
 )
-
 ```
 
 ![](/_static/integrations/wandb.png)
@@ -106,12 +104,14 @@ Arize [Phoenix](https://github.com/Arize-ai/phoenix): LLMOps insights at lightni
 # Phoenix can display in real time the traces automatically
 # collected from your LlamaIndex application.
 import phoenix as px
+
 # Look for a URL in the output to open the App in a browser.
 px.launch_app()
 # The App is initially empty, but as you proceed with the steps below,
 # traces will appear automatically as your LlamaIndex application runs.
 
 import llama_index
+
 llama_index.set_global_handler("arize_phoenix")
 
 # Run all of your LlamaIndex applications as usual and traces
@@ -181,11 +181,11 @@ TruLens allows users to instrument/evaluate LlamaIndex applications, through fea
 ```python
 # use trulens
 from trulens_eval import TruLlama
+
 tru_query_engine = TruLlama(query_engine)
 
 # query
 tru_query_engine.query("What did the author do growing up?")
-
 ```
 
 ![](/_static/integrations/trulens.png)
@@ -209,6 +209,7 @@ HoneyHive allows users to trace the execution flow of any LLM pipeline. Users ca
 
 ```python
 from llama_index import set_global_handler
+
 set_global_handler(
     "honeyhive",
     project="My HoneyHive Project",
diff --git a/docs/module_guides/querying/node_postprocessors/node_postprocessors.md b/docs/module_guides/querying/node_postprocessors/node_postprocessors.md
index 0f6db73613..98551f134c 100644
--- a/docs/module_guides/querying/node_postprocessors/node_postprocessors.md
+++ b/docs/module_guides/querying/node_postprocessors/node_postprocessors.md
@@ -20,8 +20,7 @@ Used to ensure certain keywords are either excluded or included.
 from llama_index.indices.postprocessor import KeywordNodePostprocessor
 
 postprocessor = KeywordNodePostprocessor(
-  required_keywords=["word1", "word2"],
-  exclude_keywords=["word3", "word4"]
+    required_keywords=["word1", "word2"], exclude_keywords=["word3", "word4"]
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -35,7 +34,7 @@ Used to replace the node content with a field from the node metadata. If the fie
 from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
 
 postprocessor = MetadataReplacementPostProcessor(
-  target_metadata_key="window",
+    target_metadata_key="window",
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -67,9 +66,9 @@ The threshold cutoff can be specified instead, which uses a raw similarity cutof
 from llama_index.indices.postprocessor import SentenceEmbeddingOptimizer
 
 postprocessor = SentenceEmbeddingOptimizer(
-  embed_model=service_context.embed_model,
-  percentile_cutoff=0.5,
-  # threshold_cutoff=0.7
+    embed_model=service_context.embed_model,
+    percentile_cutoff=0.5,
+    # threshold_cutoff=0.7
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -87,9 +86,7 @@ Uses the "Cohere ReRank" functionality to re-order nodes, and returns the top N
 from llama_index.indices import CohereRerank
 
 postprocessor = CohereRerank(
-  top_n=2
-  model="rerank-english-v2.0",
-  api_key="YOUR COHERE API KEY"
+    top_n=2, model="rerank-english-v2.0", api_key="YOUR COHERE API KEY"
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -106,8 +103,7 @@ from llama_index.indices.postprocessor import SentenceTransformerRerank
 
 # We choose a model with relatively high speed and decent accuracy.
 postprocessor = SentenceTransformerRerank(
-  model="cross-encoder/ms-marco-MiniLM-L-2-v2",
-  top_n=3
+    model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=3
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -124,10 +120,7 @@ Uses a LLM to re-order nodes by asking the LLM to return the relevant documents
 ```python
 from llama_index.indices.postprocessor import LLMRerank
 
-postprocessor = LLMRerank(
-  top_n=2
-  service_context=service_context,
-)
+postprocessor = LLMRerank(top_n=2, service_context=service_context)
 
 postprocessor.postprocess_nodes(nodes)
 ```
@@ -142,8 +135,7 @@ This postproccesor returns the top K nodes sorted by date. This assumes there is
 from llama_index.indices.postprocessor import FixedRecencyPostprocessor
 
 postprocessor = FixedRecencyPostprocessor(
-  tok_k=1,
-  date_key="date"  # the key in the metadata to find the date
+    top_k=1, date_key="date"  # the key in the metadata to find the date
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -161,9 +153,7 @@ This postproccesor returns the top K nodes after sorting by date and removing ol
 from llama_index.indices.postprocessor import EmbeddingRecencyPostprocessor
 
 postprocessor = EmbeddingRecencyPostprocessor(
-  service_context=service_context,
-  date_key="date",
-  similarity_cutoff=0.7
+    service_context=service_context, date_key="date", similarity_cutoff=0.7
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -178,10 +168,7 @@ This postproccesor returns the top K nodes applying a time-weighted rerank to ea
 ```python
 from llama_index.indices.postprocessor import TimeWeightedPostprocessor
 
-postprocessor = TimeWeightedPostprocessor(
-  time_decay=0.99,
-  top_k=1
-)
+postprocessor = TimeWeightedPostprocessor(time_decay=0.99, top_k=1)
 
 postprocessor.postprocess_nodes(nodes)
 ```
@@ -198,7 +185,7 @@ The PII (Personal Identifiable Information) postprocssor removes information tha
 from llama_index.indices.postprocessor import PIINodePostprocessor
 
 postprocessor = PIINodePostprocessor(
-  service_context=service_context,  # this should be setup with an LLM you trust
+    service_context=service_context  # this should be set up with an LLM you trust
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -228,9 +215,9 @@ This is useful when you know the relationships point to important data (either b
 from llama_index.indices.postprocessor import PrevNextNodePostprocessor
 
 postprocessor = PrevNextNodePostprocessor(
-  docstore=index.docstore,
-  num_nodes=1,  # number of nodes to fetch when looking forawrds or backwards
-  mode="next"   # can be either 'next', 'previous', or 'both'
+    docstore=index.docstore,
+    num_nodes=1,  # number of nodes to fetch when looking forwards or backwards
+    mode="next",  # can be either 'next', 'previous', or 'both'
 )
 
 postprocessor.postprocess_nodes(nodes)
@@ -246,10 +233,10 @@ The same as PrevNextNodePostprocessor, but lets the LLM decide the mode (next, p
 from llama_index.indices.postprocessor import AutoPrevNextNodePostprocessor
 
 postprocessor = AutoPrevNextNodePostprocessor(
-  docstore=index.docstore,
-  service_context=service_context
-  num_nodes=1,  # number of nodes to fetch when looking forawrds or backwards)
-
+    docstore=index.docstore,
+    service_context=service_context,
+    num_nodes=1,  # number of nodes to fetch when looking forwards or backwards
+)
 postprocessor.postprocess_nodes(nodes)
 ```
 
diff --git a/docs/module_guides/querying/node_postprocessors/root.md b/docs/module_guides/querying/node_postprocessors/root.md
index 637573f676..c0c40b79ee 100644
--- a/docs/module_guides/querying/node_postprocessors/root.md
+++ b/docs/module_guides/querying/node_postprocessors/root.md
@@ -21,8 +21,8 @@ from llama_index.indices.postprocessor import SimilarityPostprocessor
 from llama_index.schema import Node, NodeWithScore
 
 nodes = [
-  NodeWithScore(node=Node(text="text"), score=0.7),
-  NodeWithScore(node=Node(text="text"), score=0.8)
+    NodeWithScore(node=Node(text="text"), score=0.7),
+    NodeWithScore(node=Node(text="text"), score=0.8),
 ]
 
 # filter nodes below 0.75 similarity score
@@ -45,11 +45,9 @@ documents = SimpleDirectoryReader("./data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 
 query_engine = index.as_query_engine(
-  node_postprocessors=[
-    TimeWeightedPostprocessor(
-        time_decay=0.5, time_access_refresh=False, top_k=1
-    )
-  ]
+    node_postprocessors=[
+        TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=1)
+    ]
 )
 
 # all node post-processors will be applied during each query
@@ -79,8 +77,8 @@ from llama_index.indices.postprocessor import SimilarityPostprocessor
 from llama_index.schema import Node, NodeWithScore
 
 nodes = [
-  NodeWithScore(node=Node(text="text"), score=0.7),
-  NodeWithScore(node=Node(text="text"), score=0.8)
+    NodeWithScore(node=Node(text="text"), score=0.7),
+    NodeWithScore(node=Node(text="text"), score=0.8),
 ]
 
 # filter nodes below 0.75 similarity score
@@ -110,12 +108,11 @@ from llama_index import QueryBundle
 from llama_index.indices.postprocessor.base import BaseNodePostprocessor
 from llama_index.schema import NodeWithScore
 
-class DummyNodePostprocessor:
 
+class DummyNodePostprocessor:
     def postprocess_nodes(
         self, nodes: List[NodeWithScore], query_bundle: Optional[QueryBundle]
     ) -> List[NodeWithScore]:
-
         # subtracts 1 from the score
         for n in nodes:
             n.score -= 1
diff --git a/docs/module_guides/querying/output_parser.md b/docs/module_guides/querying/output_parser.md
index c554916695..5cc242a6a0 100644
--- a/docs/module_guides/querying/output_parser.md
+++ b/docs/module_guides/querying/output_parser.md
@@ -15,11 +15,14 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.output_parsers import GuardrailsOutputParser
 from llama_index.llm_predictor import StructuredLLMPredictor
 from llama_index.prompts import PromptTemplate
-from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL
+from llama_index.prompts.default_prompts import (
+    DEFAULT_TEXT_QA_PROMPT_TMPL,
+    DEFAULT_REFINE_PROMPT_TMPL,
+)
 
 
 # load documents, build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectorStoreIndex(documents, chunk_size=512)
 llm_predictor = StructuredLLMPredictor()
 
@@ -28,7 +31,7 @@ llm_predictor = StructuredLLMPredictor()
 # this is a special LLMPredictor that allows for structured outputs
 
 # define query / output spec
-rail_spec = ("""
+rail_spec = """
 <rail version="0.1">
 
 <output>
@@ -52,10 +55,12 @@ Query string here.
 @json_suffix_prompt_v2_wo_none
 </prompt>
 </rail>
-""")
+"""
 
 # define output parser
-output_parser = GuardrailsOutputParser.from_rail_string(rail_spec, llm=llm_predictor.llm)
+output_parser = GuardrailsOutputParser.from_rail_string(
+    rail_spec, llm=llm_predictor.llm
+)
 
 # format each prompt with output parser instructions
 fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
@@ -66,9 +71,7 @@ refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser)
 
 # obtain a structured response
 query_engine = index.as_query_engine(
-    service_context=ServiceContext.from_defaults(
-        llm_predictor=llm_predictor
-    ),
+    service_context=ServiceContext.from_defaults(llm_predictor=llm_predictor),
     text_qa_template=qa_prompt,
     refine_template=refine_prompt,
 )
@@ -76,7 +79,6 @@ response = query_engine.query(
     "What are the three items the author did growing up?",
 )
 print(response)
-
 ```
 
 Output:
@@ -94,19 +96,27 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.output_parsers import LangchainOutputParser
 from llama_index.llm_predictor import StructuredLLMPredictor
 from llama_index.prompts import PromptTemplate
-from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL
+from llama_index.prompts.default_prompts import (
+    DEFAULT_TEXT_QA_PROMPT_TMPL,
+    DEFAULT_REFINE_PROMPT_TMPL,
+)
 from langchain.output_parsers import StructuredOutputParser, ResponseSchema
 
 
 # load documents, build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 llm_predictor = StructuredLLMPredictor()
 
 # define output schema
 response_schemas = [
-    ResponseSchema(name="Education", description="Describes the author's educational experience/background."),
-    ResponseSchema(name="Work", description="Describes the author's work experience/background.")
+    ResponseSchema(
+        name="Education",
+        description="Describes the author's educational experience/background.",
+    ),
+    ResponseSchema(
+        name="Work", description="Describes the author's work experience/background."
+    ),
 ]
 
 # define output parser
@@ -121,9 +131,7 @@ refine_prompt = PromptTemplate(fmt_refine_tmpl, output_parser=output_parser)
 
 # query index
 query_engine = index.as_query_engine(
-    service_context=ServiceContext.from_defaults(
-        llm_predictor=llm_predictor
-    ),
+    service_context=ServiceContext.from_defaults(llm_predictor=llm_predictor),
     text_qa_template=qa_prompt,
     refine_template=refine_prompt,
 )
diff --git a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
index d72be2d9c5..66275a3d94 100644
--- a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
+++ b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
@@ -20,26 +20,26 @@ from llama_index.schema import Node, NodeWithScore
 from llama_index import get_response_synthesizer
 
 response_synthesizer = get_response_synthesizer(
-  response_mode="refine",
-  service_context=service_context,
-  text_qa_template=text_qa_template,
-  refine_template=refine_template,
-  use_async=False,
-  streaming=False
+    response_mode="refine",
+    service_context=service_context,
+    text_qa_template=text_qa_template,
+    refine_template=refine_template,
+    use_async=False,
+    streaming=False,
 )
 
 # synchronous
 response = response_synthesizer.synthesize(
-  "query string",
-  nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..],
-  additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..],
+    "query string",
+    nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
+    additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
 )
 
 # asynchronous
 response = await response_synthesizer.asynthesize(
-  "query string",
-  nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..],
-  additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..],
+    "query string",
+    nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
+    additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
 )
 ```
 
@@ -47,8 +47,7 @@ You can also directly return a string, using the lower-level `get_response` and
 
 ```python
 response_str = response_synthesizer.get_response(
-  "query string",
-  text_chunks=["text1", "text2", ...]
+    "query string", text_chunks=["text1", "text2", ...]
 )
 ```
 
diff --git a/docs/module_guides/querying/response_synthesizers/root.md b/docs/module_guides/querying/response_synthesizers/root.md
index 928fe0b18b..d777736320 100644
--- a/docs/module_guides/querying/response_synthesizers/root.md
+++ b/docs/module_guides/querying/response_synthesizers/root.md
@@ -44,11 +44,10 @@ Configuring the response synthesizer for a query engine using `response_mode`:
 from llama_index.schema import Node, NodeWithScore
 from llama_index.response_synthesizers import get_response_synthesizer
 
-response_synthesizer = get_response_synthesizer(response_mode='compact')
+response_synthesizer = get_response_synthesizer(response_mode="compact")
 
 response = response_synthesizer.synthesize(
-  "query text",
-  nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ..]
+    "query text", nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...]
 )
 ```
 
@@ -185,7 +184,6 @@ You can specify these additional variables in the `**kwargs` for `get_response`.
 For example,
 
 ```python
-
 from llama_index import PromptTemplate
 from llama_index.response_synthesizers import TreeSummarize
 
@@ -207,7 +205,9 @@ qa_prompt = PromptTemplate(qa_prompt_tmpl)
 summarizer = TreeSummarize(verbose=True, summary_template=qa_prompt)
 
 # get response
-response = summarizer.get_response("who is Paul Graham?", [text], tone_name="a Shakespeare play")
+response = summarizer.get_response(
+    "who is Paul Graham?", [text], tone_name="a Shakespeare play"
+)
 ```
 
 ## Modules
diff --git a/docs/module_guides/querying/retriever/root.md b/docs/module_guides/querying/retriever/root.md
index da40168d2b..6a3e6812a6 100644
--- a/docs/module_guides/querying/retriever/root.md
+++ b/docs/module_guides/querying/retriever/root.md
@@ -31,7 +31,7 @@ retriever = index.as_retriever()
 Retrieve relevant context for a question:
 
 ```python
-nodes = retriever.retrieve('Who is Paul Graham?')
+nodes = retriever.retrieve("Who is Paul Graham?")
 ```
 
 > Note: To learn how to build an index, see [Indexing](/module_guides/indexing/indexing.md)
@@ -45,7 +45,7 @@ For example, with a `SummaryIndex`:
 
 ```python
 retriever = summary_index.as_retriever(
-    retriever_mode='llm',
+    retriever_mode="llm",
 )
 ```
 
@@ -72,10 +72,9 @@ For example, if we selected the "llm" retriever mode, we might do the following:
 
 ```python
 retriever = summary_index.as_retriever(
-    retriever_mode='llm',
+    retriever_mode="llm",
     choice_batch_size=5,
 )
-
 ```
 
 ## Low-Level Composition API
diff --git a/docs/module_guides/querying/router/root.md b/docs/module_guides/querying/router/root.md
index d8c5dbe443..689b6ccb5d 100644
--- a/docs/module_guides/querying/router/root.md
+++ b/docs/module_guides/querying/router/root.md
@@ -80,7 +80,6 @@ selector = PydanticMultiSelector.from_defaults()
 selector = LLMSingleSelector.from_defaults()
 # multi selector (LLM)
 selector = LLMMultiSelector.from_defaults()
-
 ```
 
 ## Using as a Query Engine
@@ -118,7 +117,6 @@ query_engine = RouterQueryEngine(
     ],
 )
 query_engine.query("<query>")
-
 ```
 
 ## Using as a Retriever
@@ -155,7 +153,6 @@ retriever = RouterRetriever(
         vector_tool,
     ],
 )
-
 ```
 
 ## Using selector as a standalone module
@@ -177,9 +174,10 @@ choices = [
 choices = ["choice 1 - description for choice 1", "choice 2: description for choice 2"]
 
 selector = LLMSingleSelector.from_defaults()
-selector_result = selector.select(choices, query="What's revenue growth for IBM in 2007?")
+selector_result = selector.select(
+    choices, query="What's revenue growth for IBM in 2007?"
+)
 print(selector_result.selections)
-
 ```
 
 ```{toctree}
diff --git a/docs/module_guides/storing/customization.md b/docs/module_guides/storing/customization.md
index a1154a9b78..3ff9c7a8d8 100644
--- a/docs/module_guides/storing/customization.md
+++ b/docs/module_guides/storing/customization.md
@@ -5,7 +5,7 @@ By default, LlamaIndex hides away the complexities and let you query your data i
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine()
 response = query_engine.query("Summarize the documents.")
@@ -57,19 +57,20 @@ index.storage_context.persist(persist_dir="<persist_dir>")
 
 # to load index later, make sure you setup the storage context
 # this will loaded the persisted stores from persist_dir
-storage_context = StorageContext.from_defaults(
-    persist_dir="<persist_dir>"
-)
+storage_context = StorageContext.from_defaults(persist_dir="<persist_dir>")
 
 # then load the index object
 from llama_index import load_index_from_storage
+
 loaded_index = load_index_from_storage(storage_context)
 
 # if loading an index from a persist_dir containing multiple indexes
 loaded_index = load_index_from_storage(storage_context, index_id="<index_id>")
 
 # if loading multiple indexes from a persist dir
-loaded_indicies = load_index_from_storage(storage_context, index_ids=["<index_id>", ...])
+loaded_indices = load_indices_from_storage(
+    storage_context, index_ids=["<index_id>", ...]
+)  # also import load_indices_from_storage from llama_index
 ```
 
 You can customize the underlying storage with a one-line change to instantiate different document stores, index stores, and vector stores.
@@ -110,12 +111,7 @@ from llama_index.vector_stores import PineconeVectorStore
 # Creating a Pinecone index
 api_key = "api_key"
 pinecone.init(api_key=api_key, environment="us-west1-gcp")
-pinecone.create_index(
-    "quickstart",
-    dimension=1536,
-    metric="euclidean",
-    pod_type="p1"
-)
+pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1")
 index = pinecone.Index("quickstart")
 
 # construct vector store
diff --git a/docs/module_guides/storing/docstores.md b/docs/module_guides/storing/docstores.md
index 04e5e581b2..2ca2075df4 100644
--- a/docs/module_guides/storing/docstores.md
+++ b/docs/module_guides/storing/docstores.md
@@ -59,9 +59,7 @@ nodes = parser.get_nodes_from_documents(documents)
 
 # create (or load) docstore and add nodes
 docstore = RedisDocumentStore.from_host_and_port(
-  host="127.0.0.1",
-  port="6379",
-  namespace='llama_index'
+    host="127.0.0.1", port="6379", namespace="llama_index"
 )
 docstore.add_documents(nodes)
 
@@ -94,8 +92,8 @@ nodes = parser.get_nodes_from_documents(documents)
 
 # create (or load) docstore and add nodes
 docstore = FirestoreDocumentStore.from_dataabse(
-  project="project-id",
-  database="(default)",
+    project="project-id",
+    database="(default)",
 )
 docstore.add_documents(nodes)
 
diff --git a/docs/module_guides/storing/index_stores.md b/docs/module_guides/storing/index_stores.md
index 1b44500b3b..d13fa69fcd 100644
--- a/docs/module_guides/storing/index_stores.md
+++ b/docs/module_guides/storing/index_stores.md
@@ -28,6 +28,7 @@ index = VectorStoreIndex(nodes, storage_context=storage_context)
 
 # or alternatively, load index
 from llama_index import load_index_from_storage
+
 index = load_index_from_storage(storage_context)
 ```
 
@@ -52,9 +53,7 @@ from llama_index import VectorStoreIndex
 
 # create (or load) docstore and add nodes
 index_store = RedisIndexStore.from_host_and_port(
-  host="127.0.0.1",
-  port="6379",
-  namespace='llama_index'
+    host="127.0.0.1", port="6379", namespace="llama_index"
 )
 
 # create storage context
@@ -65,6 +64,7 @@ index = VectorStoreIndex(nodes, storage_context=storage_context)
 
 # or alternatively, load index
 from llama_index import load_index_from_storage
+
 index = load_index_from_storage(storage_context)
 ```
 
diff --git a/docs/module_guides/storing/save_load.md b/docs/module_guides/storing/save_load.md
index d7871e3e20..da5c6f23b2 100644
--- a/docs/module_guides/storing/save_load.md
+++ b/docs/module_guides/storing/save_load.md
@@ -30,7 +30,11 @@ storage_context = StorageContext.from_defaults(
 We can then load specific indices from the `StorageContext` through some convenience functions below.
 
 ```python
-from llama_index import load_index_from_storage, load_indices_from_storage, load_graph_from_storage
+from llama_index import (
+    load_index_from_storage,
+    load_indices_from_storage,
+    load_graph_from_storage,
+)
 
 # load a single index
 # need to specify index_id if multiple indexes are persisted to the same directory
@@ -40,11 +44,15 @@ index = load_index_from_storage(storage_context, index_id="<index_id>")
 index = load_index_from_storage(storage_context)
 
 # load multiple indices
-indices = load_indices_from_storage(storage_context) # loads all indices
-indices = load_indices_from_storage(storage_context, index_ids=[index_id1, ...]) # loads specific indices
+indices = load_indices_from_storage(storage_context)  # loads all indices
+indices = load_indices_from_storage(
+    storage_context, index_ids=[index_id1, ...]
+)  # loads specific indices
 
 # load composable graph
-graph = load_graph_from_storage(storage_context, root_id="<root_id>") # loads graph with the specified root_id
+graph = load_graph_from_storage(
+    storage_context, root_id="<root_id>"
+)  # loads graph with the specified root_id
 ```
 
 Here's the full [API Reference on saving and loading](/api_reference/storage/indices_save_load.rst).
@@ -59,10 +67,13 @@ Here's a simple example, instantiating a vector store:
 import dotenv
 import s3fs
 import os
+
 dotenv.load_dotenv("../../../.env")
 
 # load documents
-documents = SimpleDirectoryReader('../../../examples/paul_graham_essay/data/').load_data()
+documents = SimpleDirectoryReader(
+    "../../../examples/paul_graham_essay/data/"
+).load_data()
 print(len(documents))
 index = VectorStoreIndex.from_documents(documents)
 ```
@@ -71,17 +82,17 @@ At this point, everything has been the same. Now - let's instantiate a S3 filesy
 
 ```python
 # set up s3fs
-AWS_KEY = os.environ['AWS_ACCESS_KEY_ID']
-AWS_SECRET = os.environ['AWS_SECRET_ACCESS_KEY']
-R2_ACCOUNT_ID = os.environ['R2_ACCOUNT_ID']
+AWS_KEY = os.environ["AWS_ACCESS_KEY_ID"]
+AWS_SECRET = os.environ["AWS_SECRET_ACCESS_KEY"]
+R2_ACCOUNT_ID = os.environ["R2_ACCOUNT_ID"]
 
 assert AWS_KEY is not None and AWS_KEY != ""
 
 s3 = s3fs.S3FileSystem(
-   key=AWS_KEY,
-   secret=AWS_SECRET,
-   endpoint_url=f'https://{R2_ACCOUNT_ID}.r2.cloudflarestorage.com',
-   s3_additional_kwargs={'ACL': 'public-read'}
+    key=AWS_KEY,
+    secret=AWS_SECRET,
+    endpoint_url=f"https://{R2_ACCOUNT_ID}.r2.cloudflarestorage.com",
+    s3_additional_kwargs={"ACL": "public-read"},
 )
 
 # If you're using 2+ indexes with the same StorageContext,
@@ -89,13 +100,13 @@ s3 = s3fs.S3FileSystem(
 index.set_index_id("vector_index")
 
 # persist index to s3
-s3_bucket_name = 'llama-index/storage_demo'  # {bucket_name}/{index_name}
+s3_bucket_name = "llama-index/storage_demo"  # {bucket_name}/{index_name}
 index.storage_context.persist(persist_dir=s3_bucket_name, fs=s3)
 
 # load index from s3
 index_from_s3 = load_index_from_storage(
     StorageContext.from_defaults(persist_dir=s3_bucket_name, fs=s3),
-    index_id='vector_index'
+    index_id="vector_index",
 )
 ```
 
diff --git a/docs/module_guides/storing/storing.md b/docs/module_guides/storing/storing.md
index a75f89abde..32275d18d7 100644
--- a/docs/module_guides/storing/storing.md
+++ b/docs/module_guides/storing/storing.md
@@ -27,15 +27,13 @@ We have confirmed support for the following storage backends:
 Many vector stores (except FAISS) will store both the data as well as the index (embeddings). This means that you will not need to use a separate document store or index store. This _also_ means that you will not need to explicitly persist this data - this happens automatically. Usage would look something like the following to build a new index / reload an existing one.
 
 ```python
-
 ## build a new index
 from llama_index import VectorStoreIndex, StorageContext
 from llama_index.vector_stores import DeepLakeVectorStore
+
 # construct vector store and customize storage context
 vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>")
-storage_context = StorageContext.from_defaults(
-    vector_store = vector_store
-)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
 # Load documents and build index
 index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
diff --git a/docs/module_guides/supporting_modules/service_context.md b/docs/module_guides/supporting_modules/service_context.md
index 8fa4a07f17..6017f52ff6 100644
--- a/docs/module_guides/supporting_modules/service_context.md
+++ b/docs/module_guides/supporting_modules/service_context.md
@@ -11,7 +11,7 @@ You can use it to set the [global configuration](#setting-global-configuration),
 
 The `ServiceContext` is a simple python dataclass that you can directly construct by passing in the desired components.
 
-```python
+```
 @dataclass
 class ServiceContext:
     # The LLM used to generate natural language responses to queries.
@@ -72,23 +72,20 @@ from llama_index.llms import OpenAI
 from llama_index.text_splitter import TokenTextSplitter
 from llama_index.node_parser import SimpleNodeParser
 
-llm = OpenAI(model='text-davinci-003', temperature=0, max_tokens=256)
+llm = OpenAI(model="text-davinci-003", temperature=0, max_tokens=256)
 embed_model = OpenAIEmbedding()
 node_parser = SimpleNodeParser.from_defaults(
-  text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
+    text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
 )
 prompt_helper = PromptHelper(
-  context_window=4096,
-  num_output=256,
-  chunk_overlap_ratio=0.1,
-  chunk_size_limit=None
+    context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None
 )
 
 service_context = ServiceContext.from_defaults(
-  llm=llm,
-  embed_model=embed_model,
-  node_parser=node_parser,
-  prompt_helper=prompt_helper
+    llm=llm,
+    embed_model=embed_model,
+    node_parser=node_parser,
+    prompt_helper=prompt_helper,
 )
 ```
 
@@ -98,6 +95,7 @@ You can set a service context as the global default that applies to the entire L
 
 ```python
 from llama_index import set_global_service_context
+
 set_global_service_context(service_context)
 ```
 
diff --git a/docs/optimizing/advanced_retrieval/query_transformations.md b/docs/optimizing/advanced_retrieval/query_transformations.md
index 5e8b9ce895..68e6512d4e 100644
--- a/docs/optimizing/advanced_retrieval/query_transformations.md
+++ b/docs/optimizing/advanced_retrieval/query_transformations.md
@@ -31,7 +31,7 @@ from llama_index.indices.query.query_transform.base import HyDEQueryTransform
 from llama_index.query_engine.transform_query_engine import TransformQueryEngine
 
 # load documents, build index
-documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()
+documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
 index = VectorStoreIndex(documents)
 
 # run query with HyDE query transform
@@ -41,7 +41,6 @@ query_engine = index.as_query_engine()
 query_engine = TransformQueryEngine(query_engine, query_transform=hyde)
 response = query_engine.query(query_str)
 print(response)
-
 ```
 
 Check out our [example notebook](https://github.com/jerryjliu/llama_index/blob/main/docs/examples/query_transformations/HyDEQueryTransformDemo.ipynb) for a full walkthrough.
@@ -64,13 +63,11 @@ An example image is shown below.
 Here's a corresponding example code snippet over a composed graph.
 
 ```python
-
 # Setting: a summary index composed over multiple vector indices
 # llm_predictor_chatgpt corresponds to the ChatGPT LLM interface
 from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
-decompose_transform = DecomposeQueryTransform(
-    llm_predictor_chatgpt, verbose=True
-)
+
+decompose_transform = DecomposeQueryTransform(llm_predictor_chatgpt, verbose=True)
 
 # initialize indexes and graph
 ...
@@ -80,17 +77,13 @@ decompose_transform = DecomposeQueryTransform(
 vector_query_engine = vector_index.as_query_engine()
 vector_query_engine = TransformQueryEngine(
     vector_query_engine,
-    query_transform=decompose_transform
-    transform_extra_info={'index_summary': vector_index.index_struct.summary}
+    query_transform=decompose_transform,
+    transform_extra_info={"index_summary": vector_index.index_struct.summary},
 )
-custom_query_engines = {
-    vector_index.index_id: vector_query_engine
-}
+custom_query_engines = {vector_index.index_id: vector_query_engine}
 
 # query
-query_str = (
-    "Compare and contrast the airports in Seattle, Houston, and Toronto. "
-)
+query_str = "Compare and contrast the airports in Seattle, Houston, and Toronto. "
 query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
 response = query_engine.query(query_str)
 ```
@@ -112,19 +105,19 @@ Here's a corresponding example code snippet.
 
 ```python
 from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform
+
 # gpt-4
-step_decompose_transform = StepDecomposeQueryTransform(
-    llm_predictor, verbose=True
-)
+step_decompose_transform = StepDecomposeQueryTransform(llm_predictor, verbose=True)
 
 query_engine = index.as_query_engine()
-query_engine = MultiStepQueryEngine(query_engine, query_transform=step_decompose_transform)
+query_engine = MultiStepQueryEngine(
+    query_engine, query_transform=step_decompose_transform
+)
 
 response = query_engine.query(
     "Who was in the first batch of the accelerator program the author started?",
 )
 print(str(response))
-
 ```
 
 Check out our [example notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/vector_indices/SimpleIndexDemo-multistep.ipynb) for a full walkthrough.
diff --git a/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md b/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md
index 6b1fc43570..9d9b6cc7d2 100644
--- a/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md
+++ b/docs/optimizing/advanced_retrieval/structured_outputs/query_engine.md
@@ -20,6 +20,7 @@ First, you need to define the object you want to extract.
 from typing import List
 from pydantic import BaseModel
 
+
 class Biography(BaseModel):
     """Data model for a biography."""
 
@@ -31,7 +32,9 @@ class Biography(BaseModel):
 Then, you create your query engine.
 
 ```python
-query_engine = index.as_query_engine(response_mode="tree_summarize", output_cls=Biography)
+query_engine = index.as_query_engine(
+    response_mode="tree_summarize", output_cls=Biography
+)
 ```
 
 Lastly, you can get a response and inspect the output.
@@ -40,11 +43,11 @@ Lastly, you can get a response and inspect the output.
 response = query_engine.query("Who is Paul Graham?")
 
 print(response.name)
-> 'Paul Graham'
+# > 'Paul Graham'
 print(response.best_known_for)
-> ['working on Bel', 'co-founding Viaweb', 'creating the programming language Arc']
+# > ['working on Bel', 'co-founding Viaweb', 'creating the programming language Arc']
 print(response.extra_info)
-> "Paul Graham is a computer scientist, entrepreneur, and writer. He is best known for ..."
+# > "Paul Graham is a computer scientist, entrepreneur, and writer. He is best known      for ..."
 ```
 
 ## Modules
diff --git a/docs/optimizing/basic_strategies/basic_strategies.md b/docs/optimizing/basic_strategies/basic_strategies.md
index 0588524c51..c73b23963c 100644
--- a/docs/optimizing/basic_strategies/basic_strategies.md
+++ b/docs/optimizing/basic_strategies/basic_strategies.md
@@ -116,12 +116,10 @@ from llama_index.vector_stores import MetadataFilters, ExactMatchFilter
 
 documents = [
     Document(text="text", metadata={"author": "LlamaIndex"}),
-    Document(text="text", metadata={"author": "John Doe"})
+    Document(text="text", metadata={"author": "John Doe"}),
 ]
 
-filters = MetadataFilters(filters=[
-    ExactMatchFilter(key="author", value="John Doe")
-])
+filters = MetadataFilters(filters=[ExactMatchFilter(key="author", value="John Doe")])
 
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine(filters=filters)
diff --git a/docs/understanding/evaluating/cost_analysis/usage_pattern.md b/docs/understanding/evaluating/cost_analysis/usage_pattern.md
index f72d070130..4abc5d09b8 100644
--- a/docs/understanding/evaluating/cost_analysis/usage_pattern.md
+++ b/docs/understanding/evaluating/cost_analysis/usage_pattern.md
@@ -34,9 +34,7 @@ from llama_index import ServiceContext, set_global_service_context
 
 set_global_service_context(
     ServiceContext.from_defaults(
-        llm=llm,
-        embed_model=embed_model,
-        callback_manager=callback_manager
+        llm=llm, embed_model=embed_model, callback_manager=callback_manager
     )
 )
 ```
diff --git a/docs/understanding/loading/llamahub.md b/docs/understanding/loading/llamahub.md
index 680666dc62..623b356157 100644
--- a/docs/understanding/loading/llamahub.md
+++ b/docs/understanding/loading/llamahub.md
@@ -12,7 +12,7 @@ Get started with:
 ```python
 from llama_index import download_loader
 
-GoogleDocsReader = download_loader('GoogleDocsReader')
+GoogleDocsReader = download_loader("GoogleDocsReader")
 loader = GoogleDocsReader()
 documents = loader.load_data(document_ids=[...])
 ```
@@ -24,7 +24,7 @@ documents = loader.load_data(document_ids=[...])
 ```python
 from llama_index import SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('./data').load_data()
+documents = SimpleDirectoryReader("./data").load_data()
 ```
 
 ## Available connectors
diff --git a/docs/understanding/loading/loading.md b/docs/understanding/loading/loading.md
index 6197475eee..4d9ce537b7 100644
--- a/docs/understanding/loading/loading.md
+++ b/docs/understanding/loading/loading.md
@@ -9,7 +9,7 @@ The easiest reader to use is our SimpleDirectoryReader, which creates documents
 ```python
 from llama_index import SimpleDirectoryReader
 
-documents = SimpleDirectoryReader('./data').load_data()
+documents = SimpleDirectoryReader("./data").load_data()
 ```
 
 ## Using Readers from LlamaHub
@@ -21,15 +21,15 @@ In this example LlamaIndex downloads and installs the connector called [Database
 ```python
 from llama_index import download_loader
 
-DatabaseReader = download_loader('DatabaseReader')
+DatabaseReader = download_loader("DatabaseReader")
 
 reader = DatabaseReader(
-    scheme = os.getenv('DB_SCHEME'),
-    host = os.getenv('DB_HOST'),
-    port = os.getenv('DB_PORT'),
-    user = os.getenv('DB_USER'),
-    password = os.getenv('DB_PASS'),
-    dbname = os.getenv('DB_NAME')
+    scheme=os.getenv("DB_SCHEME"),
+    host=os.getenv("DB_HOST"),
+    port=os.getenv("DB_PORT"),
+    user=os.getenv("DB_USER"),
+    password=os.getenv("DB_PASS"),
+    dbname=os.getenv("DB_NAME"),
 )
 
 query = "SELECT * FROM users"
@@ -78,7 +78,7 @@ from llama_index.schema import TextNode
 node1 = TextNode(text="<text_chunk>", id_="<node_id>")
 node2 = TextNode(text="<text_chunk>", id_="<node_id>")
 
-index = VectorStoreIndex([node1,node2])
+index = VectorStoreIndex([node1, node2])
 ```
 
 ## Customizing Documents
@@ -87,11 +87,7 @@ When creating documents, you can also attach useful metadata that can be used at
 
 ```python
 document = Document(
-    text='text',
-    metadata={
-        'filename': '<doc_file_name>',
-        'category': '<category>'
-    }
+    text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"}
 )
 ```
 
diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
index fd3e7a607b..f5a899b149 100644
--- a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
+++ b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
@@ -25,10 +25,12 @@ from flask import Flask
 
 app = Flask(__name__)
 
+
 @app.route("/")
 def home():
     return "Hello World!"
 
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=5601)
 ```
@@ -50,10 +52,11 @@ import os
 from llama_index import SimpleDirectoryReader, VectorStoreIndex, StorageContext
 
 # NOTE: for local testing only, do NOT deploy with your key hardcoded
-os.environ['OPENAI_API_KEY'] = "your key here"
+os.environ["OPENAI_API_KEY"] = "your key here"
 
 index = None
 
+
 def initialize_index():
     global index
     storage_context = StorageContext.from_defaults()
@@ -61,7 +64,9 @@ def initialize_index():
         index = load_index_from_storage(storage_context)
     else:
         documents = SimpleDirectoryReader("./documents").load_data()
-        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+        index = VectorStoreIndex.from_documents(
+            documents, storage_context=storage_context
+        )
         storage_context.persist(index_dir)
 ```
 
@@ -72,15 +77,16 @@ Our query endpoint will accept `GET` requests with the query text as a parameter
 ```python
 from flask import request
 
+
 @app.route("/query", methods=["GET"])
 def query_index():
-  global index
-  query_text = request.args.get("text", None)
-  if query_text is None:
-    return "No text found, please include a ?text=blah parameter in the URL", 400
-  query_engine = index.as_query_engine()
-  response = query_engine.query(query_text)
-  return str(response), 200
+    global index
+    query_text = request.args.get("text", None)
+    if query_text is None:
+        return "No text found, please include a ?text=blah parameter in the URL", 400
+    query_engine = index.as_query_engine()
+    response = query_engine.query(query_text)
+    return str(response), 200
 ```
 
 Now, we've introduced a few new concepts to our server:
@@ -113,22 +119,26 @@ from multiprocessing.managers import BaseManager
 from llama_index import SimpleDirectoryReader, VectorStoreIndex, Document
 
 # NOTE: for local testing only, do NOT deploy with your key hardcoded
-os.environ['OPENAI_API_KEY'] = "your key here"
+os.environ["OPENAI_API_KEY"] = "your key here"
 
 index = None
 lock = Lock()
 
+
 def initialize_index():
-  global index
+    global index
+
+    with lock:
+        # same as before ...
+        pass
 
-  with lock:
-    # same as before ...
 
 def query_index(query_text):
-  global index
-  query_engine = index.as_query_engine()
-  response = query_engine.query(query_text)
-  return str(response)
+    global index
+    query_engine = index.as_query_engine()
+    response = query_engine.query(query_text)
+    return str(response)
+
 
 if __name__ == "__main__":
     # init the global index
@@ -137,8 +147,8 @@ if __name__ == "__main__":
 
     # setup server
     # NOTE: you might want to handle the password in a less hardcoded way
-    manager = BaseManager(('', 5602), b'password')
-    manager.register('query_index', query_index)
+    manager = BaseManager(("", 5602), b"password")
+    manager.register("query_index", query_index)
     server = manager.get_server()
 
     print("starting server...")
@@ -157,26 +167,28 @@ from flask import Flask, request
 
 # initialize manager connection
 # NOTE: you might want to handle the password in a less hardcoded way
-manager = BaseManager(('', 5602), b'password')
-manager.register('query_index')
+manager = BaseManager(("", 5602), b"password")
+manager.register("query_index")
 manager.connect()
 
+
 @app.route("/query", methods=["GET"])
 def query_index():
-  global index
-  query_text = request.args.get("text", None)
-  if query_text is None:
-    return "No text found, please include a ?text=blah parameter in the URL", 400
-  response = manager.query_index(query_text)._getvalue()
-  return str(response), 200
+    global index
+    query_text = request.args.get("text", None)
+    if query_text is None:
+        return "No text found, please include a ?text=blah parameter in the URL", 400
+    response = manager.query_index(query_text)._getvalue()
+    return str(response), 200
+
 
 @app.route("/")
 def home():
     return "Hello World!"
 
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=5601)
-
 ```
 
 _flask_demo.py_
@@ -189,20 +201,21 @@ If we allow users to upload their own documents, we should probably remove the P
 
 ```python
 ...
-manager.register('insert_into_index')
+manager.register("insert_into_index")
 ...
 
+
 @app.route("/uploadFile", methods=["POST"])
 def upload_file():
     global manager
-    if 'file' not in request.files:
+    if "file" not in request.files:
         return "Please send a POST request with a file", 400
 
     filepath = None
     try:
         uploaded_file = request.files["file"]
         filename = secure_filename(uploaded_file.filename)
-        filepath = os.path.join('documents', os.path.basename(filename))
+        filepath = os.path.join("documents", os.path.basename(filename))
         uploaded_file.save(filepath)
 
         if request.form.get("filename_as_doc_id", None) is not None:
@@ -239,8 +252,9 @@ def insert_into_index(doc_text, doc_id=None):
         index.insert(document)
         index.storage_context.persist()
 
+
 ...
-manager.register('insert_into_index', insert_into_index)
+manager.register("insert_into_index", insert_into_index)
 ...
 ```
 
diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
index c7d6456df7..2b8f1c6ad5 100644
--- a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
+++ b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
@@ -116,10 +116,12 @@ in the `endpoints.py` file:
 
 ```python
 @collections_router.post("/create")
-async def create_collection(request,
-                            title: str = Form(...),
-                            description: str = Form(...),
-                            files: list[UploadedFile] = File(...), ):
+async def create_collection(
+    request,
+    title: str = Form(...),
+    description: str = Form(...),
+    files: list[UploadedFile] = File(...),
+):
     key = None if getattr(request, "auth", None) is None else request.auth
     if key is not None:
         key = await key
@@ -141,9 +143,7 @@ async def create_collection(request,
 
     create_index.si(collection_instance.id).apply_async()
 
-    return await sync_to_async(CollectionModelSchema)(
-        ...
-    )
+    return await sync_to_async(CollectionModelSchema)(...)
 ```
 
 3. `/collections/query` — a POST endpoint to query a document collection using the LLM. Accepts a JSON payload
@@ -152,9 +152,11 @@ async def create_collection(request,
    to this REST endpoint to query a specific collection.
 
 ```python
-@collections_router.post("/query",
-                         response=CollectionQueryOutput,
-                         summary="Ask a question of a document collection", )
+@collections_router.post(
+    "/query",
+    response=CollectionQueryOutput,
+    summary="Ask a question of a document collection",
+)
 def query_collection_view(request: HttpRequest, query_input: CollectionQueryInput):
     collection_id = query_input.collection_id
     query_str = query_input.query_str
@@ -166,9 +168,11 @@ def query_collection_view(request: HttpRequest, query_input: CollectionQueryInpu
    output is serialized using the `CollectionModelSchema`.
 
 ```python
-@collections_router.get("/available",
-                        response=list[CollectionModelSchema],
-                        summary="Get a list of all of the collections created with my api_key", )
+@collections_router.get(
+    "/available",
+    response=list[CollectionModelSchema],
+    summary="Get a list of all of the collections created with my api_key",
+)
 async def get_my_collections_view(request: HttpRequest):
     key = None if getattr(request, "auth", None) is None else request.auth
     if key is not None:
@@ -176,12 +180,7 @@ async def get_my_collections_view(request: HttpRequest):
 
     collections = Collection.objects.filter(api_key=key)
 
-    return [
-        {
-            ...
-        }
-        async for collection in collections
-    ]
+    return [{...} async for collection in collections]
 ```
 
 5. `/collections/{collection_id}/add_file`: A POST endpoint to add a file to an existing collection. Accepts
@@ -189,11 +188,15 @@ async def get_my_collections_view(request: HttpRequest):
    instance associated with the specified collection.
 
 ```python
-@collections_router.post("/{collection_id}/add_file", summary="Add a file to a collection")
-async def add_file_to_collection(request,
-                                 collection_id: int,
-                                 file: UploadedFile = File(...),
-                                 description: str = Form(...), ):
+@collections_router.post(
+    "/{collection_id}/add_file", summary="Add a file to a collection"
+)
+async def add_file_to_collection(
+    request,
+    collection_id: int,
+    file: UploadedFile = File(...),
+    description: str = Form(...),
+):
     collection = await sync_to_async(Collection.objects.get)(id=collection_id)
 ```
 
@@ -312,7 +315,9 @@ async def receive(self, text_data):
 
         await self.send(json.dumps({"response": formatted_response}, indent=4))
     else:
-        await self.send(json.dumps({"error": "No index loaded for this connection."}, indent=4))
+        await self.send(
+            json.dumps({"error": "No index loaded for this connection."}, indent=4)
+        )
 ```
 
 To load the collection model, the `load_collection_model` function is used, which can be found
diff --git a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
index da85608473..3e1ac4c2da 100644
--- a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
+++ b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
@@ -28,7 +28,7 @@ nest_asyncio.apply()
 
 Let's first download the raw 10-k files, from 2019-2022.
 
-```python
+```
 # NOTE: the code examples assume you're operating within a Jupyter notebook.
 # download files
 !mkdir data
@@ -40,7 +40,7 @@ To parse the HTML files into formatted text, we use the [Unstructured](https://g
 
 First we install the necessary packages:
 
-```python
+```
 !pip install llama-hub unstructured
 ```
 
diff --git a/docs/understanding/putting_it_all_together/q_and_a.md b/docs/understanding/putting_it_all_together/q_and_a.md
index 3544b2f7c4..8413aefb6a 100644
--- a/docs/understanding/putting_it_all_together/q_and_a.md
+++ b/docs/understanding/putting_it_all_together/q_and_a.md
@@ -6,12 +6,12 @@ The most basic example usage of LlamaIndex is through semantic search. We provid
 
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
-documents = SimpleDirectoryReader('data').load_data()
+
+documents = SimpleDirectoryReader("data").load_data()
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine()
 response = query_engine.query("What did the author do growing up?")
 print(response)
-
 ```
 
 **Tutorials**
@@ -38,9 +38,7 @@ Empirically, setting `response_mode="tree_summarize"` also leads to better summa
 ```python
 index = SummaryIndex.from_documents(documents)
 
-query_engine = index.as_query_engine(
-    response_mode="tree_summarize"
-)
+query_engine = index.as_query_engine(response_mode="tree_summarize")
 response = query_engine.query("<summarization_query>")
 ```
 
@@ -72,10 +70,11 @@ from llama_index.indices.composability import ComposableGraph
 index1 = VectorStoreIndex.from_documents(notion_docs)
 index2 = VectorStoreIndex.from_documents(slack_docs)
 
-graph = ComposableGraph.from_indices(SummaryIndex, [index1, index2], index_summaries=["summary1", "summary2"])
+graph = ComposableGraph.from_indices(
+    SummaryIndex, [index1, index2], index_summaries=["summary1", "summary2"]
+)
 query_engine = graph.as_query_engine()
 response = query_engine.query("<query_str>")
-
 ```
 
 **Guides**
@@ -117,14 +116,9 @@ By default, this uses a `LLMSingleSelector` as the router, which uses the LLM to
 ```python
 from llama_index.query_engine import RouterQueryEngine
 
-query_engine = RouterQueryEngine.from_defaults(
-    query_engine_tools=[tool1, tool2]
-)
-
-response = query_engine.query(
-    "In Notion, give me a summary of the product roadmap."
-)
+query_engine = RouterQueryEngine.from_defaults(query_engine_tools=[tool1, tool2])
 
+response = query_engine.query("In Notion, give me a summary of the product roadmap.")
 ```
 
 **Guides**
@@ -138,6 +132,7 @@ You can explicitly perform compare/contrast queries with a **query transformatio
 
 ```python
 from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
+
 decompose_transform = DecomposeQueryTransform(
     service_context.llm_predictor, verbose=True
 )
@@ -166,15 +161,24 @@ from llama_index.tools import QueryEngineTool, ToolMetadata
 query_engine_tools = [
     QueryEngineTool(
         query_engine=sept_engine,
-        metadata=ToolMetadata(name='sept_22', description='Provides information about Uber quarterly financials ending September 2022')
+        metadata=ToolMetadata(
+            name="sept_22",
+            description="Provides information about Uber quarterly financials ending September 2022",
+        ),
     ),
     QueryEngineTool(
         query_engine=june_engine,
-        metadata=ToolMetadata(name='june_22', description='Provides information about Uber quarterly financials ending June 2022')
+        metadata=ToolMetadata(
+            name="june_22",
+            description="Provides information about Uber quarterly financials ending June 2022",
+        ),
     ),
     QueryEngineTool(
         query_engine=march_engine,
-        metadata=ToolMetadata(name='march_22', description='Provides information about Uber quarterly financials ending March 2022')
+        metadata=ToolMetadata(
+            name="march_22",
+            description="Provides information about Uber quarterly financials ending March 2022",
+        ),
     ),
 ]
 ```
@@ -184,8 +188,9 @@ Then, we define a `SubQuestionQueryEngine` over these tools:
 ```python
 from llama_index.query_engine import SubQuestionQueryEngine
 
-query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)
-
+query_engine = SubQuestionQueryEngine.from_defaults(
+    query_engine_tools=query_engine_tools
+)
 ```
 
 This query engine can execute any number of sub-queries against any subset of query engine tools before synthesizing the final answer.
diff --git a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
index 5e3d049696..a5e7570009 100644
--- a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
+++ b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
@@ -25,7 +25,7 @@ st.title("🦙 Llama Index Term Extractor 🦙")
 document_text = st.text_area("Or enter raw text")
 if st.button("Extract Terms and Definitions") and document_text:
     with st.spinner("Extracting..."):
-        extracted_terms = document text  # this is a placeholder!
+        extracted_terms = document_text  # this is a placeholder!
     st.write(extracted_terms)
 ```
 
@@ -53,16 +53,22 @@ setup_tab, upload_tab = st.tabs(["Setup", "Upload/Extract Terms"])
 with setup_tab:
     st.subheader("LLM Setup")
     api_key = st.text_input("Enter your OpenAI API key here", type="password")
-    llm_name = st.selectbox('Which LLM?', ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"])
-    model_temperature = st.slider("LLM Temperature", min_value=0.0, max_value=1.0, step=0.1)
-    term_extract_str = st.text_area("The query to extract terms and definitions with.", value=DEFAULT_TERM_STR)
+    llm_name = st.selectbox(
+        "Which LLM?", ["text-davinci-003", "gpt-3.5-turbo", "gpt-4"]
+    )
+    model_temperature = st.slider(
+        "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
+    )
+    term_extract_str = st.text_area(
+        "The query to extract terms and definitions with.", value=DEFAULT_TERM_STR
+    )
 
 with upload_tab:
     st.subheader("Extract and Query Definitions")
     document_text = st.text_area("Or enter raw text")
     if st.button("Extract Terms and Definitions") and document_text:
         with st.spinner("Extracting..."):
-            extracted_terms = document text  # this is a placeholder!
+            extracted_terms = document_text  # this is a placeholder!
         st.write(extracted_terms)
 ```
 
@@ -77,25 +83,42 @@ Now that we are able to define LLM settings and upload text, we can try using Ll
 We can add the following functions to both initialize our LLM, as well as use it to extract terms from the input text.
 
 ```python
-from llama_index import Document, SummaryIndex, LLMPredictor, ServiceContext, load_index_from_storage
+from llama_index import (
+    Document,
+    SummaryIndex,
+    LLMPredictor,
+    ServiceContext,
+    load_index_from_storage,
+)
 from llama_index.llms import OpenAI
 
+
 def get_llm(llm_name, model_temperature, api_key, max_tokens=256):
-    os.environ['OPENAI_API_KEY'] = api_key
+    os.environ["OPENAI_API_KEY"] = api_key
     return OpenAI(temperature=model_temperature, model=llm_name, max_tokens=max_tokens)
 
+
 def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_key):
     llm = get_llm(llm_name, model_temperature, api_key, max_tokens=1024)
 
-    service_context = ServiceContext.from_defaults(llm=llm,
-                                                   chunk_size=1024)
+    service_context = ServiceContext.from_defaults(llm=llm, chunk_size=1024)
 
     temp_index = SummaryIndex.from_documents(documents, service_context=service_context)
     query_engine = temp_index.as_query_engine(response_mode="tree_summarize")
     terms_definitions = str(query_engine.query(term_extract_str))
-    terms_definitions = [x for x in terms_definitions.split("\n") if x and 'Term:' in x and 'Definition:' in x]
+    terms_definitions = [
+        x
+        for x in terms_definitions.split("\n")
+        if x and "Term:" in x and "Definition:" in x
+    ]
     # parse the text into a dict
-    terms_to_definition = {x.split("Definition:")[0].split("Term:")[-1].strip(): x.split("Definition:")[-1].strip() for x in terms_definitions}
+    terms_to_definition = {
+        x.split("Definition:")[0]
+        .split("Term:")[-1]
+        .strip(): x.split("Definition:")[-1]
+        .strip()
+        for x in terms_definitions
+    }
     return terms_to_definition
 ```
 
@@ -108,9 +131,13 @@ with upload_tab:
     document_text = st.text_area("Or enter raw text")
     if st.button("Extract Terms and Definitions") and document_text:
         with st.spinner("Extracting..."):
-            extracted_terms = extract_terms([Document(text=document_text)],
-                                            term_extract_str, llm_name,
-                                            model_temperature, api_key)
+            extracted_terms = extract_terms(
+                [Document(text=document_text)],
+                term_extract_str,
+                llm_name,
+                model_temperature,
+                api_key,
+            )
         st.write(extracted_terms)
 ```
 
@@ -132,14 +159,16 @@ First things first though, let's add a feature to initialize a global vector ind
 
 ```python
 ...
-if 'all_terms' not in st.session_state:
-    st.session_state['all_terms'] = DEFAULT_TERMS
+if "all_terms" not in st.session_state:
+    st.session_state["all_terms"] = DEFAULT_TERMS
 ...
 
+
 def insert_terms(terms_to_definition):
     for term, definition in terms_to_definition.items():
         doc = Document(text=f"Term: {term}\nDefinition: {definition}")
-        st.session_state['llama_index'].insert(doc)
+        st.session_state["llama_index"].insert(doc)
+
 
 @st.cache_resource
 def initialize_index(llm_name, model_temperature, api_key):
@@ -152,33 +181,48 @@ def initialize_index(llm_name, model_temperature, api_key):
 
     return index
 
+
 ...
 
 with upload_tab:
     st.subheader("Extract and Query Definitions")
     if st.button("Initialize Index and Reset Terms"):
-        st.session_state['llama_index'] = initialize_index(llm_name, model_temperature, api_key)
-        st.session_state['all_terms'] = {}
+        st.session_state["llama_index"] = initialize_index(
+            llm_name, model_temperature, api_key
+        )
+        st.session_state["all_terms"] = {}
 
     if "llama_index" in st.session_state:
-        st.markdown("Either upload an image/screenshot of a document, or enter the text manually.")
+        st.markdown(
+            "Either upload an image/screenshot of a document, or enter the text manually."
+        )
         document_text = st.text_area("Or enter raw text")
-        if st.button("Extract Terms and Definitions") and (uploaded_file or document_text):
-            st.session_state['terms'] = {}
+        if st.button("Extract Terms and Definitions") and (
+            uploaded_file or document_text
+        ):
+            st.session_state["terms"] = {}
             terms_docs = {}
             with st.spinner("Extracting..."):
-                terms_docs.update(extract_terms([Document(text=document_text)], term_extract_str, llm_name, model_temperature, api_key))
-            st.session_state['terms'].update(terms_docs)
+                terms_docs.update(
+                    extract_terms(
+                        [Document(text=document_text)],
+                        term_extract_str,
+                        llm_name,
+                        model_temperature,
+                        api_key,
+                    )
+                )
+            st.session_state["terms"].update(terms_docs)
 
-        if "terms" in st.session_state and st.session_state["terms"]::
+        if "terms" in st.session_state and st.session_state["terms"]:
             st.markdown("Extracted terms")
-            st.json(st.session_state['terms'])
+            st.json(st.session_state["terms"])
 
             if st.button("Insert terms?"):
                 with st.spinner("Inserting terms"):
-                    insert_terms(st.session_state['terms'])
-                st.session_state['all_terms'].update(st.session_state['terms'])
-                st.session_state['terms'] = {}
+                    insert_terms(st.session_state["terms"])
+                st.session_state["all_terms"].update(st.session_state["terms"])
+                st.session_state["terms"] = {}
                 st.experimental_rerun()
 ```
 
@@ -196,8 +240,8 @@ setup_tab, terms_tab, upload_tab, query_tab = st.tabs(
 ...
 with terms_tab:
     with terms_tab:
-    st.subheader("Current Extracted Terms and Definitions")
-    st.json(st.session_state["all_terms"])
+        st.subheader("Current Extracted Terms and Definitions")
+        st.json(st.session_state["all_terms"])
 ...
 with query_tab:
     st.subheader("Query for Terms/Definitions!")
@@ -216,7 +260,10 @@ with query_tab:
     if "llama_index" in st.session_state:
         query_text = st.text_input("Ask about a term or definition:")
         if query_text:
-            query_text = query_text + "\nIf you can't find the answer, answer the query with the best of your knowledge."
+            query_text = (
+                query_text
+                + "\nIf you can't find the answer, answer the query with the best of your knowledge."
+            )
             with st.spinner("Generating answer..."):
                 response = st.session_state["llama_index"].query(
                     query_text, similarity_top_k=5, response_mode="compact"
@@ -254,9 +301,9 @@ With our base app working, it might feel like a lot of work to build up a useful
 def insert_terms(terms_to_definition):
     for term, definition in terms_to_definition.items():
         doc = Document(text=f"Term: {term}\nDefinition: {definition}")
-        st.session_state['llama_index'].insert(doc)
+        st.session_state["llama_index"].insert(doc)
     # TEMPORARY - save to disk
-    st.session_state['llama_index'].storage_context.persist()
+    st.session_state["llama_index"].storage_context.persist()
 ```
 
 Now, we need some document to extract from! The repository for this project used the Wikipedia page on New York City, and you can find the text [here](https://github.com/jerryjliu/llama_index/blob/main/examples/test_wiki/data/nyc_text.txt).
@@ -298,7 +345,11 @@ This is due to the concept of "refining" answers in Llama Index. Since we are qu
 So, the refine process seems to be messing with our results! Rather than appending extra instructions to the `query_str`, remove that, and Llama Index will let us provide our own custom prompts! Let's create those now, using the [default prompts](https://github.com/jerryjliu/llama_index/blob/main/llama_index/prompts/default_prompts.py) and [chat specific prompts](https://github.com/jerryjliu/llama_index/blob/main/llama_index/prompts/chat_prompts.py) as a guide. Using a new file `constants.py`, let's create some new query templates:
 
 ```python
-from llama_index.prompts import PromptTemplate, SelectorPromptTemplate, ChatPromptTemplate
+from llama_index.prompts import (
+    PromptTemplate,
+    SelectorPromptTemplate,
+    ChatPromptTemplate,
+)
 from llama_index.prompts.utils import is_chat_model
 from llama_index.llms.base import ChatMessage, MessageRole
 
@@ -359,17 +410,21 @@ So, now we can import these prompts into our app and use them during the query.
 
 ```python
 from constants import REFINE_TEMPLATE, TEXT_QA_TEMPLATE
+
 ...
-    if "llama_index" in st.session_state:
-        query_text = st.text_input("Ask about a term or definition:")
-        if query_text:
-            query_text = query_text  # Notice we removed the old instructions
-            with st.spinner("Generating answer..."):
-                response = st.session_state["llama_index"].query(
-                    query_text, similarity_top_k=5, response_mode="compact",
-                    text_qa_template=TEXT_QA_TEMPLATE, refine_template=REFINE_TEMPLATE
-                )
-            st.markdown(str(response))
+if "llama_index" in st.session_state:
+    query_text = st.text_input("Ask about a term or definition:")
+    if query_text:
+        query_text = query_text  # Notice we removed the old instructions
+        with st.spinner("Generating answer..."):
+            response = st.session_state["llama_index"].query(
+                query_text,
+                similarity_top_k=5,
+                response_mode="compact",
+                text_qa_template=TEXT_QA_TEMPLATE,
+                refine_template=REFINE_TEMPLATE,
+            )
+        st.markdown(str(response))
 ...
 ```
 
@@ -385,6 +440,7 @@ If you get an import error about PIL, install it using `pip install Pillow` firs
 from PIL import Image
 from llama_index.readers.file.base import DEFAULT_FILE_EXTRACTOR, ImageParser
 
+
 @st.cache_resource
 def get_file_extractor():
     image_parser = ImageParser(keep_image=True, parse_text=True)
@@ -399,6 +455,7 @@ def get_file_extractor():
 
     return file_extractor
 
+
 file_extractor = get_file_extractor()
 ...
 with upload_tab:
diff --git a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
index 82420fae6b..c1cd7896f0 100644
--- a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
+++ b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
@@ -17,7 +17,6 @@ In this example, we will analyze Wikipedia articles of different cities: Boston,
 The below code snippet downloads the relevant data into files.
 
 ```python
-
 from pathlib import Path
 import requests
 
@@ -25,26 +24,25 @@ wiki_titles = ["Toronto", "Seattle", "Chicago", "Boston", "Houston"]
 
 for title in wiki_titles:
     response = requests.get(
-        'https://en.wikipedia.org/w/api.php',
+        "https://en.wikipedia.org/w/api.php",
         params={
-            'action': 'query',
-            'format': 'json',
-            'titles': title,
-            'prop': 'extracts',
+            "action": "query",
+            "format": "json",
+            "titles": title,
+            "prop": "extracts",
             # 'exintro': True,
-            'explaintext': True,
-        }
+            "explaintext": True,
+        },
     ).json()
-    page = next(iter(response['query']['pages'].values()))
-    wiki_text = page['extract']
+    page = next(iter(response["query"]["pages"].values()))
+    wiki_text = page["extract"]
 
-    data_path = Path('data')
+    data_path = Path("data")
     if not data_path.exists():
         Path.mkdir(data_path)
 
-    with open(data_path / f"{title}.txt", 'w') as fp:
+    with open(data_path / f"{title}.txt", "w") as fp:
         fp.write(wiki_text)
-
 ```
 
 The next snippet loads all files into Document objects.
@@ -53,8 +51,9 @@ The next snippet loads all files into Document objects.
 # Load all wiki documents
 city_docs = {}
 for wiki_title in wiki_titles:
-    city_docs[wiki_title] = SimpleDirectoryReader(input_files=[f"data/{wiki_title}.txt"]).load_data()
-
+    city_docs[wiki_title] = SimpleDirectoryReader(
+        input_files=[f"data/{wiki_title}.txt"]
+    ).load_data()
 ```
 
 ### Defining the Set of Indexes
@@ -70,9 +69,7 @@ from llama_index.llms import OpenAI
 
 # set service context
 llm_gpt4 = OpenAI(temperature=0, model="gpt-4")
-service_context = ServiceContext.from_defaults(
-    llm=llm_gpt4, chunk_size=1024
-)
+service_context = ServiceContext.from_defaults(llm=llm_gpt4, chunk_size=1024)
 
 # Build city document index
 vector_indices = {}
@@ -87,15 +84,18 @@ for wiki_title in wiki_titles:
     # set id for vector index
     vector_indices[wiki_title].index_struct.index_id = wiki_title
     # persist to disk
-    storage_context.persist(persist_dir=f'./storage/{wiki_title}')
+    storage_context.persist(persist_dir=f"./storage/{wiki_title}")
 ```
 
 Querying a vector index lets us easily perform semantic search over a given city's documents.
 
 ```python
-response = vector_indices["Toronto"].as_query_engine().query("What are the sports teams in Toronto?")
+response = (
+    vector_indices["Toronto"]
+    .as_query_engine()
+    .query("What are the sports teams in Toronto?")
+)
 print(str(response))
-
 ```
 
 Example response:
@@ -131,7 +131,7 @@ graph = ComposableGraph.from_indices(
     SimpleKeywordTableIndex,
     [index for _, index in vector_indices.items()],
     [summary for _, summary in index_summaries.items()],
-    max_keywords_per_chunk=50
+    max_keywords_per_chunk=50,
 )
 
 # get root index
@@ -142,7 +142,6 @@ root_summary = (
     "This index contains Wikipedia articles about multiple cities. "
     "Use this index if you want to compare multiple cities. "
 )
-
 ```
 
 Querying this graph (with a query transform module) allows us to easily compare/contrast between different cities.
@@ -153,24 +152,23 @@ An example is shown below.
 from llama_index import LLMPredictor
 from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
 
-decompose_transform = DecomposeQueryTransform(
-    LLMPredictor(llm=llm_gpt4), verbose=True
-)
+decompose_transform = DecomposeQueryTransform(LLMPredictor(llm=llm_gpt4), verbose=True)
 
 # define custom query engines
 from llama_index.query_engine.transform_query_engine import TransformQueryEngine
+
 custom_query_engines = {}
 for index in vector_indices.values():
     query_engine = index.as_query_engine(service_context=service_context)
     query_engine = TransformQueryEngine(
         query_engine,
         query_transform=decompose_transform,
-        transform_extra_info={'index_summary': index.index_struct.summary},
+        transform_extra_info={"index_summary": index.index_struct.summary},
     )
     custom_query_engines[index.index_id] = query_engine
 custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
-    retriever_mode='simple',
-    response_mode='tree_summarize',
+    retriever_mode="simple",
+    response_mode="tree_summarize",
     service_context=service_context,
 )
 
@@ -178,9 +176,7 @@ custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
 query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
 
 # query the graph
-query_str = (
-    "Compare and contrast the arts and culture of Houston and Boston. "
-)
+query_str = "Compare and contrast the arts and culture of Houston and Boston. "
 response_chatgpt = query_engine.query(query_str)
 ```
 
@@ -222,7 +218,9 @@ graph_description = (
     "This tool contains Wikipedia articles about multiple cities. "
     "Use this tool if you want to compare multiple cities. "
 )
-graph_tool = QueryEngineTool.from_defaults(graph_query_engine, description=graph_description)
+graph_tool = QueryEngineTool.from_defaults(
+    graph_query_engine, description=graph_description
+)
 query_engine_tools.append(graph_tool)
 ```
 
@@ -236,7 +234,7 @@ from llama_index.selectors.llm_selectors import LLMSingleSelector
 
 router_query_engine = RouterQueryEngine(
     selector=LLMSingleSelector.from_defaults(service_context=service_context),
-    query_engine_tools=query_engine_tools
+    query_engine_tools=query_engine_tools,
 )
 ```
 
@@ -255,16 +253,14 @@ Let's take a look at a few examples!
 response = router_query_engine.query(
     "Compare and contrast the arts and culture of Houston and Boston.",
 )
-print(str(response)
+print(str(response))
 ```
 
 **Asking Questions about specific Cities**
 
 ```python
-
 response = router_query_engine.query("What are the sports teams in Toronto?")
 print(str(response))
-
 ```
 
 This "outer" abstraction is able to handle different queries by routing to the right underlying abstractions.
diff --git a/docs/understanding/putting_it_all_together/structured_data.md b/docs/understanding/putting_it_all_together/structured_data.md
index 374f62b179..0e38a4b78c 100644
--- a/docs/understanding/putting_it_all_together/structured_data.md
+++ b/docs/understanding/putting_it_all_together/structured_data.md
@@ -20,7 +20,16 @@ A notebook for this tutorial is [available here](../../examples/index_structs/st
 First, we use SQLAlchemy to set up a simple SQLite db:
 
 ```python
-from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column
+from sqlalchemy import (
+    create_engine,
+    MetaData,
+    Table,
+    Column,
+    String,
+    Integer,
+    select,
+    column,
+)
 
 engine = create_engine("sqlite:///:memory:")
 metadata_obj = MetaData()
@@ -49,6 +58,7 @@ to directly populate this table:
 
 ```python
 from sqlalchemy import insert
+
 rows = [
     {"city_name": "Toronto", "population": 2731571, "country": "Canada"},
     {"city_name": "Tokyo", "population": 13929286, "country": "Japan"},
@@ -85,9 +95,7 @@ query_engine = NLSQLTableQueryEngine(
     sql_database=sql_database,
     tables=["city_stats"],
 )
-query_str = (
-    "Which city has the highest population?"
-)
+query_str = "Which city has the highest population?"
 response = query_engine.query(query_str)
 ```
 
@@ -109,7 +117,10 @@ into the ObjectIndex constructor.
 from llama_index.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
 
 table_node_mapping = SQLTableNodeMapping(sql_database)
-table_schema_objs = [(SQLTableSchema(table_name="city_stats")), ...] # one SQLTableSchema for each table
+table_schema_objs = [
+    (SQLTableSchema(table_name="city_stats")),
+    ...,
+]  # one SQLTableSchema for each table
 
 obj_index = ObjectIndex.from_objects(
     table_schema_objs,
@@ -133,7 +144,9 @@ city_stats_text = (
 )
 
 table_node_mapping = SQLTableNodeMapping(sql_database)
-table_schema_objs = [(SQLTableSchema(table_name="city_stats", context_str=city_stats_text))]
+table_schema_objs = [
+    (SQLTableSchema(table_name="city_stats", context_str=city_stats_text))
+]
 ```
 
 ## Using natural language SQL queries
diff --git a/docs/understanding/querying/querying.md b/docs/understanding/querying/querying.md
index d60aef1a1a..70ff0a2605 100644
--- a/docs/understanding/querying/querying.md
+++ b/docs/understanding/querying/querying.md
@@ -10,7 +10,9 @@ The basis of all querying is the `QueryEngine`. The simplest way to get a QueryE
 
 ```python
 query_engine = index.as_query_engine()
-response = query_engine.query("Write an email to the user given their background information.")
+response = query_engine.query(
+    "Write an email to the user given their background information."
+)
 print(response)
 ```
 
@@ -57,10 +59,7 @@ response_synthesizer = get_response_synthesizer()
 query_engine = RetrieverQueryEngine(
     retriever=retriever,
     response_synthesizer=response_synthesizer,
-    node_postprocessors=[
-        SimilarityPostprocessor(similarity_cutoff=0.7)
-    ]
-
+    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
 )
 
 # query
@@ -103,8 +102,7 @@ To configure the desired node postprocessors:
 ```python
 node_postprocessors = [
     KeywordNodePostprocessor(
-        required_keywords=["Combinator"],
-        exclude_keywords=["Italy"]
+        required_keywords=["Combinator"], exclude_keywords=["Italy"]
     )
 ]
 query_engine = RetrieverQueryEngine.from_args(
@@ -120,7 +118,7 @@ After a retriever fetches relevant nodes, a `BaseSynthesizer` synthesizes the fi
 You can configure it via
 
 ```python
-query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=<response_mode>)
+query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=response_mode)
 ```
 
 Right now, we support the following options:
diff --git a/docs/understanding/storing/storing.md b/docs/understanding/storing/storing.md
index 77f530895f..69b870742c 100644
--- a/docs/understanding/storing/storing.md
+++ b/docs/understanding/storing/storing.md
@@ -42,7 +42,7 @@ LlamaIndex supports a [huge number of vector stores](/module_guides/storing/vect
 
 First you will need to install chroma:
 
-```python
+```
 pip install chromadb
 ```
 
@@ -75,10 +75,7 @@ vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
 # create your index
-index = VectorStoreIndex.from_documents(
-    documents,
-    storage_context=storage_context
-)
+index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 
 # create a query engine and query
 query_engine = index.as_query_engine()
@@ -106,8 +103,7 @@ storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
 # load your index from stored vectors
 index = VectorStoreIndex.from_vector_store(
-    vector_store,
-    storage_context=storage_context
+    vector_store, storage_context=storage_context
 )
 
 # create a query engine
diff --git a/docs/understanding/using_llms/using_llms.md b/docs/understanding/using_llms/using_llms.md
index 5cb25d4e80..a6d4343f63 100644
--- a/docs/understanding/using_llms/using_llms.md
+++ b/docs/understanding/using_llms/using_llms.md
@@ -18,7 +18,7 @@ LlamaIndex provides a single interface to a large number of different LLMs, allo
 ```python
 from llama_index.llms import OpenAI
 
-response = OpenAI().complete('Paul Graham is ')
+response = OpenAI().complete("Paul Graham is ")
 print(response)
 ```
 
@@ -31,8 +31,8 @@ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
 llm = OpenAI(temperature=0.1, model="gpt-4")
 service_context = ServiceContext.from_defaults(llm=llm)
 
-documents = SimpleDirectoryReader('data').load_data()
-index = VectorStoreIndex.from_documents(documents,service_context=service_context)
+documents = SimpleDirectoryReader("data").load_data()
+index = VectorStoreIndex.from_documents(documents, service_context=service_context)
 ```
 
 In this case, you've instantiated OpenAI and customized it to use the `gpt-4` model instead of the default `gpt-3.5-turbo`, and also modified the `temperature`. The `VectorStoreIndex` will now use gpt-4 when synthesizing answers to queries over your documents.
@@ -53,6 +53,7 @@ Once you have a local LLM such as Llama 2 installed, you can use it like this:
 
 ```python
 from llama_index import ServiceContext
+
 service_context = ServiceContext.from_defaults(llm="local")
 ```
 
diff --git a/llama_index/embeddings/elasticsearch.py b/llama_index/embeddings/elasticsearch.py
index f4c1cdaaf8..522ed27407 100644
--- a/llama_index/embeddings/elasticsearch.py
+++ b/llama_index/embeddings/elasticsearch.py
@@ -74,9 +74,7 @@ class ElasticsearchEmbedding(BaseEmbedding):
                 input_field = "your_input_field"
 
                 # Create Elasticsearch connection
-                es_connection = Elasticsearch(
-                    hosts=["localhost:9200"], basic_auth=("user", "password")
-                )
+                es_connection = Elasticsearch(hosts=["localhost:9200"], basic_auth=("user", "password"))
 
                 # Instantiate ElasticsearchEmbedding using the existing connection
                 embeddings = ElasticsearchEmbedding.from_es_connection(
diff --git a/llama_index/indices/keyword_table/README.md b/llama_index/indices/keyword_table/README.md
index bbf4150d72..5d22b808db 100644
--- a/llama_index/indices/keyword_table/README.md
+++ b/llama_index/indices/keyword_table/README.md
@@ -28,7 +28,7 @@ Use the popular RAKE keyword extractor.
 from llama_index import KeywordTableIndex, SimpleDirectoryReader
 
 # build index
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = KeywordTableIndex.from_documents(documents)
 # query
 query_engine = index.as_query_engine()
diff --git a/llama_index/indices/list/README.md b/llama_index/indices/list/README.md
index 81eca24700..f75159166f 100644
--- a/llama_index/indices/list/README.md
+++ b/llama_index/indices/list/README.md
@@ -14,10 +14,9 @@ During query-time, Summary Index constructs an answer using the _create and refi
 from llama_index import SummaryIndex, SimpleDirectoryReader
 
 # build index
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = SummaryIndex.from_documents(documents)
 # query
 query_engine = index.as_query_engine()
 response = query_engine.query("<question text>")
-
 ```
diff --git a/llama_index/indices/tree/README.md b/llama_index/indices/tree/README.md
index 3ff3954fb1..4dca8f6987 100644
--- a/llama_index/indices/tree/README.md
+++ b/llama_index/indices/tree/README.md
@@ -24,7 +24,7 @@ Simply use the root nodes as context to synthesize an answer to the query. This
 from llama_index import TreeIndex, SimpleDirectoryReader
 
 # build index
-documents = SimpleDirectoryReader('data').load_data()
+documents = SimpleDirectoryReader("data").load_data()
 index = TreeIndex.from_documents(documents)
 # query
 query_engine = index.as_query_engine()
diff --git a/llama_index/readers/string_iterable.py b/llama_index/readers/string_iterable.py
index 435ac87307..222ebd295c 100644
--- a/llama_index/readers/string_iterable.py
+++ b/llama_index/readers/string_iterable.py
@@ -16,7 +16,8 @@ class StringIterableReader(BasePydanticReader):
             from llama_index import StringIterableReader, TreeIndex
 
             documents = StringIterableReader().load_data(
-                texts=["I went to the store", "I bought an apple"])
+                texts=["I went to the store", "I bought an apple"]
+            )
             index = TreeIndex.from_documents(documents)
             query_engine = index.as_query_engine()
             query_engine.query("what did I buy?")
diff --git a/llama_index/storage/docstore/keyval_docstore.py b/llama_index/storage/docstore/keyval_docstore.py
index 9b3302287a..e27198f609 100644
--- a/llama_index/storage/docstore/keyval_docstore.py
+++ b/llama_index/storage/docstore/keyval_docstore.py
@@ -28,10 +28,7 @@ class KVDocumentStore(BaseDocumentStore):
 
         summary_index = SummaryIndex(nodes, storage_context=storage_context)
         vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
-        keyword_table_index = SimpleKeywordTableIndex(
-            nodes,
-            storage_context=storage_context
-        )
+        keyword_table_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)
 
     This will use the same docstore for multiple index structures.
 
diff --git a/llama_index/tools/tool_spec/load_and_search/README.md b/llama_index/tools/tool_spec/load_and_search/README.md
index 31a398b89a..094c8c9ed4 100644
--- a/llama_index/tools/tool_spec/load_and_search/README.md
+++ b/llama_index/tools/tool_spec/load_and_search/README.md
@@ -18,11 +18,10 @@ tool = wiki_spec.to_tool_list()[1]
 
 # Wrap the tool, splitting into a loader and a reader
 agent = OpenAIAgent.from_tools(
- LoadAndSearchToolSpec.from_defaults(
-    tool
- ).to_tool_list(), verbose=True)
+    LoadAndSearchToolSpec.from_defaults(tool).to_tool_list(), verbose=True
+)
 
-agent.chat('who is ben affleck married to')
+agent.chat("who is ben affleck married to")
 ```
 
 `load`: Calls the wrapped function and loads the data into an index
-- 
GitLab