diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 531933cf10f601fce96c40825be825fbb6b98152..3e6c55d5d5f30a1b10d506adf918bd08013515f0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,13 +26,26 @@ repos:
     hooks:
       - id: black-jupyter
         name: black-src
+        alias: black
         exclude: docs/
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
       - id: black-jupyter
-        name: black-docs
+        name: black-docs-py
+        alias: black
         files: docs/
+        # Using PEP 8's line length in docs prevents excess left/right scrolling
+        args: [--line-length=79]
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: 1.16.0
+    hooks:
+      - id: blacken-docs
+        name: black-docs-text
+        alias: black
+        types_or: [rst, markdown, tex]
+        additional_dependencies: [black==23.10.1]
+        # Using PEP 8's line length in docs prevents excess left/right scrolling
         args: [--line-length=79]
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v3.0.3
@@ -53,8 +66,3 @@ repos:
     hooks:
       - id: toml-sort-fix
         exclude: poetry.lock
-  - repo: https://github.com/adamchainz/blacken-docs
-    rev: "1.16.0"
-    hooks:
-      - id: blacken-docs
-        additional_dependencies: [black==23.10.1]
diff --git a/Makefile b/Makefile
index b2b8b95da58beb3dde48b7cbed590e5b250ae617..d35d30f7481e41106e2fa64387e77ac24c67dc7e 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ help:	## Show all Makefile targets.
 
 format:	## Run code autoformatters (black).
 	pre-commit install
-	pre-commit run black-jupyter --all-files
+	pre-commit run black --all-files
 
 lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
 	pre-commit install && pre-commit run --all-files --show-diff-on-failure
diff --git a/README.md b/README.md
index a42ec47e76f6a9a24d1d8bbabbe7a7960e753dd3..89df1f3030314a58958facf0c3a9bbbaf0c0eb86 100644
--- a/README.md
+++ b/README.md
@@ -99,12 +99,16 @@ from llama_index.embeddings import HuggingFaceEmbedding
 from llama_index import ServiceContext
 
 embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
+service_context = ServiceContext.from_defaults(
+    llm=llm, embed_model=embed_model
+)
 
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
 documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data()
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+index = VectorStoreIndex.from_documents(
+    documents, service_context=service_context
+)
 ```
 
 To query:
diff --git a/docs/community/integrations/graphsignal.md b/docs/community/integrations/graphsignal.md
index d6ad8ddd2a71ec2f60011d1f7a87a59f97b4469e..51fd3ad759e55dda2ab917c0de14aa3a85241481 100644
--- a/docs/community/integrations/graphsignal.md
+++ b/docs/community/integrations/graphsignal.md
@@ -18,7 +18,9 @@ pip install graphsignal
 import graphsignal
 
 # Provide an API key directly or via GRAPHSIGNAL_API_KEY environment variable
-graphsignal.configure(api_key="my-api-key", deployment="my-llama-index-app-prod")
+graphsignal.configure(
+    api_key="my-api-key", deployment="my-llama-index-app-prod"
+)
 ```
 
 You can get an API key [here](https://app.graphsignal.com/).
diff --git a/docs/community/integrations/guidance.md b/docs/community/integrations/guidance.md
index 4004c8a56d97018708d25f3c82b527c3e4bde0be..130173d7adebde4331025cdb51c6ea560fa8c2f5 100644
--- a/docs/community/integrations/guidance.md
+++ b/docs/community/integrations/guidance.md
@@ -77,7 +77,9 @@ intermediate response has the expected structure (so that they can be parsed cor
 As an example, we implement a `GuidanceQuestionGenerator` that can be plugged into a `SubQuestionQueryEngine` to make it more robust than using the default setting.
 
 ```python
-from llama_index.question_gen.guidance_generator import GuidanceQuestionGenerator
+from llama_index.question_gen.guidance_generator import (
+    GuidanceQuestionGenerator,
+)
 from guidance.llms import OpenAI as GuidanceOpenAI
 
 # define guidance based question generator
diff --git a/docs/community/integrations/lmformatenforcer.md b/docs/community/integrations/lmformatenforcer.md
index 24306b1b0572a72497e3a1a619b84d3e4d48651a..b7083d1cfe3e6fd0efa697b0aee872d68f366ec7 100644
--- a/docs/community/integrations/lmformatenforcer.md
+++ b/docs/community/integrations/lmformatenforcer.md
@@ -53,7 +53,10 @@ Album(
     artist="The Shining Choir",
     songs=[
         Song(title="Redrum", length_seconds=300),
-        Song(title="All Work and No Play Makes Jack a Dull Boy", length_seconds=240),
+        Song(
+            title="All Work and No Play Makes Jack a Dull Boy",
+            length_seconds=240,
+        ),
         Song(title="Heeeeere's Johnny!", length_seconds=180),
     ],
 )
diff --git a/docs/community/integrations/using_with_langchain.md b/docs/community/integrations/using_with_langchain.md
index 11ce40d7f6022a5b0372dd7bc1f0473e3117bd2d..dcb4d3d3466fad2224477bbde28817d67b7418de 100644
--- a/docs/community/integrations/using_with_langchain.md
+++ b/docs/community/integrations/using_with_langchain.md
@@ -21,7 +21,10 @@ LlamaIndex provides Tool abstractions so that you can use a LlamaIndex query eng
 For instance, you can choose to create a "Tool" from a `QueryEngine` directly as follows:
 
 ```python
-from llama_index.langchain_helpers.agents import IndexToolConfig, LlamaIndexTool
+from llama_index.langchain_helpers.agents import (
+    IndexToolConfig,
+    LlamaIndexTool,
+)
 
 tool_config = IndexToolConfig(
     query_engine=query_engine,
diff --git a/docs/community/integrations/vector_stores.md b/docs/community/integrations/vector_stores.md
index 47df4538e8cf58fa1b8273fb8ba2f3b828b2c8be..d857285a216697c641c38559f9c49133393a2d67 100644
--- a/docs/community/integrations/vector_stores.md
+++ b/docs/community/integrations/vector_stores.md
@@ -74,7 +74,9 @@ storage_context = StorageContext.from_defaults(
 
 # Load documents and build index
 documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
-index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    documents, storage_context=storage_context
+)
 
 # Query index
 query_engine = index.as_query_engine()
@@ -129,7 +131,9 @@ Then connect and use Redis as a vector database with LlamaIndex
 from llama_index.vector_stores import RedisVectorStore
 
 vector_store = RedisVectorStore(
-    index_name="llm-project", redis_url="redis://localhost:6379", overwrite=True
+    index_name="llm-project",
+    redis_url="redis://localhost:6379",
+    overwrite=True,
 )
 ```
 
@@ -183,7 +187,8 @@ resource_owner_config = weaviate.AuthClientPassword(
     password="<password>",
 )
 client = weaviate.Client(
-    "https://<cluster-id>.semi.network/", auth_client_secret=resource_owner_config
+    "https://<cluster-id>.semi.network/",
+    auth_client_secret=resource_owner_config,
 )
 
 # construct vector store
@@ -206,10 +211,14 @@ vector_store = ZepVectorStore(
 
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
-index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    documents, storage_context=storage_context
+)
 
 # Query index using both a text query and metadata filters
-filters = MetadataFilters(filters=[ExactMatchFilter(key="theme", value="Mafia")])
+filters = MetadataFilters(
+    filters=[ExactMatchFilter(key="theme", value="Mafia")]
+)
 retriever = index.as_retriever(filters=filters)
 result = retriever.retrieve("What is inception about?")
 ```
@@ -223,7 +232,9 @@ from llama_index.vector_stores import PineconeVectorStore
 # Creating a Pinecone index
 api_key = "api_key"
 pinecone.init(api_key=api_key, environment="us-west1-gcp")
-pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1")
+pinecone.create_index(
+    "quickstart", dimension=1536, metric="euclidean", pod_type="p1"
+)
 index = pinecone.Index("quickstart")
 
 # can define filters specific to this vector index (so you can
@@ -271,7 +282,9 @@ cluster = Cluster(["127.0.0.1"])
 cassio.init(session=cluster.connect(), keyspace="my_keyspace")
 
 # After the above `cassio.init(...)`, create a vector store:
-vector_store = CassandraVectorStore(table="cass_v_table", embedding_dimension=1536)
+vector_store = CassandraVectorStore(
+    table="cass_v_table", embedding_dimension=1536
+)
 ```
 
 **Chroma**
@@ -319,7 +332,9 @@ import pymilvus
 from llama_index.vector_stores import MilvusVectorStore
 
 # construct vector store
-vector_store = MilvusVectorStore(uri="https://localhost:19530", overwrite="True")
+vector_store = MilvusVectorStore(
+    uri="https://localhost:19530", overwrite="True"
+)
 ```
 
 **Note**: `MilvusVectorStore` depends on the `pymilvus` library.
@@ -339,7 +354,9 @@ from llama_index.vector_stores import MilvusVectorStore
 
 # construct vector store
 vector_store = MilvusVectorStore(
-    uri="foo.vectordb.zillizcloud.com", token="your_token_here", overwrite="True"
+    uri="foo.vectordb.zillizcloud.com",
+    token="your_token_here",
+    overwrite="True",
 )
 ```
 
@@ -423,16 +440,22 @@ from llama_index.storage.storage_context import StorageContext
 from llama_index.readers.file.base import SimpleDirectoryReader
 
 # mongo_uri = os.environ["MONGO_URI"]
-mongo_uri = "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority"
+mongo_uri = (
+    "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority"
+)
 mongodb_client = pymongo.MongoClient(mongo_uri)
 
 # construct store
 store = MongoDBAtlasVectorSearch(mongodb_client)
 storage_context = StorageContext.from_defaults(vector_store=store)
-uber_docs = SimpleDirectoryReader(input_files=["../data/10k/uber_2021.pdf"]).load_data()
+uber_docs = SimpleDirectoryReader(
+    input_files=["../data/10k/uber_2021.pdf"]
+).load_data()
 
 # construct index
-index = VectorStoreIndex.from_documents(uber_docs, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    uber_docs, storage_context=storage_context
+)
 ```
 
 **Neo4j**
@@ -517,7 +540,9 @@ reader = ChromaReader(
 
 query_vector = [n1, n2, n3, ...]
 
-documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5)
+documents = reader.load_data(
+    collection_name="demo", query_vector=query_vector, limit=5
+)
 index = SummaryIndex.from_documents(documents)
 
 query_engine = index.as_query_engine()
@@ -542,7 +567,9 @@ query_vector = [n1, n2, n3, ...]
 # See the Python client: https://github.com/qdrant/qdrant_client
 # for more details
 
-documents = reader.load_data(collection_name="demo", query_vector=query_vector, limit=5)
+documents = reader.load_data(
+    collection_name="demo", query_vector=query_vector, limit=5
+)
 ```
 
 NOTE: Since Weaviate can store a hybrid of document and vector objects, the user may either choose to explicitly specify `class_name` and `properties` in order to query documents, or they may choose to specify a raw GraphQL query. See below for usage.
diff --git a/docs/getting_started/customization.rst b/docs/getting_started/customization.rst
index 3ca772a365a3336a9a53e31dfe2cdd45f980032e..ed92118447a21d46a8ab90900787a5ce98edcf7e 100644
--- a/docs/getting_started/customization.rst
+++ b/docs/getting_started/customization.rst
@@ -33,7 +33,9 @@ The `ServiceContext <../module_guides/supporting_modules/service_context.html>`_
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
     documents = SimpleDirectoryReader("data").load_data()
-    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+    index = VectorStoreIndex.from_documents(
+        documents, service_context=service_context
+    )
     query_engine = index.as_query_engine()
     response = query_engine.query("What did the author do growing up?")
     print(response)
@@ -61,7 +63,9 @@ The `ServiceContext <../module_guides/supporting_modules/service_context.html>`_
     from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
     documents = SimpleDirectoryReader("data").load_data()
-    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+    index = VectorStoreIndex.from_documents(
+        documents, storage_context=storage_context
+    )
     query_engine = index.as_query_engine()
     response = query_engine.query("What did the author do growing up?")
     print(response)
diff --git a/docs/module_guides/deploying/agents/tools/usage_pattern.md b/docs/module_guides/deploying/agents/tools/usage_pattern.md
index a7f687848a3a5191f7a9884521ee26e681cb9865..03091a1e58f0c52f4aeebbfb506fb950945c100f 100644
--- a/docs/module_guides/deploying/agents/tools/usage_pattern.md
+++ b/docs/module_guides/deploying/agents/tools/usage_pattern.md
@@ -48,6 +48,9 @@ langchain_tools = [t.to_langchain_tool() for t in tools]
 from langchain.agents import initialize_agent
 
 agent_executor = initialize_agent(
-    langchain_tools, llm, agent="conversational-react-description", memory=memory
+    langchain_tools,
+    llm,
+    agent="conversational-react-description",
+    memory=memory,
 )
 ```
diff --git a/docs/module_guides/deploying/agents/usage_pattern.md b/docs/module_guides/deploying/agents/usage_pattern.md
index 3b77b84770539916adcc66bfef883832a220b6f9..d41c1281f67312e520f66b55a27b91c2386faaf3 100644
--- a/docs/module_guides/deploying/agents/usage_pattern.md
+++ b/docs/module_guides/deploying/agents/usage_pattern.md
@@ -87,7 +87,8 @@ query_engine_tools = [
     QueryEngineTool(
         query_engine=gmail_agent,
         metadata=ToolMetadata(
-            name="gmail_agent", description="Tool that can send emails on Gmail."
+            name="gmail_agent",
+            description="Tool that can send emails on Gmail.",
         ),
     ),
 ]
@@ -129,7 +130,9 @@ We then define our `FnRetrieverOpenAIAgent`:
 ```python
 from llama_index.agent import FnRetrieverOpenAIAgent
 
-agent = FnRetrieverOpenAIAgent.from_retriever(obj_index.as_retriever(), verbose=True)
+agent = FnRetrieverOpenAIAgent.from_retriever(
+    obj_index.as_retriever(), verbose=True
+)
 ```
 
 ### Context Retrieval Agents
@@ -155,7 +158,9 @@ context_index = VectorStoreIndex.from_documents(docs)
 
 # add context agent
 context_agent = ContextRetrieverOpenAIAgent.from_tools_and_retriever(
-    query_engine_tools, context_index.as_retriever(similarity_top_k=1), verbose=True
+    query_engine_tools,
+    context_index.as_retriever(similarity_top_k=1),
+    verbose=True,
 )
 response = context_agent.chat("What is the YZ of March 2022?")
 ```
@@ -171,7 +176,9 @@ plan over a set of subtools.
 from llama_index.tools import QueryPlanTool
 from llama_index import get_response_synthesizer
 
-response_synthesizer = get_response_synthesizer(service_context=service_context)
+response_synthesizer = get_response_synthesizer(
+    service_context=service_context
+)
 query_plan_tool = QueryPlanTool.from_defaults(
     query_engine_tools=[query_tool_sept, query_tool_june, query_tool_march],
     response_synthesizer=response_synthesizer,
@@ -186,5 +193,7 @@ agent = OpenAIAgent.from_tools(
 )
 
 # should output a query plan to call march, june, and september tools
-response = agent.query("Analyze Uber revenue growth in March, June, and September")
+response = agent.query(
+    "Analyze Uber revenue growth in March, June, and September"
+)
 ```
diff --git a/docs/module_guides/deploying/chat_engines/usage_pattern.md b/docs/module_guides/deploying/chat_engines/usage_pattern.md
index 29f3f4ec02297fd7b1031e7082ae8ff0d1e467b5..ec4d20ce258f8a0fd6f6027016a2ca14406d6a61 100644
--- a/docs/module_guides/deploying/chat_engines/usage_pattern.md
+++ b/docs/module_guides/deploying/chat_engines/usage_pattern.md
@@ -71,7 +71,9 @@ Here's an example where we configure the following:
 ```python
 from llama_index.prompts import PromptTemplate
 from llama_index.llms import ChatMessage, MessageRole
-from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
+from llama_index.chat_engine.condense_question import (
+    CondenseQuestionChatEngine,
+)
 
 custom_prompt = PromptTemplate(
     """\
diff --git a/docs/module_guides/deploying/query_engine/usage_pattern.md b/docs/module_guides/deploying/query_engine/usage_pattern.md
index a106e749872539f67cef888ba559d282390c05c3..1b46cab76eed72b88298e0f081f321d88394ba87 100644
--- a/docs/module_guides/deploying/query_engine/usage_pattern.md
+++ b/docs/module_guides/deploying/query_engine/usage_pattern.md
@@ -108,7 +108,10 @@ You can also define a custom query engine. Simply subclass the `CustomQueryEngin
 ```python
 from llama_index.query_engine import CustomQueryEngine
 from llama_index.retrievers import BaseRetriever
-from llama_index.response_synthesizers import get_response_synthesizer, BaseSynthesizer
+from llama_index.response_synthesizers import (
+    get_response_synthesizer,
+    BaseSynthesizer,
+)
 
 
 class RAGQueryEngine(CustomQueryEngine):
diff --git a/docs/module_guides/evaluating/usage_pattern.md b/docs/module_guides/evaluating/usage_pattern.md
index 3c031af42f232dc1479f37690973e2f93e5dbe86..8b6dae3ab2447cc85ca0b15b9f9b3358c514691a 100644
--- a/docs/module_guides/evaluating/usage_pattern.md
+++ b/docs/module_guides/evaluating/usage_pattern.md
@@ -159,7 +159,9 @@ response = query_engine.query(query)
 response_str = response.response
 for source_node in response.source_nodes:
     eval_result = evaluator.evaluate(
-        query=query, response=response_str, contexts=[source_node.get_content()]
+        query=query,
+        response=response_str,
+        contexts=[source_node.get_content()],
     )
     print(str(eval_result.passing))
 ```
diff --git a/docs/module_guides/evaluating/usage_pattern_retrieval.md b/docs/module_guides/evaluating/usage_pattern_retrieval.md
index 6600f6d4892e58d62efe3bc0fab88958a490ae6a..3696d05d50985855f353589f5eb91cb7acc7dcdb 100644
--- a/docs/module_guides/evaluating/usage_pattern_retrieval.md
+++ b/docs/module_guides/evaluating/usage_pattern_retrieval.md
@@ -17,7 +17,9 @@ retriever_evaluator = RetrieverEvaluator.from_metric_names(
     ["mrr", "hit_rate"], retriever=retriever
 )
 
-retriever_evaluator.evaluate(query="query", expected_ids=["node_id1", "node_id2"])
+retriever_evaluator.evaluate(
+    query="query", expected_ids=["node_id1", "node_id2"]
+)
 ```
 
 ## Building an Evaluation Dataset
@@ -27,7 +29,9 @@ You can manually curate a retrieval evaluation dataset of questions + node ids.
 ```python
 from llama_index.evaluation import generate_question_context_pairs
 
-qa_dataset = generate_question_context_pairs(nodes, llm=llm, num_questions_per_chunk=2)
+qa_dataset = generate_question_context_pairs(
+    nodes, llm=llm, num_questions_per_chunk=2
+)
 ```
 
 The returned result is a `EmbeddingQAFinetuneDataset` object (containing `queries`, `relevant_docs`, and `corpus`).
diff --git a/docs/module_guides/indexing/composability.md b/docs/module_guides/indexing/composability.md
index 3f2afe255c11c4169ddc4139e2277b676a2d9077..c5e43b290bf87ed770b4140aecf1b73a25d139c5 100644
--- a/docs/module_guides/indexing/composability.md
+++ b/docs/module_guides/indexing/composability.md
@@ -49,7 +49,9 @@ You may choose to manually specify the summary text, or use LlamaIndex itself to
 a summary, for instance with the following:
 
 ```python
-summary = index1.query("What is a summary of this document?", retriever_mode="all_leaf")
+summary = index1.query(
+    "What is a summary of this document?", retriever_mode="all_leaf"
+)
 index1_summary = str(summary)
 ```
 
diff --git a/docs/module_guides/indexing/document_management.md b/docs/module_guides/indexing/document_management.md
index 0abfc8898a86ea76c896439d558f9978e93e6696..acbf217122aefb2436a66d70b98341f0183b7d43 100644
--- a/docs/module_guides/indexing/document_management.md
+++ b/docs/module_guides/indexing/document_management.md
@@ -48,7 +48,8 @@ If a Document is already present within an index, you can "update" a Document wi
 # NOTE: the document has a `doc_id` specified
 doc_chunks[0].text = "Brand new document text"
 index.update_ref_doc(
-    doc_chunks[0], update_kwargs={"delete_kwargs": {"delete_from_docstore": True}}
+    doc_chunks[0],
+    update_kwargs={"delete_kwargs": {"delete_from_docstore": True}},
 )
 ```
 
@@ -68,7 +69,10 @@ doc_chunks[0] = Document(text="Super new document text", id_="doc_id_0")
 
 # add a new document
 doc_chunks.append(
-    Document(text="This isn't in the index yet, but it will be soon!", id_="doc_id_3")
+    Document(
+        text="This isn't in the index yet, but it will be soon!",
+        id_="doc_id_3",
+    )
 )
 
 # refresh the index
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_documents.md b/docs/module_guides/loading/documents_and_nodes/usage_documents.md
index 41195db50412b547fa966ae2922fe0c496b5663c..28ac753489c923b7e7ec59e9866413569b3dad83 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_documents.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_documents.md
@@ -43,7 +43,8 @@ There are a few ways to set up this dictionary:
 
 ```python
 document = Document(
-    text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"}
+    text="text",
+    metadata={"filename": "<doc_file_name>", "category": "<category>"},
 )
 ```
 
@@ -61,7 +62,9 @@ from llama_index import SimpleDirectoryReader
 filename_fn = lambda filename: {"file_name": filename}
 
 # automatically sets the metadata of each document according to filename_fn
-documents = SimpleDirectoryReader("./data", file_metadata=filename_fn).load_data()
+documents = SimpleDirectoryReader(
+    "./data", file_metadata=filename_fn
+).load_data()
 ```
 
 ### Customizing the id
@@ -158,7 +161,10 @@ document = Document(
     text_template="Metadata: {metadata_str}\n-----\nContent: {content}",
 )
 
-print("The LLM sees this: \n", document.get_content(metadata_mode=MetadataMode.LLM))
+print(
+    "The LLM sees this: \n",
+    document.get_content(metadata_mode=MetadataMode.LLM),
+)
 print(
     "The Embedding model sees this: \n",
     document.get_content(metadata_mode=MetadataMode.EMBED),
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
index 9bb6cb7c85ede8b0a1e8bb984ea44b5d3973c024..17372550df69432ad36d3e70f9949abab59458f2 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_metadata_extractor.md
@@ -19,7 +19,9 @@ from llama_index.node_parser.extractors import (
 )
 from llama_index.text_splitter import TokenTextSplitter
 
-text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)
+text_splitter = TokenTextSplitter(
+    separator=" ", chunk_size=512, chunk_overlap=128
+)
 metadata_extractor = MetadataExtractor(
     extractors=[
         TitleExtractor(nodes=5),
diff --git a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
index 6bebdce04ed147a37cd7008d8425cdcc48a37185..643267b291bc87276e985d84ae51261342730573 100644
--- a/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
+++ b/docs/module_guides/loading/documents_and_nodes/usage_nodes.md
@@ -23,8 +23,12 @@ from llama_index.schema import TextNode, NodeRelationship, RelatedNodeInfo
 node1 = TextNode(text="<text_chunk>", id_="<node_id>")
 node2 = TextNode(text="<text_chunk>", id_="<node_id>")
 # set relationships
-node1.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(node_id=node2.node_id)
-node2.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(node_id=node1.node_id)
+node1.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
+    node_id=node2.node_id
+)
+node2.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
+    node_id=node1.node_id
+)
 nodes = [node1, node2]
 ```
 
diff --git a/docs/module_guides/loading/node_parsers/root.md b/docs/module_guides/loading/node_parsers/root.md
index 6a04db518c629d0fdce8190cd70c3f240d5a460c..946db9a2b498c99d0909eb9eb3bded6e546f2edb 100644
--- a/docs/module_guides/loading/node_parsers/root.md
+++ b/docs/module_guides/loading/node_parsers/root.md
@@ -42,7 +42,9 @@ documents = SimpleDirectoryReader("./data").load_data()
 node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20)
 service_context = ServiceContext.from_defaults(node_parser=node_parser)
 
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+index = VectorStoreIndex.from_documents(
+    documents, service_context=service_context
+)
 ```
 
 ## Customization
diff --git a/docs/module_guides/models/embeddings.md b/docs/module_guides/models/embeddings.md
index dcafee735f67ffb93973817bb08642e3ff11e905..63af416cf030d06e16724bad91640c9e496ae6f9 100644
--- a/docs/module_guides/models/embeddings.md
+++ b/docs/module_guides/models/embeddings.md
@@ -91,7 +91,9 @@ To configure the model used (from Hugging Face hub), add the model name separate
 ```python
 from llama_index import ServiceContext
 
-service_context = ServiceContext.from_defaults(embed_model="local:BAAI/bge-large-en")
+service_context = ServiceContext.from_defaults(
+    embed_model="local:BAAI/bge-large-en"
+)
 ```
 
 ### HuggingFace Optimum ONNX Embeddings
@@ -109,7 +111,9 @@ Creation with specifying the model and output path:
 ```python
 from llama_index.embeddings import OptimumEmbedding
 
-OptimumEmbedding.create_and_save_optimum_model("BAAI/bge-small-en-v1.5", "./bge_onnx")
+OptimumEmbedding.create_and_save_optimum_model(
+    "BAAI/bge-small-en-v1.5", "./bge_onnx"
+)
 ```
 
 And then usage:
@@ -179,7 +183,9 @@ class InstructorEmbeddings(BaseEmbedding):
 You can also use embeddings as a standalone module for your project, existing application, or general testing and exploration.
 
 ```python
-embeddings = embed_model.get_text_embedding("It is raining cats and dogs here!")
+embeddings = embed_model.get_text_embedding(
+    "It is raining cats and dogs here!"
+)
 ```
 
 ## Modules
diff --git a/docs/module_guides/models/llms/usage_custom.md b/docs/module_guides/models/llms/usage_custom.md
index 50437883201029f08a1bb34f0dcfe164e94b3339..5bc619cb2aab53d2bdcbbc2813a0311462aa17f3 100644
--- a/docs/module_guides/models/llms/usage_custom.md
+++ b/docs/module_guides/models/llms/usage_custom.md
@@ -39,11 +39,15 @@ llm = OpenAI(temperature=0.1, model="gpt-4")
 service_context = ServiceContext.from_defaults(llm=llm)
 
 # build index
-index = KeywordTableIndex.from_documents(documents, service_context=service_context)
+index = KeywordTableIndex.from_documents(
+    documents, service_context=service_context
+)
 
 # get response from query
 query_engine = index.as_query_engine()
-response = query_engine.query("What did the author do after his time at Y Combinator?")
+response = query_engine.query(
+    "What did the author do after his time at Y Combinator?"
+)
 ```
 
 ## Example: Changing the number of output tokens (for OpenAI, Cohere, AI21)
@@ -55,7 +59,11 @@ For OpenAI, Cohere, AI21, you just need to set the `max_tokens` parameter
 (or maxTokens for AI21). We will handle text chunking/calculations under the hood.
 
 ```python
-from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext
+from llama_index import (
+    KeywordTableIndex,
+    SimpleDirectoryReader,
+    ServiceContext,
+)
 from llama_index.llms import OpenAI
 
 documents = SimpleDirectoryReader("data").load_data()
@@ -70,7 +78,11 @@ service_context = ServiceContext.from_defaults(llm=llm)
 If you are using other LLM classes from langchain, you may need to explicitly configure the `context_window` and `num_output` via the `ServiceContext` since the information is not available by default.
 
 ```python
-from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext
+from llama_index import (
+    KeywordTableIndex,
+    SimpleDirectoryReader,
+    ServiceContext,
+)
 from llama_index.llms import OpenAI
 
 # alternatively
@@ -203,20 +215,26 @@ class OurLLM(CustomLLM):
     def metadata(self) -> LLMMetadata:
         """Get LLM metadata."""
         return LLMMetadata(
-            context_window=context_window, num_output=num_output, model_name=model_name
+            context_window=context_window,
+            num_output=num_output,
+            model_name=model_name,
         )
 
     @llm_completion_callback()
     def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         prompt_length = len(prompt)
-        response = pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"]
+        response = pipeline(prompt, max_new_tokens=num_output)[0][
+            "generated_text"
+        ]
 
         # only return newly generated tokens
         text = response[prompt_length:]
         return CompletionResponse(text=text)
 
     @llm_completion_callback()
-    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
+    def stream_complete(
+        self, prompt: str, **kwargs: Any
+    ) -> CompletionResponseGen:
         raise NotImplementedError()
 
 
diff --git a/docs/module_guides/models/llms/usage_standalone.md b/docs/module_guides/models/llms/usage_standalone.md
index 9f07b3348aced3e8c6365de935e41ae0c1228edd..2aed9aed88a25f10f3d9a016c083a9b97886bded 100644
--- a/docs/module_guides/models/llms/usage_standalone.md
+++ b/docs/module_guides/models/llms/usage_standalone.md
@@ -26,7 +26,9 @@ for delta in resp:
 from llama_index.llms import ChatMessage, OpenAI
 
 messages = [
-    ChatMessage(role="system", content="You are a pirate with a colorful personality"),
+    ChatMessage(
+        role="system", content="You are a pirate with a colorful personality"
+    ),
     ChatMessage(role="user", content="What is your name"),
 ]
 resp = OpenAI().chat(messages)
diff --git a/docs/module_guides/models/prompts/usage_pattern.md b/docs/module_guides/models/prompts/usage_pattern.md
index 56f85e3762d9acb67f93e97253ea15c04c1fdfbc..f532e03d6ed36ed617d2df629af36455742ee0d0 100644
--- a/docs/module_guides/models/prompts/usage_pattern.md
+++ b/docs/module_guides/models/prompts/usage_pattern.md
@@ -102,7 +102,9 @@ qa_prompt_tmpl_str = (
 )
 qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
 
-query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})
+query_engine.update_prompts(
+    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
+)
 ```
 
 #### Modify prompts used in query engine
diff --git a/docs/module_guides/observability/callbacks/token_counting_migration.md b/docs/module_guides/observability/callbacks/token_counting_migration.md
index d2b03abb02821185298bd4cec3a2d67ff4d96f5a..de7df75239d79db6737ddf7206a5f487692b73ce 100644
--- a/docs/module_guides/observability/callbacks/token_counting_migration.md
+++ b/docs/module_guides/observability/callbacks/token_counting_migration.md
@@ -25,12 +25,16 @@ token_counter = TokenCountingHandler(
 
 callback_manager = CallbackManager([token_counter])
 
-service_context = ServiceContext.from_defaults(callback_manager=callback_manager)
+service_context = ServiceContext.from_defaults(
+    callback_manager=callback_manager
+)
 
 documents = SimpleDirectoryReader("./data").load_data()
 
 # if verbose is turned on, you will see embedding token usage printed
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+index = VectorStoreIndex.from_documents(
+    documents, service_context=service_context
+)
 
 # otherwise, you can access the count directly
 print(token_counter.total_embedding_token_count)
diff --git a/docs/module_guides/querying/node_postprocessors/root.md b/docs/module_guides/querying/node_postprocessors/root.md
index c0c40b79ee9016bc06314db1af727a805951ce49..84bc86b596413d42d2689802e1f9740b595a2722 100644
--- a/docs/module_guides/querying/node_postprocessors/root.md
+++ b/docs/module_guides/querying/node_postprocessors/root.md
@@ -46,7 +46,9 @@ index = VectorStoreIndex.from_documents(documents)
 
 query_engine = index.as_query_engine(
     node_postprocessors=[
-        TimeWeightedPostprocessor(time_decay=0.5, time_access_refresh=False, top_k=1)
+        TimeWeightedPostprocessor(
+            time_decay=0.5, time_access_refresh=False, top_k=1
+        )
     ]
 )
 
diff --git a/docs/module_guides/querying/output_parser.md b/docs/module_guides/querying/output_parser.md
index 5cc242a6a070a47a767e17ce001f2bb394429015..60fc812f637f77c3d7d8e5d1389373049b0fd2ff 100644
--- a/docs/module_guides/querying/output_parser.md
+++ b/docs/module_guides/querying/output_parser.md
@@ -115,12 +115,15 @@ response_schemas = [
         description="Describes the author's educational experience/background.",
     ),
     ResponseSchema(
-        name="Work", description="Describes the author's work experience/background."
+        name="Work",
+        description="Describes the author's work experience/background.",
     ),
 ]
 
 # define output parser
-lc_output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+lc_output_parser = StructuredOutputParser.from_response_schemas(
+    response_schemas
+)
 output_parser = LangchainOutputParser(lc_output_parser)
 
 # format each prompt with output parser instructions
diff --git a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
index 66275a3d94bd489d038bf1999b8869472182d370..b75cdb3f090666c5934742abe66f2f47a3528fb0 100644
--- a/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
+++ b/docs/module_guides/querying/response_synthesizers/response_synthesizers.md
@@ -32,14 +32,20 @@ response_synthesizer = get_response_synthesizer(
 response = response_synthesizer.synthesize(
     "query string",
     nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
-    additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
+    additional_source_nodes=[
+        NodeWithScore(node=Node(text="text"), score=1.0),
+        ...,
+    ],
 )
 
 # asynchronous
 response = await response_synthesizer.asynthesize(
     "query string",
     nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
-    additional_source_nodes=[NodeWithScore(node=Node(text="text"), score=1.0), ...],
+    additional_source_nodes=[
+        NodeWithScore(node=Node(text="text"), score=1.0),
+        ...,
+    ],
 )
 ```
 
diff --git a/docs/module_guides/querying/response_synthesizers/root.md b/docs/module_guides/querying/response_synthesizers/root.md
index d777736320c94d58d529499a7fd3475166c5b76f..8ac2b388dfeec9c599e90618cad9eb1bc889a6b3 100644
--- a/docs/module_guides/querying/response_synthesizers/root.md
+++ b/docs/module_guides/querying/response_synthesizers/root.md
@@ -18,11 +18,18 @@ Use a response synthesizer on its own:
 
 ```python
 from llama_index.schema import Node
-from llama_index.response_synthesizers import ResponseMode, get_response_synthesizer
+from llama_index.response_synthesizers import (
+    ResponseMode,
+    get_response_synthesizer,
+)
 
-response_synthesizer = get_response_synthesizer(response_mode=ResponseMode.COMPACT)
+response_synthesizer = get_response_synthesizer(
+    response_mode=ResponseMode.COMPACT
+)
 
-response = response_synthesizer.synthesize("query text", nodes=[Node(text="text"), ...])
+response = response_synthesizer.synthesize(
+    "query text", nodes=[Node(text="text"), ...]
+)
 ```
 
 Or in a query engine after you've created an index:
@@ -136,7 +143,9 @@ class BaseSynthesizer(ABC):
         streaming: bool = False,
     ) -> None:
         """Init params."""
-        self._service_context = service_context or ServiceContext.from_defaults()
+        self._service_context = (
+            service_context or ServiceContext.from_defaults()
+        )
         self._callback_manager = self._service_context.callback_manager
         self._streaming = streaming
 
diff --git a/docs/module_guides/querying/router/root.md b/docs/module_guides/querying/router/root.md
index 689b6ccb5df70fc9f03b4fa5b75f6c64aacd48f9..b06737d7ea5c57592b4d8bfa6291cd42765e213a 100644
--- a/docs/module_guides/querying/router/root.md
+++ b/docs/module_guides/querying/router/root.md
@@ -63,7 +63,10 @@ We also highlight using our router as a standalone module.
 Some examples are given below with LLM and Pydantic based single/multi selectors:
 
 ```python
-from llama_index.selectors.llm_selectors import LLMSingleSelector, LLMMultiSelector
+from llama_index.selectors.llm_selectors import (
+    LLMSingleSelector,
+    LLMMultiSelector,
+)
 from llama_index.selectors.pydantic_selectors import (
     PydanticMultiSelector,
     PydanticSingleSelector,
@@ -88,7 +91,10 @@ A `RouterQueryEngine` is composed on top of other query engines as tools.
 
 ```python
 from llama_index.query_engine.router_query_engine import RouterQueryEngine
-from llama_index.selectors.pydantic_selectors import PydanticSingleSelector, Pydantic
+from llama_index.selectors.pydantic_selectors import (
+    PydanticSingleSelector,
+    Pydantic,
+)
 from llama_index.tools.query_engine import QueryEngineTool
 from llama_index import (
     VectorStoreIndex,
@@ -171,7 +177,10 @@ choices = [
 ]
 
 # choices as a list of strings
-choices = ["choice 1 - description for choice 1", "choice 2: description for choice 2"]
+choices = [
+    "choice 1 - description for choice 1",
+    "choice 2: description for choice 2",
+]
 
 selector = LLMSingleSelector.from_defaults()
 selector_result = selector.select(
diff --git a/docs/module_guides/storing/customization.md b/docs/module_guides/storing/customization.md
index 3ff9c7a8d818795e742223863cb1466145a3872d..ac6e1ad677ffc547cc43cf3c5d73889fd5adcbb1 100644
--- a/docs/module_guides/storing/customization.md
+++ b/docs/module_guides/storing/customization.md
@@ -111,7 +111,9 @@ from llama_index.vector_stores import PineconeVectorStore
 # Creating a Pinecone index
 api_key = "api_key"
 pinecone.init(api_key=api_key, environment="us-west1-gcp")
-pinecone.create_index("quickstart", dimension=1536, metric="euclidean", pod_type="p1")
+pinecone.create_index(
+    "quickstart", dimension=1536, metric="euclidean", pod_type="p1"
+)
 index = pinecone.Index("quickstart")
 
 # construct vector store
@@ -124,7 +126,9 @@ storage_context = StorageContext.from_defaults(vector_store=vector_store)
 documents = SimpleDirectoryReader("./data").load_data()
 
 # create index, which will insert documents/vectors to pinecone
-index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    documents, storage_context=storage_context
+)
 ```
 
 If you have an existing vector store with data already loaded in,
diff --git a/docs/module_guides/storing/save_load.md b/docs/module_guides/storing/save_load.md
index da5c6f23b2300289645b1e6c679bd55222049cbd..0868cc7f480e42f947ae16f54333ed17845ffdef 100644
--- a/docs/module_guides/storing/save_load.md
+++ b/docs/module_guides/storing/save_load.md
@@ -22,7 +22,9 @@ To load data, the user simply needs to re-create the storage context using the same
 ```python
 storage_context = StorageContext.from_defaults(
     docstore=SimpleDocumentStore.from_persist_dir(persist_dir="<persist_dir>"),
-    vector_store=SimpleVectorStore.from_persist_dir(persist_dir="<persist_dir>"),
+    vector_store=SimpleVectorStore.from_persist_dir(
+        persist_dir="<persist_dir>"
+    ),
     index_store=SimpleIndexStore.from_persist_dir(persist_dir="<persist_dir>"),
 )
 ```
diff --git a/docs/module_guides/storing/storing.md b/docs/module_guides/storing/storing.md
index 32275d18d7fbf65a873607eb161f2d36b2b3d7e3..df43986e703a2bf84f5d0605898221012b11a9ed 100644
--- a/docs/module_guides/storing/storing.md
+++ b/docs/module_guides/storing/storing.md
@@ -35,7 +35,9 @@ from llama_index.vector_stores import DeepLakeVectorStore
 vector_store = DeepLakeVectorStore(dataset_path="<dataset_path>")
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
 # Load documents and build index
-index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    documents, storage_context=storage_context
+)
 
 
 ## reload an existing one
diff --git a/docs/module_guides/supporting_modules/service_context.md b/docs/module_guides/supporting_modules/service_context.md
index 6017f52ff6cdbbca16b4670229c906ccde37b493..13c788abb62c98cb7e38aef5ffef31c77b545c29 100644
--- a/docs/module_guides/supporting_modules/service_context.md
+++ b/docs/module_guides/supporting_modules/service_context.md
@@ -67,7 +67,12 @@ for convenience (so you don't have to manually construct them).
 Here's a complete example that sets up all objects using their default settings:
 
 ```python
-from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
+from llama_index import (
+    ServiceContext,
+    LLMPredictor,
+    OpenAIEmbedding,
+    PromptHelper,
+)
 from llama_index.llms import OpenAI
 from llama_index.text_splitter import TokenTextSplitter
 from llama_index.node_parser import SimpleNodeParser
@@ -78,7 +83,10 @@ node_parser = SimpleNodeParser.from_defaults(
     text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
 )
 prompt_helper = PromptHelper(
-    context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None
+    context_window=4096,
+    num_output=256,
+    chunk_overlap_ratio=0.1,
+    chunk_size_limit=None,
 )
 
 service_context = ServiceContext.from_defaults(
diff --git a/docs/optimizing/advanced_retrieval/query_transformations.md b/docs/optimizing/advanced_retrieval/query_transformations.md
index 68e6512d4ef0671d0f9771af662fe22680b2d1fc..c559fe2fad55e1c67af3436c1e1b97e11c1ffc9f 100644
--- a/docs/optimizing/advanced_retrieval/query_transformations.md
+++ b/docs/optimizing/advanced_retrieval/query_transformations.md
@@ -28,7 +28,9 @@ To use HyDE, an example code snippet is shown below.
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.indices.query.query_transform.base import HyDEQueryTransform
-from llama_index.query_engine.transform_query_engine import TransformQueryEngine
+from llama_index.query_engine.transform_query_engine import (
+    TransformQueryEngine,
+)
 
 # load documents, build index
 documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
@@ -65,9 +67,13 @@ Here's a corresponding example code snippet over a composed graph.
 ```python
 # Setting: a summary index composed over multiple vector indices
 # llm_predictor_chatgpt corresponds to the ChatGPT LLM interface
-from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
+from llama_index.indices.query.query_transform.base import (
+    DecomposeQueryTransform,
+)
 
-decompose_transform = DecomposeQueryTransform(llm_predictor_chatgpt, verbose=True)
+decompose_transform = DecomposeQueryTransform(
+    llm_predictor_chatgpt, verbose=True
+)
 
 # initialize indexes and graph
 ...
@@ -83,7 +89,9 @@ vector_query_engine = TransformQueryEngine(
 custom_query_engines = {vector_index.index_id: vector_query_engine}
 
 # query
-query_str = "Compare and contrast the airports in Seattle, Houston, and Toronto. "
+query_str = (
+    "Compare and contrast the airports in Seattle, Houston, and Toronto. "
+)
 query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
 response = query_engine.query(query_str)
 ```
@@ -104,10 +112,14 @@ An example image is shown below.
 Here's a corresponding example code snippet.
 
 ```python
-from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform
+from llama_index.indices.query.query_transform.base import (
+    StepDecomposeQueryTransform,
+)
 
 # gpt-4
-step_decompose_transform = StepDecomposeQueryTransform(llm_predictor, verbose=True)
+step_decompose_transform = StepDecomposeQueryTransform(
+    llm_predictor, verbose=True
+)
 
 query_engine = index.as_query_engine()
 query_engine = MultiStepQueryEngine(
diff --git a/docs/optimizing/basic_strategies/basic_strategies.md b/docs/optimizing/basic_strategies/basic_strategies.md
index c73b23963c84fd4275059ad6ae0c9e335790ebb1..0ebeb3b049daeb8ce7847f6036ccb31fac2886d8 100644
--- a/docs/optimizing/basic_strategies/basic_strategies.md
+++ b/docs/optimizing/basic_strategies/basic_strategies.md
@@ -119,7 +119,9 @@ documents = [
     Document(text="text", metadata={"author": "John Doe"}),
 ]
 
-filters = MetadataFilters(filters=[ExactMatchFilter(key="author", value="John Doe")])
+filters = MetadataFilters(
+    filters=[ExactMatchFilter(key="author", value="John Doe")]
+)
 
 index = VectorStoreIndex.from_documents(documents)
 query_engine = index.as_query_engine(filters=filters)
diff --git a/docs/understanding/evaluating/cost_analysis/usage_pattern.md b/docs/understanding/evaluating/cost_analysis/usage_pattern.md
index 4abc5d09b8fe909f8d598241dacb07ff1f31df5e..9d77ad34f46771aadb78688605fabb039462609b 100644
--- a/docs/understanding/evaluating/cost_analysis/usage_pattern.md
+++ b/docs/understanding/evaluating/cost_analysis/usage_pattern.md
@@ -44,7 +44,9 @@ set_global_service_context(
 ```python
 from llama_index import VectorStoreIndex, SimpleDirectoryReader
 
-documents = SimpleDirectoryReader("./docs/examples/data/paul_graham").load_data()
+documents = SimpleDirectoryReader(
+    "./docs/examples/data/paul_graham"
+).load_data()
 
 index = VectorStoreIndex.from_documents(documents)
 ```
diff --git a/docs/understanding/loading/loading.md b/docs/understanding/loading/loading.md
index 4d9ce537b7e998b5c6c77c261bb4e6bf6f107d37..5086c03345d12e16cca33a7f264bd22dc436437c 100644
--- a/docs/understanding/loading/loading.md
+++ b/docs/understanding/loading/loading.md
@@ -59,7 +59,9 @@ documents = SimpleDirectoryReader("./data").load_data()
 node_parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=10)
 service_context = ServiceContext.from_defaults(node_parser=node_parser)
 
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+index = VectorStoreIndex.from_documents(
+    documents, service_context=service_context
+)
 ```
 
 ```{tip}
@@ -87,7 +89,8 @@ When creating documents, you can also attach useful metadata that can be used at
 
 ```python
 document = Document(
-    text="text", metadata={"filename": "<doc_file_name>", "category": "<category>"}
+    text="text",
+    metadata={"filename": "<doc_file_name>", "category": "<category>"},
 )
 ```
 
diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
index f5a899b14943298a04194f2ac82d5160a17f0c04..a6bfa25154fa5ab326a56e4524a280488e64f9c0 100644
--- a/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
+++ b/docs/understanding/putting_it_all_together/apps/fullstack_app_guide.md
@@ -83,7 +83,10 @@ def query_index():
     global index
     query_text = request.args.get("text", None)
     if query_text is None:
-        return "No text found, please include a ?text=blah parameter in the URL", 400
+        return (
+            "No text found, please include a ?text=blah parameter in the URL",
+            400,
+        )
     query_engine = index.as_query_engine()
     response = query_engine.query(query_text)
     return str(response), 200
@@ -177,7 +180,10 @@ def query_index():
     global index
     query_text = request.args.get("text", None)
     if query_text is None:
-        return "No text found, please include a ?text=blah parameter in the URL", 400
+        return (
+            "No text found, please include a ?text=blah parameter in the URL",
+            400,
+        )
     response = manager.query_index(query_text)._getvalue()
     return str(response), 200
 
diff --git a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
index 2b8f1c6ad51a37a4d453454b9195b91898e1dacd..1ddea915affb43da414829cd07e35f9e5bfed5a6 100644
--- a/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
+++ b/docs/understanding/putting_it_all_together/apps/fullstack_with_delphic.md
@@ -157,7 +157,9 @@ async def create_collection(
     response=CollectionQueryOutput,
     summary="Ask a question of a document collection",
 )
-def query_collection_view(request: HttpRequest, query_input: CollectionQueryInput):
+def query_collection_view(
+    request: HttpRequest, query_input: CollectionQueryInput
+):
     collection_id = query_input.collection_id
     query_str = query_input.query_str
     response = query_collection(collection_id, query_str)
@@ -307,7 +309,9 @@ async def receive(self, text_data):
 
         markdown_response = f"## Response\n\n{response}\n\n"
         if response.source_nodes:
-            markdown_sources = f"## Sources\n\n{response.get_formatted_sources()}"
+            markdown_sources = (
+                f"## Sources\n\n{response.get_formatted_sources()}"
+            )
         else:
             markdown_sources = ""
 
@@ -316,7 +320,9 @@ async def receive(self, text_data):
         await self.send(json.dumps({"response": formatted_response}, indent=4))
     else:
         await self.send(
-            json.dumps({"error": "No index loaded for this connection."}, indent=4)
+            json.dumps(
+                {"error": "No index loaded for this connection."}, indent=4
+            )
         )
 ```
 
@@ -358,7 +364,9 @@ async def load_collection_model(collection_id: str | int) -> VectorStoreIndex:
         if not cache_file_path.exists():
             cache_dir.mkdir(parents=True, exist_ok=True)
             with collection.model.open("rb") as model_file:
-                with cache_file_path.open("w+", encoding="utf-8") as cache_file:
+                with cache_file_path.open(
+                    "w+", encoding="utf-8"
+                ) as cache_file:
                     cache_file.write(model_file.read().decode("utf-8"))
 
         # define LLM
diff --git a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
index 3e1ac4c2da11b7e12f970009b07a88081a5506be..1c7f58118773aeffb64bd9e34593faceb0a49ed4 100644
--- a/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
+++ b/docs/understanding/putting_it_all_together/chatbots/building_a_chatbot.md
@@ -98,7 +98,9 @@ from llama_index import load_index_from_storage
 
 index_set = {}
 for year in years:
-    storage_context = StorageContext.from_defaults(persist_dir=f"./storage/{year}")
+    storage_context = StorageContext.from_defaults(
+        persist_dir=f"./storage/{year}"
+    )
     cur_index = load_index_from_storage(
         storage_context, service_context=service_context
     )
@@ -189,7 +191,9 @@ If we test it with a query regarding the 10-k of a given year, the agent will us
 the relevant vector index Tool.
 
 ```python
-response = agent.chat("What were some of the biggest risk factors in 2020 for Uber?")
+response = agent.chat(
+    "What were some of the biggest risk factors in 2020 for Uber?"
+)
 print(str(response))
 ```
 
diff --git a/docs/understanding/putting_it_all_together/q_and_a.md b/docs/understanding/putting_it_all_together/q_and_a.md
index 8413aefb6a64f34d403f8c3a322011e629014ce6..ac6bee7ffcaa872e493c4acc2f48e3239030a00b 100644
--- a/docs/understanding/putting_it_all_together/q_and_a.md
+++ b/docs/understanding/putting_it_all_together/q_and_a.md
@@ -116,9 +116,13 @@ By default, this uses a `LLMSingleSelector` as the router, which uses the LLM to
 ```python
 from llama_index.query_engine import RouterQueryEngine
 
-query_engine = RouterQueryEngine.from_defaults(query_engine_tools=[tool1, tool2])
+query_engine = RouterQueryEngine.from_defaults(
+    query_engine_tools=[tool1, tool2]
+)
 
-response = query_engine.query("In Notion, give me a summary of the product roadmap.")
+response = query_engine.query(
+    "In Notion, give me a summary of the product roadmap."
+)
 ```
 
 **Guides**
@@ -131,7 +135,9 @@ response = query_engine.query("In Notion, give me a summary of the product roadm
 You can explicitly perform compare/contrast queries with a **query transformation** module within a ComposableGraph.
 
 ```python
-from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
+from llama_index.indices.query.query_transform.base import (
+    DecomposeQueryTransform,
+)
 
 decompose_transform = DecomposeQueryTransform(
     service_context.llm_predictor, verbose=True
diff --git a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
index a5e7570009502659b3068e56db4627a8d6dfb42a..36f4789d3fd7947620e54615dc17d030ec23962e 100644
--- a/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
+++ b/docs/understanding/putting_it_all_together/q_and_a/terms_definitions_tutorial.md
@@ -60,7 +60,8 @@ with setup_tab:
         "LLM Temperature", min_value=0.0, max_value=1.0, step=0.1
     )
     term_extract_str = st.text_area(
-        "The query to extract terms and definitions with.", value=DEFAULT_TERM_STR
+        "The query to extract terms and definitions with.",
+        value=DEFAULT_TERM_STR,
     )
 
 with upload_tab:
@@ -95,15 +96,21 @@ from llama_index.llms import OpenAI
 
 def get_llm(llm_name, model_temperature, api_key, max_tokens=256):
     os.environ["OPENAI_API_KEY"] = api_key
-    return OpenAI(temperature=model_temperature, model=llm_name, max_tokens=max_tokens)
+    return OpenAI(
+        temperature=model_temperature, model=llm_name, max_tokens=max_tokens
+    )
 
 
-def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_key):
+def extract_terms(
+    documents, term_extract_str, llm_name, model_temperature, api_key
+):
     llm = get_llm(llm_name, model_temperature, api_key, max_tokens=1024)
 
     service_context = ServiceContext.from_defaults(llm=llm, chunk_size=1024)
 
-    temp_index = SummaryIndex.from_documents(documents, service_context=service_context)
+    temp_index = SummaryIndex.from_documents(
+        documents, service_context=service_context
+    )
     query_engine = temp_index.as_query_engine(response_mode="tree_summarize")
     terms_definitions = str(query_engine.query(term_extract_str))
     terms_definitions = [
@@ -471,7 +478,8 @@ with upload_tab:
             "Either upload an image/screenshot of a document, or enter the text manually."
         )
         uploaded_file = st.file_uploader(
-            "Upload an image/screenshot of a document:", type=["png", "jpg", "jpeg"]
+            "Upload an image/screenshot of a document:",
+            type=["png", "jpg", "jpeg"],
         )
         document_text = st.text_area("Or enter raw text")
         if st.button("Extract Terms and Definitions") and (
diff --git a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
index c1cd7896f0609c496a793bde8bdd224a6c8f3d3b..8de57a374640ad66a0d2997bc15b89f18c2a469b 100644
--- a/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
+++ b/docs/understanding/putting_it_all_together/q_and_a/unified_query.md
@@ -135,7 +135,9 @@ graph = ComposableGraph.from_indices(
 )
 
 # get root index
-root_index = graph.get_index(graph.index_struct.root_id, SimpleKeywordTableIndex)
+root_index = graph.get_index(
+    graph.index_struct.root_id, SimpleKeywordTableIndex
+)
 # set id of root index
 root_index.set_index_id("compare_contrast")
 root_summary = (
@@ -150,12 +152,18 @@ An example is shown below.
 ```python
 # define decompose_transform
 from llama_index import LLMPredictor
-from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
+from llama_index.indices.query.query_transform.base import (
+    DecomposeQueryTransform,
+)
 
-decompose_transform = DecomposeQueryTransform(LLMPredictor(llm=llm_gpt4), verbose=True)
+decompose_transform = DecomposeQueryTransform(
+    LLMPredictor(llm=llm_gpt4), verbose=True
+)
 
 # define custom query engines
-from llama_index.query_engine.transform_query_engine import TransformQueryEngine
+from llama_index.query_engine.transform_query_engine import (
+    TransformQueryEngine,
+)
 
 custom_query_engines = {}
 for index in vector_indices.values():
@@ -209,7 +217,9 @@ for wiki_title in wiki_titles:
     summary = index_summaries[wiki_title]
 
     query_engine = index.as_query_engine(service_context=service_context)
-    vector_tool = QueryEngineTool.from_defaults(query_engine, description=summary)
+    vector_tool = QueryEngineTool.from_defaults(
+        query_engine, description=summary
+    )
     query_engine_tools.append(vector_tool)
 
 
diff --git a/docs/understanding/putting_it_all_together/structured_data.md b/docs/understanding/putting_it_all_together/structured_data.md
index 0e38a4b78ca0cfb70e2ba6e59963533c1797290e..c82f97cb39f75319c53fd37dc9ff1274448c6b48 100644
--- a/docs/understanding/putting_it_all_together/structured_data.md
+++ b/docs/understanding/putting_it_all_together/structured_data.md
@@ -114,7 +114,11 @@ SQLDatabase and produces a Node object for each SQLTableSchema object passed
 into the ObjectIndex constructor.
 
 ```python
-from llama_index.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
+from llama_index.objects import (
+    SQLTableNodeMapping,
+    ObjectIndex,
+    SQLTableSchema,
+)
 
 table_node_mapping = SQLTableNodeMapping(sql_database)
 table_schema_objs = [
diff --git a/docs/understanding/querying/querying.md b/docs/understanding/querying/querying.md
index 70ff0a2605fb5c28535bf293f24780dd7f14c6ac..4790578915e3ecbc8bbca9ef5441e4af94934edd 100644
--- a/docs/understanding/querying/querying.md
+++ b/docs/understanding/querying/querying.md
@@ -118,7 +118,9 @@ After a retriever fetches relevant nodes, a `BaseSynthesizer` synthesizes the fi
 You can configure it via
 
 ```python
-query_engine = RetrieverQueryEngine.from_args(retriever, response_mode=response_mode)
+query_engine = RetrieverQueryEngine.from_args(
+    retriever, response_mode=response_mode
+)
 ```
 
 Right now, we support the following options:
diff --git a/docs/understanding/storing/storing.md b/docs/understanding/storing/storing.md
index 69b870742c2f3e32204bb3cdded3bd30237e387c..e344c7929b5fc7a97887ae0782abaf24942033f1 100644
--- a/docs/understanding/storing/storing.md
+++ b/docs/understanding/storing/storing.md
@@ -75,7 +75,9 @@ vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
 
 # create your index
-index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
+index = VectorStoreIndex.from_documents(
+    documents, storage_context=storage_context
+)
 
 # create a query engine and query
 query_engine = index.as_query_engine()
diff --git a/docs/understanding/using_llms/using_llms.md b/docs/understanding/using_llms/using_llms.md
index a6d4343f631c1a300dc115ab9a550fb6ca34f5a9..2b89039848e148233ce6276c39516a159c1dd1d9 100644
--- a/docs/understanding/using_llms/using_llms.md
+++ b/docs/understanding/using_llms/using_llms.md
@@ -32,7 +32,9 @@ llm = OpenAI(temperature=0.1, model="gpt-4")
 service_context = ServiceContext.from_defaults(llm=llm)
 
 documents = SimpleDirectoryReader("data").load_data()
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+index = VectorStoreIndex.from_documents(
+    documents, service_context=service_context
+)
 ```
 
 In this case, you've instantiated OpenAI and customized it to use the `gpt-4` model instead of the default `gpt-3.5-turbo`, and also modified the `temperature`. The `VectorStoreIndex` will now use gpt-4 to encode or `embed` your documents for later querying.