diff --git a/docs/examples/agent/agent_runner/query_pipeline_agent.ipynb b/docs/examples/agent/agent_runner/query_pipeline_agent.ipynb
index 6197c0843900423889fde65ff689a22c3f8640d6..ba330c8c37de4b14db29aecc60bac50ac2705bff 100644
--- a/docs/examples/agent/agent_runner/query_pipeline_agent.ipynb
+++ b/docs/examples/agent/agent_runner/query_pipeline_agent.ipynb
@@ -16,12 +16,47 @@
     "- a \"simple\" agent that adds a retry layer around a text-to-sql query engine."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "763a442a-cfcd-4e63-9121-e3a45dc3acff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core import SQLDatabase\n",
+    "from sqlalchemy import (\n",
+    "    create_engine,\n",
+    "    MetaData,\n",
+    "    Table,\n",
+    "    Column,\n",
+    "    String,\n",
+    "    Integer,\n",
+    "    select,\n",
+    "    column,\n",
+    ")\n",
+    "\n",
+    "engine = create_engine(\"sqlite:///chinook.db\")\n",
+    "sql_database = SQLDatabase(engine)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0aacdcad-f0c1-40f4-b319-6c4cf3b309c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core.query_pipeline import QueryPipeline"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "122dce22-6d3a-4d4a-a265-a6a3d3f90d26",
    "metadata": {},
    "source": [
-    "## Setup Data\n",
+    "## Setup\n",
+    "\n",
+    "### Setup Data\n",
     "\n",
     "We use the chinook database as sample data. [Source](https://www.sqlitetutorial.net/sqlite-sample-database/)."
    ]
@@ -61,46 +96,28 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "763a442a-cfcd-4e63-9121-e3a45dc3acff",
+   "cell_type": "markdown",
+   "id": "419f97cf-63c1-456b-babd-b07d9ce4b937",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "from llama_index.core import SQLDatabase\n",
-    "from sqlalchemy import (\n",
-    "    create_engine,\n",
-    "    MetaData,\n",
-    "    Table,\n",
-    "    Column,\n",
-    "    String,\n",
-    "    Integer,\n",
-    "    select,\n",
-    "    column,\n",
-    ")\n",
+    "### Setup Observability\n",
     "\n",
-    "engine = create_engine(\"sqlite:///chinook.db\")\n",
-    "sql_database = SQLDatabase(engine)"
+    "We setup Arize Phoenix for observability."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0aacdcad-f0c1-40f4-b319-6c4cf3b309c7",
+   "id": "5fb303b7-2fb1-496b-aff8-57844cdc519b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_index.core.query_pipeline import QueryPipeline"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "419f97cf-63c1-456b-babd-b07d9ce4b937",
-   "metadata": {},
-   "source": [
-    "### Setup Observability\n",
+    "# define global callback setting\n",
+    "from llama_index.core.settings import Settings\n",
+    "from llama_index.core.callbacks import CallbackManager\n",
     "\n",
-    "We setup Arize Phoenix for observability."
+    "callback_manager = CallbackManager()\n",
+    "Settings.callback_manager = callback_manager"
    ]
   },
   {
@@ -108,17 +125,7 @@
    "execution_count": null,
    "id": "754f11d3-f053-46f7-acb4-ae8ee7d3fe07",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🌍 To view the Phoenix app in your browser, visit http://127.0.0.1:6006/\n",
-      "📺 To view the Phoenix app in a notebook, run `px.active_session().view()`\n",
-      "📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# setup Arize Phoenix for logging/observability\n",
     "import phoenix as px\n",
@@ -192,7 +199,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_index.query_pipeline import QueryPipeline as QP\n",
+    "from llama_index.core.query_pipeline import QueryPipeline as QP\n",
     "\n",
     "qp = QP(verbose=True)"
    ]
@@ -491,7 +498,7 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x2b016a920>"
+       "<IPython.lib.display.IFrame at 0x297838880>"
       ]
      },
      "execution_count": null,
@@ -566,10 +573,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "> Running step 1778ae52-0a31-4199-be65-af574fbf70f1. Step input: What are some tracks from the artist AC/DC? Limit it to 3\n",
+      "> Running step edb9926c-7290-4b8d-ac80-1421432a0ea6. Step input: What are some tracks from the artist AC/DC? Limit it to 3\n",
       "\u001b[1;3;38;2;155;135;227m> Running module agent_input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='0c7b3203-ce9e-496f-9be3-c66425b8eb85' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
+      "task: task_id='b9b747a7-880f-4e91-9eed-b64574cbb6d0' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_prompt with input: \n",
       "input: What are some tracks from the artist AC/DC? Limit it to 3\n",
@@ -580,13 +587,13 @@
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_output_parser with input: \n",
       "chat_response: assistant: Thought: I need to use a tool to help me answer the question.\n",
       "Action: sql_tool\n",
-      "Action Input: {\"input\": \"What are some tracks from the artist AC/DC? Limit it to 3\"}\n",
+      "Action Input: {\"input\": \"Select track_name from tracks where artist_name = 'AC/DC' limit 3\"}\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module run_tool with input: \n",
-      "reasoning_step: thought='I need to use a tool to help me answer the question.' action='sql_tool' action_input={'input': 'What are some tracks from the artist AC/DC? Limit it to 3'}\n",
+      "reasoning_step: thought='I need to use a tool to help me answer the question.' action='sql_tool' action_input={'input': \"Select track_name from tracks where artist_name = 'AC/DC' limit 3\"}\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_agent_response with input: \n",
-      "response_dict: {'response_str': 'Observation: {\\'output\\': ToolOutput(content=\\'Some tracks from the artist AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\\\\\'s Get It Up\".\\', ...\n",
+      "response_dict: {'response_str': 'Observation: {\\'output\\': ToolOutput(content=\\'The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\\\\\'s Get It Up\".\\', tool_nam...\n",
       "\n",
       "\u001b[0m"
      ]
@@ -606,10 +613,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "> Running step 07ac733e-d234-4751-bcad-002a655fb2e3. Step input: None\n",
+      "> Running step 37e2312b-540b-4c79-9261-15318d4796d9. Step input: None\n",
       "\u001b[1;3;38;2;155;135;227m> Running module agent_input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='0c7b3203-ce9e-496f-9be3-c66425b8eb85' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
+      "task: task_id='b9b747a7-880f-4e91-9eed-b64574cbb6d0' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_prompt with input: \n",
       "input: What are some tracks from the artist AC/DC? Limit it to 3\n",
@@ -618,15 +625,15 @@
       "messages: [ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content='\\nYou are designed to help with a variety of tasks, from answering questions     to providing summaries to other types of analyses.\\n\\n## Too...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_output_parser with input: \n",
-      "chat_response: assistant: Thought: The user has repeated the request, but it seems they might not have noticed the previous response. I will reiterate the information provided by the tool.\n",
+      "chat_response: assistant: Thought: The user has repeated the request, possibly due to not noticing the previous response. I will provide the information again.\n",
       "\n",
-      "Answer: Some tracks from ...\n",
+      "Answer: The top 3 tracks by AC/DC are \"For Those About...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_response with input: \n",
-      "response_step: thought='The user has repeated the request, but it seems they might not have noticed the previous response. I will reiterate the information provided by the tool.' response='Some tracks from the artis...\n",
+      "response_step: thought='The user has repeated the request, possibly due to not noticing the previous response. I will provide the information again.' response='The top 3 tracks by AC/DC are \"For Those About To Rock ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_agent_response with input: \n",
-      "response_dict: {'response_str': 'Some tracks from the artist AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\'s Get It Up\".', 'is_done': True}\n",
+      "response_dict: {'response_str': 'The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\'s Get It Up\".', 'is_done': True}\n",
       "\n",
       "\u001b[0m"
      ]
@@ -677,7 +684,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Some tracks from the artist AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let's Get It Up\".\n"
+      "The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let's Get It Up\".\n"
      ]
     }
    ],
@@ -695,10 +702,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "> Running step 0b0ed367-3246-442e-a8be-34ee4ef320c4. Step input: What are some tracks from the artist AC/DC? Limit it to 3\n",
+      "> Running step 781d6e78-5bfe-4330-b8fc-3242deb6f64a. Step input: What are some tracks from the artist AC/DC? Limit it to 3\n",
       "\u001b[1;3;38;2;155;135;227m> Running module agent_input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='d699cfb9-9632-4ea0-ae43-0fe86f623a6e' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
+      "task: task_id='c09dd358-19e8-4fcc-8b82-326783ba4af2' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_prompt with input: \n",
       "input: What are some tracks from the artist AC/DC? Limit it to 3\n",
@@ -715,12 +722,12 @@
       "reasoning_step: thought='I need to use a tool to help me answer the question.' action='sql_tool' action_input={'input': \"SELECT track_name FROM tracks WHERE artist_name = 'AC/DC' LIMIT 3\"}\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_agent_response with input: \n",
-      "response_dict: {'response_str': 'Observation: {\\'output\\': ToolOutput(content=\\'The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\\\\\'s Get It Up\".\\', tool_nam...\n",
+      "response_dict: {'response_str': 'Observation: {\\'output\\': ToolOutput(content=\\'The top three tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\\\\\'s Get It Up\".\\', tool...\n",
       "\n",
-      "\u001b[0m> Running step 2d670abc-4d8f-4cd6-8ad3-36152340ec8b. Step input: None\n",
+      "\u001b[0m> Running step a65d44a6-7a98-49ec-86ce-eb4b3bcd6a48. Step input: None\n",
       "\u001b[1;3;38;2;155;135;227m> Running module agent_input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='d699cfb9-9632-4ea0-ae43-0fe86f623a6e' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
+      "task: task_id='c09dd358-19e8-4fcc-8b82-326783ba4af2' input='What are some tracks from the artist AC/DC? Limit it to 3' memory=ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module react_prompt with input: \n",
       "input: What are some tracks from the artist AC/DC? Limit it to 3\n",
@@ -732,10 +739,10 @@
       "chat_response: assistant: Thought: The user has repeated the request for tracks from the artist AC/DC, limited to 3, despite having already received an answer. It's possible that the user did not see the previous re...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_response with input: \n",
-      "response_step: thought=\"The user has repeated the request for tracks from the artist AC/DC, limited to 3, despite having already received an answer. It's possible that the user did not see the previous response or t...\n",
+      "response_step: thought=\"The user has repeated the request for tracks from the artist AC/DC, limited to 3, despite having already received an answer. It's possible that the user did not see the previous response, or ...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module process_agent_response with input: \n",
-      "response_dict: {'response_str': 'The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\'s Get It Up\".', 'is_done': True}\n",
+      "response_dict: {'response_str': 'The top three tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let\\'s Get It Up\".', 'is_done': True}\n",
       "\n",
       "\u001b[0m"
      ]
@@ -759,7 +766,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The top 3 tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let's Get It Up\".\n"
+      "The top three tracks by AC/DC are \"For Those About To Rock (We Salute You)\", \"Put The Finger On You\", and \"Let's Get It Up\".\n"
      ]
     }
    ],
@@ -985,7 +992,7 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x2b78f0af0>"
+       "<IPython.lib.display.IFrame at 0x2b28a9780>"
       ]
      },
      "execution_count": null,
@@ -1038,7 +1045,7 @@
      "text": [
       "\u001b[1;3;38;2;155;135;227m> Running module input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='0952d9f0-458d-4bca-8531-56fd8f880d47' input=\"How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\" memory=ChatMemoryBuffer(token_limit=3000, toke...\n",
+      "task: task_id='2d8a63de-7410-4422-98f3-f0ca41884f58' input=\"How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\" memory=ChatMemoryBuffer(token_limit=3000, toke...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module retry_prompt with input: \n",
       "input: How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\n",
@@ -1051,35 +1058,59 @@
       "will convert the query to a SQL statement. I...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module sql_query_engine with input: \n",
-      "input: assistant: Given the user's input and the requirement that the answer should be non-zero, a proper natural language query that could be interpreted by a text-to-SQL agent might be:\n",
+      "input: assistant: Given the conversation history, it seems that the previous attempt to generate a SQL query from the user's question may have resulted in an error. To avoid the same problem, we need to reph...\n",
       "\n",
-      "\"Find the total nu...\n",
+      "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module output_component with input: \n",
+      "output: I'm sorry, but there seems to be an error in the SQL query. The query you provided is invalid SQL. Please double-check the syntax and try again.\n",
+      "\n",
+      "\u001b[0m> Inferred SQL Query: SELECT COUNT(albums.AlbumId) \n",
+      "FROM albums \n",
+      "JOIN tracks ON albums.AlbumId = tracks.AlbumId \n",
+      "WHERE tracks.Name = 'Restless and Wild' \n",
+      "AND albums.ArtistId = tracks.Composer \n",
+      "AND COUNT(albums.AlbumId) > 0\n",
+      "> SQL Response: I'm sorry, but there seems to be an error in the SQL query. The query you provided is invalid SQL. Please double-check the syntax and try again.\n",
+      "\u001b[1;3;38;2;155;135;227m> Running module input with input: \n",
+      "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
+      "task: task_id='2d8a63de-7410-4422-98f3-f0ca41884f58' input=\"How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\" memory=ChatMemoryBuffer(token_limit=3000, toke...\n",
+      "\n",
+      "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module retry_prompt with input: \n",
+      "input: How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\n",
+      "convo_history: User: How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\n",
+      "Assistant (inferred SQL query): SELECT COUNT(albums.AlbumId) \n",
+      "FROM albums \n",
+      "JOIN tracks ON album...\n",
+      "\n",
+      "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module llm with input: \n",
+      "messages: You are trying to generate a proper natural language query given a user input.\n",
+      "\n",
+      "This query will then be interpreted by a downstream text-to-SQL agent which\n",
+      "will convert the query to a SQL statement. I...\n",
+      "\n",
+      "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module sql_query_engine with input: \n",
+      "input: assistant: Given the previous error, it seems that the SQL query was incorrect because it attempted to use an aggregate function (`COUNT`) in the `WHERE` clause, which is not allowed. Additionally, th...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module output_component with input: \n",
-      "output: The total number of albums released by the artist credited with writing the song 'Restless and Wild' is 1.\n",
+      "output: The number of albums released by the artist who composed the track 'Restless and Wild' is 1.\n",
       "\n",
-      "\u001b[0m> Inferred SQL Query: SELECT COUNT(DISTINCT albums.AlbumId) AS TotalAlbums\n",
-      "FROM albums\n",
-      "JOIN tracks ON albums.AlbumId = tracks.AlbumId\n",
+      "\u001b[0m> Inferred SQL Query: SELECT COUNT(DISTINCT albums.AlbumId) AS NumAlbums\n",
+      "FROM tracks\n",
+      "JOIN albums ON tracks.AlbumId = albums.AlbumId\n",
+      "JOIN artists ON albums.ArtistId = artists.ArtistId\n",
       "WHERE tracks.Name = 'Restless and Wild'\n",
-      "AND albums.ArtistId = (\n",
-      "    SELECT ArtistId\n",
-      "    FROM tracks\n",
-      "    JOIN albums ON tracks.AlbumId = albums.AlbumId\n",
-      "    WHERE tracks.Name = 'Restless and Wild'\n",
-      "    LIMIT 1\n",
-      ")\n",
-      "HAVING TotalAlbums > 0;\n",
-      "> SQL Response: The total number of albums released by the artist credited with writing the song 'Restless and Wild' is 1.\n",
+      "GROUP BY artists.ArtistId\n",
+      "HAVING NumAlbums > 0;\n",
+      "> SQL Response: The number of albums released by the artist who composed the track 'Restless and Wild' is 1.\n",
       "\u001b[1;3;38;2;155;135;227m> Running module input with input: \n",
       "state: {'sources': [], 'memory': ChatMemoryBuffer(token_limit=3000, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'), chat_store=SimpleChatSto...\n",
-      "task: task_id='0952d9f0-458d-4bca-8531-56fd8f880d47' input=\"How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\" memory=ChatMemoryBuffer(token_limit=3000, toke...\n",
+      "task: task_id='2d8a63de-7410-4422-98f3-f0ca41884f58' input=\"How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\" memory=ChatMemoryBuffer(token_limit=3000, toke...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module retry_prompt with input: \n",
       "input: How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\n",
       "convo_history: User: How many albums did the artist who wrote 'Restless and Wild' release? (answer should be non-zero)?\n",
-      "Assistant (inferred SQL query): SELECT COUNT(DISTINCT albums.AlbumId) AS TotalAlbums\n",
-      "FROM album...\n",
+      "Assistant (inferred SQL query): SELECT COUNT(albums.AlbumId) \n",
+      "FROM albums \n",
+      "JOIN tracks ON album...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module llm with input: \n",
       "messages: You are trying to generate a proper natural language query given a user input.\n",
@@ -1088,26 +1119,25 @@
       "will convert the query to a SQL statement. I...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module sql_query_engine with input: \n",
-      "input: assistant: Given the conversation history, it seems that the previous query was successful in fetching a non-zero count of albums released by the artist who wrote 'Restless and Wild'. However, the use...\n",
+      "input: assistant: Given the conversation history, it seems that the previous SQL query was successful in retrieving the number of albums released by the artist who composed the track 'Restless and Wild'. How...\n",
       "\n",
       "\u001b[0m\u001b[1;3;38;2;155;135;227m> Running module output_component with input: \n",
-      "output: Based on the revised query, it seems that there are no distinct albums released by the composer who wrote the song 'Restless and Wild'.\n",
+      "output: I apologize, but there seems to be an error in the SQL query provided. Please double-check the syntax and try again.\n",
       "\n",
-      "\u001b[0m> Inferred SQL Query: SELECT COUNT(DISTINCT albums.AlbumId) AS AlbumCount\n",
+      "\u001b[0m> Inferred SQL Query: SELECT COUNT(DISTINCT albums.AlbumId) AS TotalAlbums\n",
       "FROM albums\n",
-      "JOIN tracks ON albums.AlbumId = tracks.AlbumId\n",
-      "WHERE tracks.Composer = 'Restless and Wild'\n",
-      "HAVING AlbumCount > 0;\n",
-      "> SQL Response: Based on the revised query, it seems that there are no distinct albums released by the composer who wrote the song 'Restless and Wild'.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "KeyboardInterrupt\n",
-      "\n"
+      "JOIN artists ON albums.ArtistId = artists.ArtistId\n",
+      "WHERE artists.ArtistId = (\n",
+      "    SELECT artists.ArtistId\n",
+      "    FROM tracks\n",
+      "    JOIN albums ON tracks.AlbumId = albums.AlbumId\n",
+      "    JOIN artists ON albums.ArtistId = artists.ArtistId\n",
+      "    WHERE tracks.Name = 'Restless and Wild'\n",
+      "    LIMIT 1\n",
+      ")\n",
+      "AND TotalAlbums > 0;\n",
+      "> SQL Response: I apologize, but there seems to be an error in the SQL query provided. Please double-check the syntax and try again.\n",
+      "I apologize, but there seems to be an error in the SQL query provided. Please double-check the syntax and try again.\n"
      ]
     }
    ],
@@ -1121,9 +1151,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "llama_index_v2",
+   "display_name": "llama_index_v3",
    "language": "python",
-   "name": "llama_index_v2"
+   "name": "llama_index_v3"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/docs/examples/vector_stores/SimpleIndexDemo.ipynb b/docs/examples/vector_stores/SimpleIndexDemo.ipynb
index 59027eef41398e97d33f162ce0c4f9fef76089ee..9482de6c2e611cdad086c010aca3178a1522fbe5 100644
--- a/docs/examples/vector_stores/SimpleIndexDemo.ipynb
+++ b/docs/examples/vector_stores/SimpleIndexDemo.ipynb
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "1dae7d57",
    "metadata": {},
@@ -10,7 +9,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
    "metadata": {},
@@ -19,7 +17,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "0805b5f9",
    "metadata": {},
@@ -52,7 +49,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
    "metadata": {},
@@ -63,20 +59,66 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
+   "id": "5cbfa6f3-3684-4c81-ac12-94ff067cae7f",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "INFO:numexpr.utils:Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
-      "Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
-      "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
-      "NumExpr defaulting to 8 threads.\n"
+      "[nltk_data] Downloading package stopwords to\n",
+      "[nltk_data]     /Users/jerryliu/nltk_data...\n",
+      "[nltk_data]   Package stopwords is already up-to-date!\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
+   "source": [
+    "import nltk\n",
+    "\n",
+    "nltk.download(\"stopwords\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0329ff24-4917-4975-ad1a-f8faf2c3b822",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package stopwords to /Users/jerryliu/Programmi\n",
+      "[nltk_data]     ng/gpt_index/.venv/lib/python3.10/site-\n",
+      "[nltk_data]     packages/llama_index/core/_static/nltk_cache...\n",
+      "[nltk_data]   Unzipping corpora/stopwords.zip.\n",
+      "[nltk_data] Downloading package punkt to /Users/jerryliu/Programming/g\n",
+      "[nltk_data]     pt_index/.venv/lib/python3.10/site-\n",
+      "[nltk_data]     packages/llama_index/core/_static/nltk_cache...\n",
+      "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import llama_index.core"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import logging\n",
     "import sys\n",
@@ -94,7 +136,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "290eb3c0",
    "metadata": {},
@@ -107,7 +148,25 @@
    "execution_count": null,
    "id": "1a3916d7",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2024-02-12 13:21:13--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
+      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n",
+      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 75042 (73K) [text/plain]\n",
+      "Saving to: ‘data/paul_graham/paul_graham_essay.txt’\n",
+      "\n",
+      "data/paul_graham/pa 100%[===================>]  73.28K  --.-KB/s    in 0.02s   \n",
+      "\n",
+      "2024-02-12 13:21:13 (4.76 MB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "!mkdir -p 'data/paul_graham/'\n",
     "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
@@ -129,7 +188,16 @@
    "execution_count": null,
    "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "index = VectorStoreIndex.from_documents(documents)"
    ]
@@ -156,7 +224,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "INFO:llama_index.indices.loading:Loading indices with ids: ['vector_index']\n",
+      "INFO:llama_index.core.indices.loading:Loading indices with ids: ['vector_index']\n",
       "Loading indices with ids: ['vector_index']\n"
      ]
     }
@@ -169,7 +237,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
    "metadata": {},
@@ -182,7 +249,18 @@
    "execution_count": null,
    "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "# set Logging to DEBUG for more detailed outputs\n",
     "query_engine = index.as_query_engine(response_mode=\"tree_summarize\")\n",
@@ -198,7 +276,7 @@
     {
      "data": {
       "text/markdown": [
-       "<b>The author worked on writing and programming outside of school before college. They wrote short stories and tried writing programs on an IBM 1401 computer using an early version of Fortran. They later got a microcomputer, a TRS-80, and started programming more extensively, including writing simple games and a word processor. They initially planned to study philosophy in college but switched to AI. They also started publishing essays online and eventually wrote a book called \"Hackers & Painters.\"</b>"
+       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade. They later transitioned to working with microcomputers, starting with a kit-built microcomputer and eventually acquiring a TRS-80. They wrote simple games, a program to predict rocket heights, and even a word processor. Although the author initially planned to study philosophy in college, they eventually switched to studying AI.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -213,7 +291,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "c80abba3-d338-42fd-9df3-b4e5ceb01cdf",
    "metadata": {},
@@ -233,13 +310,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/sklearn/svm/_classes.py:31: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
       "Query mode: svm\n"
      ]
     },
     {
      "data": {
       "text/markdown": [
-       "<b>The author wrote short stories and also started programming on an IBM 1401 computer in 9th grade. They later got their own microcomputer, a TRS-80, and wrote simple games, a rocket prediction program, and a word processor.</b>"
+       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade. They later got a microcomputer and started programming on it, writing simple games and a word processor. They initially planned to study philosophy in college but ended up switching to AI.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -252,13 +347,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/sklearn/svm/_classes.py:31: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
       "Query mode: linear_regression\n"
      ]
     },
     {
      "data": {
       "text/markdown": [
-       "<b>The author worked on writing and programming growing up. They wrote short stories and also started programming on an IBM 1401 computer in 9th grade using an early version of Fortran. Later, they got a microcomputer and wrote simple games, a rocket prediction program, and a word processor.</b>"
+       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade. They later got a microcomputer and started programming on it, writing simple games and a word processor. They initially planned to study philosophy in college but ended up switching to AI.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -271,13 +384,31 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/sklearn/svm/_classes.py:31: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
       "Query mode: logistic_regression\n"
      ]
     },
     {
      "data": {
       "text/markdown": [
-       "<b>The author worked on writing and programming growing up. They wrote short stories and also started programming on an IBM 1401 computer in 9th grade using an early version of Fortran. Later, they got a microcomputer and wrote simple games, a rocket prediction program, and a word processor.</b>"
+       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade. They later got a microcomputer and started programming on it, writing simple games and a word processor. They initially planned to study philosophy in college but eventually switched to AI.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -310,7 +441,7 @@
     {
      "data": {
       "text/markdown": [
-       "<b>The author worked on writing and programming growing up. They wrote short stories and also started programming on an IBM 1401 computer in 9th grade using an early version of Fortran. Later, they got a microcomputer and wrote simple games, a rocket prediction program, and a word processor.</b>"
+       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade. They later got a microcomputer and started programming on it, writing simple games and a word processor. They initially planned to study philosophy in college but eventually switched to AI.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -334,21 +465,29 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Now all I had to do was learn Italian.\n",
+      "What I Worked On\n",
       "\n",
-      "Only stranieri (foreigners) had to take this entrance exam. In retrospect it may well have been a way of excluding them, because there were so many stranieri attracted by the idea of studying art in Florence that the Italian students would otherwise have been outnumbered. I was in decent shape at painting and drawing from the RISD foundation that summer, but I still don't know how I managed to pass the written exam. I remember that I answered the essay question by writing about Cezanne, and that I cranked up the intellectual level as high as I could to make the most of my limited vocabulary. [2]\n",
+      "February 2021\n",
+      "\n",
+      "Before college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n",
+      "\n",
+      "The first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.\n",
+      "\n",
+      "The language we used was an early version of Fortran. You had to type programs on punch cards, then stack them in the card reader and press a button to load the program into memory and run it. The result would ordinarily be to print something on the spectacularly loud printer.\n",
       "\n",
-      "I'm only up to age 25 and already there are such conspicuous patterns. Here I was, yet again about to attend some august institution in the hopes of learning about some prestigious subject, and yet again about to be disappointed. The students and faculty in the painting department at the Accademia were the nicest people you could imagine, but they had long since arrived at an arrangement whereby the students wouldn't require the faculty to teach anything, and in return the faculty wouldn't require the students to learn anything. And at the same time all involved would adhere outwardly to the conventions of a 19th century atelier. We actually had one of those little stoves, fed with kindling, that you see in 19th century studio paintings, and a nude model sitting as close to it as possible without getting burned. Except hardly anyone else painted her besides me. The rest of the students spent their time chatting or occasionally trying to imitate things they'd seen in American art magazines.\n",
+      "I was puzzled by the 1401. I couldn't figure out what to do with it. And in retrospect there's not much I could have done with it. The only form of input to programs was data stored on punched cards, and I didn't have any data stored on punched cards. The only other option was to do things that didn't rely on any input, like calculate approximations of pi, but I didn't know enough math to do anything interesting of that type. So I'm not surprised I can't remember any programs I wrote, because they can't have done much. My clearest memory is of the moment I learned it was possible for programs not to terminate, when one of mine didn't. On a machine without time-sharing, this was a social as well as a technical error, as the data center manager's expression made clear.\n",
       "\n",
-      "Our model turned out to live just down the street from me. She made a living from a combination of modelling and making fakes for a local antique dealer. She'd copy an obscure old painting out of a book, and then he'd take the copy and maltreat it to make it look old. [3]\n",
+      "With microcomputers, everything changed. Now you could have a computer sitting right in front of you, on a desk, that could respond to your keystrokes as it was running instead of just churning through a stack of punch cards and then stopping. [1]\n",
       "\n",
-      "While I was a student at the Accademia I started painting still lives in my bedroom at night. These paintings were tiny, because the room was, and because I painted them on leftover scraps of canvas, which was all I could afford at the time. Painting still lives is different from painting people, because the subject, as its name suggests, can't move. People can't sit for more than about 15 minutes at a time, and when they do they don't sit very still. So the traditional m.o. for painting people is to know how to paint a generic person, which you then modify to match the specific person you're painting. Whereas a still life you can, if you want, copy pixel by pixel from what you're seeing. You don't want to stop there, of course, or you get merely photographic accuracy, and what makes a still life interesting is that it's been through a head. You want to emphasize the visual cues that tell you, for example, that the reason the color changes suddenly at a certain point is that it's the edge of an object. By subtly emphasizing such things you can make paintings that are more realistic than photographs not just in some metaphorical sense, but in the strict information-theoretic sense. [4]\n",
+      "The first of my friends to get a microcomputer built it himself. It was sold as a kit by Heathkit. I remember vividly how impressed and envious I felt watching him sitting in front of it, typing programs right into the computer.\n",
       "\n",
-      "I liked painting still lives because I was curious about what I was seeing. In everyday life, we aren't consciously aware of much we're seeing. Most visual perception is handled by low-level processes that merely tell your brain \"that's a water droplet\" without telling you details like where the lightest and darkest points are, or \"that's a bush\" without telling you the shape and position of every leaf. This is a feature of brains, not a bug. In everyday life it would be distracting to notice every leaf on every bush. But when you have to paint something, you have to look more closely, and when you do there's a lot to see. You can still be noticing new things after days of trying to paint something people usually take for granted, just as you can after days of trying to write an essay about something people usually take for granted.\n",
+      "Computers were expensive in those days and it took me years of nagging before I convinced my father to buy one, a TRS-80, in about 1980. The gold standard then was the Apple II, but a TRS-80 was good enough. This was when I really started programming. I wrote simple games, a program to predict how high my model rockets would fly, and a word processor that my father used to write at least one book. There was only room in memory for about 2 pages of text, so he'd write 2 pages at a time and then print them out, but it was a lot better than a typewriter.\n",
       "\n",
-      "This is not the only way to paint. I'm not 100% sure it's even a good way to paint. But it seemed a good enough bet to be worth trying.\n",
+      "Though I liked programming, I didn't plan to study it in college. In college I was going to study philosophy, which sounded much more powerful. It seemed, to my naive high school self, to be the study of the ultimate truths, compared to which the things studied in other fields would be mere domain knowledge. What I discovered when I got to college was that the other fields took up so much of the space of ideas that there wasn't much left for these supposed ultimate truths. All that seemed left for philosophy were edge cases that people in other fields felt could safely be ignored.\n",
       "\n",
-      "Our teacher, professor Ulivi, was a nice guy. He could see I worked hard, and gave me a good grade, which he wrote down in a sort of passport each student had. But the Accademia wasn't teaching me anything except Italian, and my money was running out, so at the end of the first year I went back to the US.\n"
+      "I couldn't have put this into words when I was 18. All I knew at the time was that I kept taking philosophy courses and they kept being boring. So I decided to switch to AI.\n",
+      "\n",
+      "AI was in the air in the mid 1980s, but there were two things especially that made me want to work on it: a novel by Heinlein called The Moon is a Harsh Mistress, which featured an intelligent computer called Mike, and a PBS documentary that showed Terry Winograd using SHRDLU. I haven't tried rereading The Moon is a Harsh Mistress, so I don't know how well it has aged, but when I read it I was drawn entirely into its world. It seemed only a matter of time before we'd have Mike, and when I saw Winograd using SHRDLU, it seemed like that time would be a few years at most.\n"
      ]
     }
    ],
@@ -357,7 +496,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "0da9092e",
    "metadata": {},
@@ -380,7 +518,18 @@
    "execution_count": null,
    "id": "bbecbdb5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "query_bundle = QueryBundle(\n",
     "    query_str=\"What did the author do growing up?\",\n",
@@ -414,7 +563,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "d7ff3d56",
    "metadata": {},
@@ -429,7 +577,18 @@
    "execution_count": null,
    "id": "60a27232",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "query_engine = index.as_query_engine(\n",
     "    vector_store_query_mode=\"mmr\", vector_store_kwargs={\"mmr_threshold\": 0.2}\n",
@@ -438,7 +597,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "5636a15c-8938-4809-958b-03b8c445ecbd",
    "metadata": {},
@@ -456,15 +614,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "> Source (Doc id: fa51aa2a-af68-450f-bb00-786df71f2cdc): What I Worked On\n",
+      "> Source (Doc id: c4118521-8f55-4a4d-819a-2db546b6491e): What I Worked On\n",
       "\n",
       "February 2021\n",
       "\n",
       "Before college the two main things I worked on, outside of schoo...\n",
       "\n",
-      "> Source (Doc id: 4636483a-a416-4971-804f-abfb80a44378): Now all I had to do was learn Italian.\n",
-      "\n",
-      "Only stranieri (foreigners) had to take this entrance exa...\n"
+      "> Source (Doc id: 74f77233-e4fe-4389-9820-76dd9f765af6): Which meant being easy to use and inexpensive. It was lucky for us that we were poor, because tha...\n"
      ]
     }
    ],
@@ -473,7 +629,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "c0c5d984-db20-4679-adb1-1ea956a64150",
    "metadata": {},
@@ -488,7 +643,16 @@
    "execution_count": null,
    "id": "59b8379d-f08f-4334-8525-6ddf4d13e33f",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "from llama_index.core import Document\n",
     "\n",
@@ -502,7 +666,16 @@
    "execution_count": null,
    "id": "aa281be0-1c7d-4d9c-a208-0ee5b7ab9953",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
     "from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters\n",
     "\n",
@@ -560,9 +733,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "llama-index",
+   "display_name": "llama_index_v2",
    "language": "python",
-   "name": "llama-index"
+   "name": "llama_index_v2"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/llama-index-core/llama_index/core/query_pipeline/__init__.py b/llama-index-core/llama_index/core/query_pipeline/__init__.py
index 1f8dd6d3df484962e1dc4541dcb29f84a647515b..bbb39b1a3963372c35947252c9710c86c5587ed3 100644
--- a/llama-index-core/llama_index/core/query_pipeline/__init__.py
+++ b/llama-index-core/llama_index/core/query_pipeline/__init__.py
@@ -2,13 +2,19 @@
 from llama_index.core.query_pipeline.components.agent import (
     AgentFnComponent,
     AgentInputComponent,
+    CustomAgentComponent,
 )
 from llama_index.core.query_pipeline.components.argpacks import ArgPackComponent
 from llama_index.core.query_pipeline.components.function import FnComponent
 from llama_index.core.query_pipeline.components.input import InputComponent
 from llama_index.core.query_pipeline.components.router import RouterComponent
 from llama_index.core.query_pipeline.components.tool_runner import ToolRunnerComponent
-from llama_index.core.query_pipeline.query import QueryPipeline
+from llama_index.core.query_pipeline.query import (
+    QueryPipeline,
+    Link,
+    ChainableMixin,
+    QueryComponent,
+)
 
 __all__ = [
     "AgentFnComponent",
@@ -19,4 +26,8 @@ __all__ = [
     "RouterComponent",
     "ToolRunnerComponent",
     "QueryPipeline",
+    "CustomAgentComponent",
+    "QueryComponent",
+    "Link",
+    "ChainableMixin",
 ]