From 4ace6c533501467f574c9c5e38e5fad0ed5a9c3d Mon Sep 17 00:00:00 2001
From: Simon Suo <simonsdsuo@gmail.com>
Date: Mon, 3 Jul 2023 23:46:03 -0700
Subject: [PATCH] Update all notebooks to use native openai integration (#6696)

---
 benchmarks/struct_indices/spider/evaluate.py  |  23 +-
 .../struct_indices/spider/generate_sql.py     |   9 +-
 .../struct_indices/spider/spider_utils.py     |   9 +-
 .../agent/openai_agent_query_cookbook.ipynb   |   8 +-
 .../agent/openai_agent_query_plan.ipynb       |  10 +-
 .../callbacks/LlamaDebugHandler.ipynb         |  10 +-
 .../callbacks/TokenCountingHandler.ipynb      |  17 +-
 .../callbacks/WandbCallbackHandler.ipynb      |  20 +-
 .../chat_engine/chat_engine_react.ipynb       | 162 +++++++++++--
 .../citation/pdf_page_reference.ipynb         |  67 +++---
 .../City_Analysis-Decompose.ipynb             |  13 +-
 .../City_Analysis-Unified-Query.ipynb         |  16 +-
 .../city_analysis/City_Analysis.ipynb         |  22 +-
 .../PineconeDemo-CityAnalysis.ipynb           |  20 +-
 .../customization/llms/AzureOpenAI.ipynb      |   4 +-
 .../llms/SimpleIndexDemo-ChatGPT.ipynb        |  71 +-----
 .../SimpleIndexDemo-Huggingface_camel.ipynb   |   6 +-
 ...SimpleIndexDemo-Huggingface_stablelm.ipynb |   6 +-
 .../streaming/SimpleIndexDemo-streaming.ipynb |   3 +-
 ...ne_condense_question_stream_response.ipynb |  98 +++-----
 .../examples/docstore/MongoDocstoreDemo.ipynb |  14 +-
 .../RedisDocstoreIndexStoreDemo.ipynb         |  13 +-
 .../evaluation/QuestionGeneration.ipynb       | 224 +++++++++---------
 .../evaluation/TestNYC-Evaluation-Query.ipynb |  13 +-
 .../evaluation/TestNYC-Evaluation.ipynb       |  11 +-
 .../doc_summary/DocSummary.ipynb              |   6 +-
 .../knowledge_graph/KnowledgeGraphDemo.ipynb  |   6 +-
 ...orStoreIndex_vs_CustomIndex_combined.ipynb |   9 +-
 .../NebulaGraphKGIndexDemo.ipynb              |   6 +-
 .../struct_indices/SQLIndexDemo.ipynb         |   7 +-
 docs/examples/llm/langchain.ipynb             | 114 +++++++++
 .../LLMReranker-Gatsby.ipynb                  |  10 +-
 .../LLMReranker-Lyft-10k.ipynb                |   6 +-
 docs/examples/node_postprocessor/PII.ipynb    |   6 +-
 docs/examples/output_parsing/df_program.ipynb |  10 +-
 .../output_parsing/evaporate_program.ipynb    |  20 +-
 .../guidance_sub_question.ipynb               |  12 +-
 .../query_engine/JointQASummary.ipynb         |  10 +-
 .../SQLAutoVectorQueryEngine.ipynb            |   6 +-
 .../query_engine/SQLJoinQueryEngine.ipynb     |   6 +-
 .../query_engine/citation_query_engine.ipynb  |   5 +-
 .../query_engine/flare_query_engine.ipynb     |   7 +-
 .../query_engine/json_query_engine.ipynb      |   4 +-
 .../SimpleIndexDemo-multistep.ipynb           |  11 +-
 docs/examples/tools/OnDemandLoaderTool.ipynb  |   5 +
 docs/examples/usecases/10k_sub_question.ipynb |  83 ++++---
 .../usecases/10q_fn_agent-react-compare.ipynb |  13 +-
 docs/examples/usecases/10q_sub_question.ipynb |  14 +-
 .../vector_stores/SimpleIndexDemoMMR.ipynb    |  10 +-
 .../customization/llms_migration_guide.md     |   8 +-
 .../async/AsyncComposableIndicesSEC.ipynb     |   6 +-
 examples/docstore/DocstoreDemo.ipynb          |  13 +-
 examples/docstore/DynamoDBDocstoreDemo.ipynb  |  14 +-
 examples/docstore/MongoDocstoreDemo.ipynb     |  16 +-
 .../RedisDocstoreIndexStoreDemo.ipynb         |  13 +-
 examples/experimental/Evaporate.ipynb         |  14 +-
 .../paul_graham_essay/DavinciComparison.ipynb |  18 +-
 .../paul_graham_essay/GPT4Comparison.ipynb    |  18 +-
 .../test_wiki/TestNYC-Benchmark-GPT4.ipynb    |  37 ++-
 examples/test_wiki/TestNYC-Tree-GPT4.ipynb    |  11 +-
 experimental/cli/configuration.py             |   6 +-
 llama_index/agent/context_retriever_agent.py  |   4 +-
 llama_index/evaluation/dataset_generation.py  |  11 +-
 llama_index/indices/base.py                   |   9 +-
 llama_index/indices/service_context.py        |   5 +-
 llama_index/program/llm_program.py            |  15 +-
 tests/program/test_llm_program.py             |  14 +-
 67 files changed, 861 insertions(+), 626 deletions(-)
 create mode 100644 docs/examples/llm/langchain.ipynb
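
The change repeated across every file below is mechanical: the LangChain bridge imports (ChatOpenAI, OpenAIChat, plus the LLMPredictor wrapper where one was built by hand) are replaced with the native llama_index.llms.OpenAI class, which is handed to ServiceContext.from_defaults(llm=...) directly. A minimal before/after sketch of the pattern, assuming the 0.7-era API these hunks target:

    # before: LangChain bridge, wrapped in an LLMPredictor
    from langchain.chat_models import ChatOpenAI
    from llama_index import LLMPredictor, ServiceContext

    llm_predictor = LLMPredictor(
        llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
    )
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

    # after: native integration, no wrapper needed
    from llama_index.llms import OpenAI

    service_context = ServiceContext.from_defaults(
        llm=OpenAI(temperature=0, model="gpt-3.5-turbo")
    )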

diff --git a/benchmarks/struct_indices/spider/evaluate.py b/benchmarks/struct_indices/spider/evaluate.py
index ebed59cb17..8e54632266 100644
--- a/benchmarks/struct_indices/spider/evaluate.py
+++ b/benchmarks/struct_indices/spider/evaluate.py
@@ -5,9 +5,9 @@ import json
 import logging
 import os
 from typing import Dict, List, Optional
+from llama_index.llms.base import ChatMessage, MessageRole
 
-from llama_index.bridge.langchain import ChatOpenAI
-from llama_index.bridge.langchain import HumanMessage
+from llama_index.llms.openai import OpenAI
 from llama_index.response.schema import Response
 from spider_utils import create_indexes, load_examples
 from tqdm import tqdm
@@ -45,29 +45,30 @@ HypothesisAnswerCorrect: """
 
 
 def _answer(
-    llm: ChatOpenAI, question: str, sql_query: str, sql_result: Optional[str]
+    llm: OpenAI, question: str, sql_query: str, sql_result: Optional[str]
 ) -> str:
     prompt = answer_template.format(
         question=question, sql_query=sql_query, sql_result=sql_result
     )
-    response = llm([HumanMessage(content=prompt)])
-    return response.content
+    response = llm.chat([ChatMessage(role=MessageRole.USER, content=prompt)])
+    return response.message.content or ""
 
 
 def _match(
-    llm: ChatOpenAI, question: str, reference_answer: str, hypothesis_answer: str
+    llm: OpenAI, question: str, reference_answer: str, hypothesis_answer: str
 ) -> bool:
     prompt = match_template.format(
         question=question,
         reference_answer=reference_answer,
         hypothesis_answer=hypothesis_answer,
     )
-    response = llm([HumanMessage(content=prompt)])
-    return "true" in response.content.lower()
+    response = llm.chat([ChatMessage(role=MessageRole.USER, content=prompt)])
+    content = response.message.content or ""
+    return "true" in content.lower()
 
 
 def _get_answers(
-    llm: ChatOpenAI,
+    llm: OpenAI,
     indexes: Dict[str, SQLStructStoreIndex],
     db_names: List[str],
     sql_queries: List[str],
@@ -112,7 +113,7 @@ def _get_answers(
 
 
 def _match_answers(
-    llm: ChatOpenAI,
+    llm: OpenAI,
     gold_results: List[dict],
     pred_results: List[dict],
     examples: List[dict],
@@ -212,7 +213,7 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     # Create the LlamaIndexes for all databases.
-    llm = ChatOpenAI(model=args.model, temperature=0)
+    llm = OpenAI(model=args.model, temperature=0)
 
     # Load all examples.
     train, dev = load_examples(args.spider_dir)
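
For reference, the LangChain-style call llm([HumanMessage(content=prompt)]) becomes a typed chat call against the native client. A standalone sketch of the new shape used by _answer and _match above (the prompt is a placeholder):

    from llama_index.llms.base import ChatMessage, MessageRole
    from llama_index.llms.openai import OpenAI

    llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
    response = llm.chat([ChatMessage(role=MessageRole.USER, content="Say hello.")])
    # message.content is Optional[str], hence the `or ""` guards above
    print(response.message.content or "")
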
diff --git a/benchmarks/struct_indices/spider/generate_sql.py b/benchmarks/struct_indices/spider/generate_sql.py
index d32fc5f3fc..94bb7e88dd 100644
--- a/benchmarks/struct_indices/spider/generate_sql.py
+++ b/benchmarks/struct_indices/spider/generate_sql.py
@@ -5,14 +5,14 @@ import logging
 import os
 import re
 
-from llama_index.bridge.langchain import ChatOpenAI, OpenAI
-from llama_index.bridge.langchain import BaseLanguageModel
 from sqlalchemy import create_engine, text
 from tqdm import tqdm
 
 from llama_index import SQLStructStoreIndex, LLMPredictor, SQLDatabase
 from typing import Any, cast
 
+from llama_index.llms.openai import OpenAI
+
 logging.getLogger("root").setLevel(logging.WARNING)
 
 
@@ -105,10 +105,7 @@ if __name__ == "__main__":
         databases[db_name] = (SQLDatabase(engine=engine), engine)
 
     # Create the LlamaIndexes for all databases.
-    if args.model in ["gpt-3.5-turbo", "gpt-4"]:
-        llm: BaseLanguageModel = ChatOpenAI(model=args.model, temperature=0)
-    else:
-        llm = OpenAI(model=args.model, temperature=0)
+    llm = OpenAI(model=args.model, temperature=0)
     llm_predictor = LLMPredictor(llm=llm)
     llm_indexes = {}
     for db_name, (db, engine) in databases.items():
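
The dropped if/else existed because LangChain splits chat and completion models across two classes; the native OpenAI class takes either kind of model name, so one code path covers gpt-3.5-turbo, gpt-4, and text-davinci-003 alike. A small sketch, assuming a completion-style model:

    from llama_index.llms.openai import OpenAI

    # one class regardless of model family; .complete() and .chat() both work
    llm = OpenAI(model="text-davinci-003", temperature=0)
    print(llm.complete("Say hello.").text)
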
diff --git a/benchmarks/struct_indices/spider/spider_utils.py b/benchmarks/struct_indices/spider/spider_utils.py
index e29fc02e68..40c71338a7 100644
--- a/benchmarks/struct_indices/spider/spider_utils.py
+++ b/benchmarks/struct_indices/spider/spider_utils.py
@@ -2,13 +2,12 @@
 
 import json
 import os
-from typing import Dict, Tuple, Union
+from typing import Dict, Tuple
 
-from llama_index.bridge.langchain import OpenAI
-from llama_index.bridge.langchain import ChatOpenAI
 from sqlalchemy import create_engine, text
 
 from llama_index import SQLStructStoreIndex, LLMPredictor, SQLDatabase
+from llama_index.llms.openai import OpenAI
 
 
 def load_examples(spider_dir: str) -> Tuple[list, list]:
@@ -22,9 +21,7 @@ def load_examples(spider_dir: str) -> Tuple[list, list]:
     return train_spider + train_others, dev
 
 
-def create_indexes(
-    spider_dir: str, llm: Union[ChatOpenAI, OpenAI]
-) -> Dict[str, SQLStructStoreIndex]:
+def create_indexes(spider_dir: str, llm: OpenAI) -> Dict[str, SQLStructStoreIndex]:
     """Create indexes for all databases."""
     # Create all necessary SQL database objects.
     databases = {}
diff --git a/docs/examples/agent/openai_agent_query_cookbook.ipynb b/docs/examples/agent/openai_agent_query_cookbook.ipynb
index e9b7b55aac..e9f3540cb2 100644
--- a/docs/examples/agent/openai_agent_query_cookbook.ipynb
+++ b/docs/examples/agent/openai_agent_query_cookbook.ipynb
@@ -622,16 +622,16 @@
    "outputs": [],
    "source": [
     "from llama_index.node_parser import SimpleNodeParser\n",
-    "from llama_index import ServiceContext, LLMPredictor\n",
+    "from llama_index import ServiceContext\n",
     "from llama_index.storage import StorageContext\n",
     "from llama_index.vector_stores import PineconeVectorStore\n",
     "from llama_index.langchain_helpers.text_splitter import TokenTextSplitter\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "# define node parser and LLM\n",
     "chunk_size = 1024\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm_predictor=llm_predictor)\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-4\")\n",
+    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n",
     "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n",
     "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n",
     "\n",
diff --git a/docs/examples/agent/openai_agent_query_plan.ipynb b/docs/examples/agent/openai_agent_query_plan.ipynb
index 5c6aaaa8ad..4af2947fa7 100644
--- a/docs/examples/agent/openai_agent_query_plan.ipynb
+++ b/docs/examples/agent/openai_agent_query_plan.ipynb
@@ -60,7 +60,7 @@
    "source": [
     "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, GPTVectorStoreIndex\n",
     "from llama_index.response.pprint_utils import pprint_response\n",
-    "from langchain.chat_models import ChatOpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
@@ -72,12 +72,8 @@
    },
    "outputs": [],
    "source": [
-    "# llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\", max_tokens=-1, streaming=True))\n",
-    "# llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\", streaming=True))\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)\n",
-    "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+    "llm = OpenAI(temperature=0, model=\"gpt-4\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)"
    ]
   },
   {
diff --git a/docs/examples/callbacks/LlamaDebugHandler.ipynb b/docs/examples/callbacks/LlamaDebugHandler.ipynb
index 7138289864..1f9b27382e 100644
--- a/docs/examples/callbacks/LlamaDebugHandler.ipynb
+++ b/docs/examples/callbacks/LlamaDebugHandler.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "fedcd46b",
    "metadata": {},
@@ -61,12 +62,13 @@
    "outputs": [],
    "source": [
     "from llama_index import ServiceContext, LLMPredictor, TreeIndex\n",
-    "from langchain.chat_models import ChatOpenAI\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+    "from llama_index.llms import OpenAI\n",
+    "llm = OpenAI(model='gpt-3.5-turbo', temperature=0)\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "ee34d08b",
    "metadata": {},
@@ -87,6 +89,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "25851e27",
    "metadata": {},
@@ -146,6 +149,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "4e69b186",
    "metadata": {},
diff --git a/docs/examples/callbacks/TokenCountingHandler.ipynb b/docs/examples/callbacks/TokenCountingHandler.ipynb
index ac29cef074..83edf68f05 100644
--- a/docs/examples/callbacks/TokenCountingHandler.ipynb
+++ b/docs/examples/callbacks/TokenCountingHandler.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -25,11 +26,10 @@
    ],
    "source": [
     "import tiktoken\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "from llama_index import (\n",
     "    SimpleDirectoryReader, \n",
-    "    LLMPredictor, \n",
     "    VectorStoreIndex, \n",
     "    ServiceContext, \n",
     "    set_global_service_context\n",
@@ -41,6 +41,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -61,17 +62,16 @@
     "\n",
     "callback_manager = CallbackManager([token_counter])\n",
     "\n",
-    "llm_predictor = LLMPredictor(\n",
-    "    llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)\n",
-    ")\n",
+    "llm = OpenAI(model='gpt-3.5-turbo', temperature=0)\n",
     "\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, callback_manager=callback_manager)\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, callback_manager=callback_manager)\n",
     "\n",
     "# set the global default!\n",
     "set_global_service_context(service_context)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -120,6 +120,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -136,6 +137,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -179,6 +181,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -186,6 +189,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -228,6 +232,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
diff --git a/docs/examples/callbacks/WandbCallbackHandler.ipynb b/docs/examples/callbacks/WandbCallbackHandler.ipynb
index 41bffbfe3f..6c2eaf4be8 100644
--- a/docs/examples/callbacks/WandbCallbackHandler.ipynb
+++ b/docs/examples/callbacks/WandbCallbackHandler.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c0d8b66c",
    "metadata": {},
@@ -54,10 +55,11 @@
     ")\n",
     "from llama_index.indices.composability import ComposableGraph\n",
     "from llama_index import load_index_from_storage, load_graph_from_storage\n",
-    "from langchain.chat_models import ChatOpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e6feb252",
    "metadata": {},
@@ -72,10 +74,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(model_name='gpt-4', temperature=0))"
+    "llm = OpenAI(model='gpt-4', temperature=0)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "8790f4c7",
    "metadata": {},
@@ -110,10 +113,11 @@
     "wandb_callback = WandbCallbackHandler(run_args=run_args)\n",
     "\n",
     "callback_manager = CallbackManager([llama_debug, wandb_callback])\n",
-    "service_context = ServiceContext.from_defaults(callback_manager=callback_manager, llm_predictor=llm_predictor)"
+    "service_context = ServiceContext.from_defaults(callback_manager=callback_manager, llm=llm)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c4cf969a",
    "metadata": {},
@@ -122,6 +126,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "a4a7c101",
    "metadata": {},
@@ -176,6 +181,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0a948efc",
    "metadata": {},
@@ -202,6 +208,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "7ed156a6",
    "metadata": {},
@@ -240,6 +247,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "ae4de4a9",
    "metadata": {},
@@ -289,6 +297,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d7250272",
    "metadata": {},
@@ -426,6 +435,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "60aa7e5f",
    "metadata": {},
@@ -509,6 +519,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "cda70171",
    "metadata": {},
@@ -535,6 +546,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "ff60da73",
    "metadata": {},
@@ -580,6 +592,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "b30ddfc9",
    "metadata": {},
@@ -630,6 +643,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c49ff101",
    "metadata": {},
diff --git a/docs/examples/chat_engine/chat_engine_react.ipynb b/docs/examples/chat_engine/chat_engine_react.ipynb
index 66dd3de149..636356e654 100644
--- a/docs/examples/chat_engine/chat_engine_react.ipynb
+++ b/docs/examples/chat_engine/chat_engine_react.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "2509fcb7-f84e-4c01-b23c-6e0942c2df10",
    "metadata": {
@@ -11,6 +12,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "84aca142-a611-4a18-8673-c261edd1631c",
    "metadata": {
@@ -21,6 +23,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9d9dcfd5-ec96-4407-a5b7-ca3fadf22419",
    "metadata": {
@@ -34,6 +37,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d765ea5c-e193-4298-96cc-978c87ffcafe",
    "metadata": {},
@@ -44,6 +48,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "70be3c76-98d4-4df5-9cc6-8ceba34f23fb",
    "metadata": {
@@ -54,6 +59,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "8841bc6e-a196-4416-b886-31ea93e33d13",
    "metadata": {
@@ -65,22 +71,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "8bf4cb33-46c7-4f45-a43c-b478f68e9b0b",
    "metadata": {
     "tags": []
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/suo/miniconda3/envs/llama/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.7) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
    "source": [
-    "from langchain.chat_models import ChatOpenAI\n",
     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
+    "from langchain.llms import OpenAI\n",
     "\n",
-    "service_context = ServiceContext.from_defaults(llm=ChatOpenAI())\n",
+    "service_context = ServiceContext.from_defaults(llm=OpenAI())\n",
     "data = SimpleDirectoryReader(input_dir=\"../data/paul_graham/\").load_data()\n",
     "index = VectorStoreIndex.from_documents(data, service_context=service_context)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0df6a939-9f2c-44cc-ad9d-8203029e8fbd",
    "metadata": {
@@ -92,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "05eb7917-998c-4c36-b41a-84d29d3468c9",
    "metadata": {
     "tags": []
@@ -103,6 +119,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "4bbfb7bb-827d-4968-b3ae-7c9f92224e8c",
    "metadata": {},
@@ -124,15 +141,15 @@
      "text": [
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
       "\u001b[32;1m\u001b[1;3m\n",
       "Thought: Do I need to use a tool? Yes\n",
       "Action: Query Engine Tool\n",
       "Action Input: What did Paul Graham do in the summer of 1995?\u001b[0m\n",
       "Observation: \u001b[36;1m\u001b[1;3m\n",
-      "In the summer of 1995, Paul Graham submitted the camera-ready copy of ANSI Common Lisp to the publishers and started trying to write software to build online stores. He and Robert Morris wrote software to resize images and set up an http server to serve the pages, and they attempted to sign up galleries to use their software.\u001b[0m\n",
+      "In the summer of 1995, Paul Graham worked on building a web application for creating web applications. He recruited Dan Giffin, two undergrads, and attempted to build what he envisioned as a software as a service company called Aspra. He also worked on a new dialect of Lisp which he called Arc.\u001b[0m\n",
       "Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
-      "AI: Paul Graham was working on software to build online stores and resize images in the summer of 1995.\u001b[0m\n",
+      "AI: This is the information I found about what Paul Graham did in the summer of 1995.\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -154,7 +171,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Paul Graham was working on software to build online stores and resize images in the summer of 1995.\n"
+      "This is the information I found about what Paul Graham did in the summer of 1995.\n"
      ]
     }
    ],
@@ -176,10 +193,8 @@
      "text": [
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-      "\u001b[32;1m\u001b[1;3m\n",
-      "Thought: Do I need to use a tool? No\n",
-      "AI: You asked me what Paul Graham did in the summer of 1995.\u001b[0m\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3mAI: You asked me what Paul Graham did in the summer of 1995.\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -210,6 +225,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "ac91c351-d6ec-4958-9290-ad3078e7abe3",
    "metadata": {
@@ -220,6 +236,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "60b87162-1a18-4390-b9a5-ddec6c1d1e36",
    "metadata": {},
@@ -236,12 +253,13 @@
    },
    "outputs": [],
    "source": [
-    "from langchain.chat_models import ChatOpenAI\n",
     "from llama_index import ServiceContext\n",
-    "service_context = ServiceContext.from_defaults(llm=ChatOpenAI(temperature=0.))"
+    "from langchain.chat_models import ChatOpenAI\n",
+    "service_context = ServiceContext.from_defaults(llm=ChatOpenAI(temperature=0., model='gpt-3.5-turbo'))"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "aae7f923-cbd5-40af-ba2e-d3c2e3babcbf",
    "metadata": {},
@@ -275,15 +293,15 @@
      "text": [
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
       "\u001b[32;1m\u001b[1;3m{\n",
       "    \"action\": \"Query Engine Tool\",\n",
       "    \"action_input\": \"What did Paul Graham do in the summer of 1995?\"\n",
       "}\u001b[0m\n",
-      "Observation: \u001b[36;1m\u001b[1;3mIn the summer of 1995, Paul Graham and Robert Morris started trying to write software to build online stores.\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mBased on the given context information, it is not mentioned what Paul Graham did in the summer of 1995.\u001b[0m\n",
       "Thought:\u001b[32;1m\u001b[1;3m{\n",
       "    \"action\": \"Final Answer\",\n",
-      "    \"action_input\": \"In the summer of 1995, Paul Graham and Robert Morris started trying to write software to build online stores.\"\n",
+      "    \"action_input\": \"Based on the available information, it is not mentioned what Paul Graham did in the summer of 1995.\"\n",
       "}\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -306,7 +324,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "In the summer of 1995, Paul Graham and Robert Morris started trying to write software to build online stores.\n"
+      "Based on the available information, it is not mentioned what Paul Graham did in the summer of 1995.\n"
      ]
     }
    ],
@@ -328,10 +346,15 @@
      "text": [
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
       "\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What did Paul Graham do in the summer of 1995?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mBased on the given context information, it is not mentioned what Paul Graham did in the summer of 1995.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
       "    \"action\": \"Final Answer\",\n",
-      "    \"action_input\": \"You asked me 'what did Paul Graham do in the summer of 1995?'\"\n",
+      "    \"action_input\": \"Based on the available information, it is not mentioned what Paul Graham did in the summer of 1995.\"\n",
       "}\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -354,7 +377,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "You asked me 'what did Paul Graham do in the summer of 1995?'\n"
+      "Based on the available information, it is not mentioned what Paul Graham did in the summer of 1995.\n"
      ]
     }
    ],
@@ -363,6 +386,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "42c4a975-8034-447f-a5fe-1e71e59f02c0",
    "metadata": {},
@@ -396,11 +420,97 @@
      "text": [
       "\n",
       "\n",
-      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
       "\u001b[32;1m\u001b[1;3m{\n",
-      "    \"action\": \"Final Answer\",\n",
-      "    \"action_input\": \"You asked me: 'What is Assistant and what can it do?'\"\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What did the user ask before?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mBased on the given context information, it is not possible to determine what the user asked before.\u001b[0m\n",
+      "Thought:"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
+      "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m{\n",
+      "    \"action\": \"Query Engine Tool\",\n",
+      "    \"action_input\": \"What was the response to the user's last comment?\"\n",
       "}\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3mThe response to the user's last comment is not provided in the given context information.\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -422,7 +532,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "You asked me: 'What is Assistant and what can it do?'\n"
+      "Agent stopped due to iteration limit or time limit.\n"
      ]
     }
    ],
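
Two things are worth flagging in this notebook. First, the default-flow cell now imports langchain.llms.OpenAI rather than the native class: the ReAct chat engine at this point still drives a LangChain agent under the hood (the "Retrying langchain.chat_models.openai..." trace and the re-recorded run that ends with "Agent stopped due to iteration limit or time limit." both come from LangChain). Second, the chat engine construction itself sits in cells these hunks don't show; pieced together, the flow is roughly as follows (chat_mode="react" is inferred from the notebook's title):

    from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
    from langchain.llms import OpenAI  # this notebook keeps a LangChain LLM

    service_context = ServiceContext.from_defaults(llm=OpenAI())
    data = SimpleDirectoryReader(input_dir="../data/paul_graham/").load_data()
    index = VectorStoreIndex.from_documents(data, service_context=service_context)
    chat_engine = index.as_chat_engine(chat_mode="react", verbose=True)
    print(chat_engine.chat("What did Paul Graham do in the summer of 1995?"))
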
diff --git a/docs/examples/citation/pdf_page_reference.ipynb b/docs/examples/citation/pdf_page_reference.ipynb
index cf0a03712b..9ac7e61c62 100644
--- a/docs/examples/citation/pdf_page_reference.ipynb
+++ b/docs/examples/citation/pdf_page_reference.ipynb
@@ -2,21 +2,12 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "ffa328f9",
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/suo/miniconda3/envs/llama/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from llama_index import SimpleDirectoryReader, VectorStoreIndex, download_loader, RAKEKeywordTableIndex"
    ]
@@ -32,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "3a003a97-874c-4807-b017-37270ea7a682",
    "metadata": {
     "tags": []
@@ -40,12 +31,10 @@
    "outputs": [],
    "source": [
     "from llama_index import LLMPredictor, ServiceContext\n",
-    "from langchain import OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=LLMPredictor(\n",
-    "        llm=OpenAI(temperature=0, model_name=\"text-davinci-003\", streaming=True)\n",
-    "    )\n",
+    "    llm=OpenAI(temperature=0, model=\"text-davinci-003\")\n",
     ")"
    ]
   },
@@ -62,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "0ab64036-518b-488a-9bc5-888534c4bf50",
    "metadata": {
     "tags": []
@@ -75,7 +64,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "id": "aa856570-ae8a-4fe6-8f7c-59a698308fdb",
    "metadata": {
     "tags": []
@@ -87,7 +76,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "id": "40133993-2413-444c-a2a3-796cf9e263f8",
    "metadata": {
     "tags": []
@@ -111,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "a2d96a34-481c-4330-9618-8bae1aa087d6",
    "metadata": {
     "tags": []
@@ -122,16 +111,16 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "• Decreased demand for our platform leading to decreased revenues and decreased earning opportunities for drivers on our platform (Page 6)\n",
-      "• Establishing new health and safety requirements for ridesharing and updating workplace policies (Page 6)\n",
-      "• Cost-cutting measures, including lay-offs, furloughs and salary reductions (Page 18)\n",
-      "• Delays or prevention of testing, developing or deploying autonomous vehicle-related technology (Page 18)\n",
-      "• Reduced consumer demand for autonomous vehicle travel resulting from an overall reduced demand for travel (Page 18)\n",
-      "• Impacts to the supply chains of our current or prospective partners and suppliers (Page 18)\n",
-      "• Economic impacts limiting our or our current or prospective partners’ or suppliers’ ability to expend resources on developing and deploying autonomous vehicle-related technology (Page 18)\n",
-      "• Decreased morale, culture and ability to attract and retain employees (Page 18)\n",
-      "• Reduced demand for services on our platform or greater operating expenses (Page 18)\n",
-      "• Decreased revenues and earnings (Page 18)"
+      "• The ongoing COVID-19 pandemic continues to impact communities in the United States, Canada and globally (page 6). \n",
+      "• The pandemic and related responses caused decreased demand for our platform leading to decreased revenues as well as decreased earning opportunities for drivers on our platform (page 6).\n",
+      "• Our business continues to be impacted by the COVID-19 pandemic (page 6).\n",
+      "• The exact timing and pace of the recovery remain uncertain (page 6).\n",
+      "• The extent to which our operations will continue to be impacted by the pandemic will depend largely on future developments, which are highly uncertain and cannot be accurately predicted (page 6).\n",
+      "• An increase in cases due to variants of the virus has caused many businesses to delay employees returning to the office (page 6).\n",
+      "• We anticipate that continued social distancing, altered consumer behavior, reduced travel and commuting, and expected corporate cost cutting will be significant challenges for us (page 6).\n",
+      "• We have adopted multiple measures, including, but not limited, to establishing new health and safety requirements for ridesharing and updating workplace policies (page 6).\n",
+      "• We have had to take certain cost-cutting measures, including lay-offs, furloughs and salary reductions, which may have adversely affect employee morale, our culture and our ability to attract and retain employees (page 18).\n",
+      "• The ultimate impact of the COVID-19 pandemic on our users, customers, employees, business, operations and financial performance depends on many factors that are not within our control (page 18)."
      ]
     }
    ],
@@ -151,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "id": "ea3a9505-23e7-482f-a516-87c3ccd3da70",
    "metadata": {
     "tags": []
@@ -163,16 +152,16 @@
      "text": [
       "-----\n",
       "Text:\t Impact of COVID-19 to our BusinessThe  ongoing  COVID-19  pandemic  continues  to  impact  communities  in  the  United  States,  Canada  and  globally.  Since  the  pandemic  began  in  March  2020,governments  and  private  businesses  -  at  the  recommendation  of  public  health  officials  -  have  enacted  precautions  to  mitigate  the  spread  of  the  virus,  including  travelrestrictions  and  social  distancing  measures  in  many  regions  of  the  United  States  and  Canada,  and  many  enterprises  have  instituted  and  maintained  work  from  homeprograms and limited  the number of employees on site. Beginning in the middle of March 2020, the pandemic and these related responses caused decreased demand for ourplatform  leading to decreased revenues as well as decreased earning opportunities for drivers on our platform. Our business continues to be impacted by the COVID-19pandemic. Although  we have seen some signs of demand improving, particularly compared to the dema ...\n",
-      "Metadata:\t {'page_label': '6'}\n",
-      "Score:\t 0.823\n",
+      "Metadata:\t {'page_label': '6', 'file_name': 'lyft_2021.pdf'}\n",
+      "Score:\t 0.821\n",
       "-----\n",
-      "Text:\t storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affectour business, financial condi tion and results of operation.• The COVID-19 pandemic may delay or prevent us, or our current or prospective partners and suppliers, from being able to test, develop or deploy autonomousvehicle-related  technology,  including  through  direct  impacts  of  the  COVID-19  virus  on  employee  and  contractor  health;  reduced  consumer  demand  forautonomous vehicle  travel resulting from an overall reduced demand for travel; shelter-in-place orders by local, state or federal governments negatively impactingoperations,  including our ability to test autonomous vehicle-related technology; impacts to the supply chains of our current or prospective partners and suppliers;or  economic  impacts  limiting  our  or  our  current  or  prospective  partners’  or  suppliers’  ability  to  expend  resources  o ...\n",
-      "Metadata:\t {'page_label': '18'}\n",
-      "Score:\t 0.811\n",
+      "Text:\t will  continue  to  be  impacted  by  the  pandemic  will  depend  largely  on  future  developments,  which  are  highly  uncertain  and  cannot  beaccurately predicted,  including new information which may emerge concerning COVID-19 variants and the severity of the pandemic and actions by government authoritiesand  private businesses to contain the pandemic or recover from its impact, among other things. For example, an increase in cases due to variants of the virus has causedmany  businesses to delay employees returning to the office. Even as travel restrictions and shelter-in-place orders are modified or lifted, we anticipate that continued socialdistancing, altered consu mer behavior, reduced travel and commuting, and expected corporate cost cutting will be significant challenges for us. The strength and duration ofthese challenges cannot b e presently estimated.In  response to the COVID-19 pandemic, we have adopted multiple measures, including, but not limited, to establishing ne ...\n",
+      "Metadata:\t {'page_label': '56', 'file_name': 'lyft_2021.pdf'}\n",
+      "Score:\t 0.808\n",
       "-----\n",
-      "Text:\t and unpredictable effects of COVID-19, we are not currently in a position to forecast the expected impact of COVID-19 on our financialand operating results. Our business could be adversely affected by natural d isasters, public health crises, political crises, economic downturns or other unexpected events.A significant  natural disaster, such as an earthquake, fire, hurricane, tornado, flood or significant power outage, could disrupt our operations, mobile networks, theInternet  or the operations of our third-party technology providers. In particular, our corporate headquarters are located in the San Francisco Bay Area, a region known forseismic  activity and increasingly for fires. The impact of climate change may increase these risks. In addition, any public health crises, such as the COVID-19 pandemic,other  epidemics, political crises, such as terrorist attacks, war and other political or social instability and other geopolitical developments, or other catastrophic events,whether   ...\n",
-      "Metadata:\t {'page_label': '18'}\n",
-      "Score:\t 0.806\n"
+      "Text:\t storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affectour business, financial condi tion and results of operation.• The COVID-19 pandemic may delay or prevent us, or our current or prospective partners and suppliers, from being able to test, develop or deploy autonomousvehicle-related  technology,  including  through  direct  impacts  of  the  COVID-19  virus  on  employee  and  contractor  health;  reduced  consumer  demand  forautonomous vehicle  travel resulting from an overall reduced demand for travel; shelter-in-place orders by local, state or federal governments negatively impactingoperations,  including our ability to test autonomous vehicle-related technology; impacts to the supply chains of our current or prospective partners and suppliers;or  economic  impacts  limiting  our  or  our  current  or  prospective  partners’  or  suppliers’  ability  to  expend  resources  o ...\n",
+      "Metadata:\t {'page_label': '18', 'file_name': 'lyft_2021.pdf'}\n",
+      "Score:\t 0.805\n"
      ]
     }
    ],
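
The re-recorded Metadata/Score output above is produced by walking the response's source nodes; a reconstructed sketch (the index and query construction mirror the notebook but are not shown in these hunks):

    from llama_index import SimpleDirectoryReader, VectorStoreIndex

    docs = SimpleDirectoryReader(input_files=["lyft_2021.pdf"]).load_data()
    index = VectorStoreIndex.from_documents(docs)
    response = index.as_query_engine(similarity_top_k=3).query(
        "What was the impact of COVID? Show statements in bullet form "
        "and show page reference after each statement."
    )
    for source in response.source_nodes:
        print("Metadata:\t", source.node.metadata)
        print("Score:\t", round(source.score, 3))
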
diff --git a/docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb b/docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb
index a5127cd4cc..48b4b48547 100644
--- a/docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb
+++ b/docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb
@@ -59,10 +59,8 @@
                 "    VectorStoreIndex, \n",
                 "    SimpleKeywordTableIndex, \n",
                 "    SimpleDirectoryReader,\n",
-                "    LLMPredictor,\n",
                 "    ServiceContext\n",
-                ")\n",
-                "from langchain.llms.openai import OpenAIChat"
+                ")"
             ]
         },
         {
@@ -175,8 +173,11 @@
             ],
             "source": [
                 "# # LLM Predictor (gpt-3.5-turbo)\n",
-                "llm_predictor_chatgpt = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt)"
+                "from llama_index.llms.openai import OpenAI\n",
+                "\n",
+                "\n",
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context = ServiceContext.from_defaults(llm=chatgpt)"
             ]
         },
         {
@@ -359,7 +360,7 @@
             "source": [
                 "from llama_index.indices.query.query_transform.base import DecomposeQueryTransform\n",
                 "decompose_transform = DecomposeQueryTransform(\n",
-                "    llm_predictor_chatgpt, verbose=True\n",
+                "    service_context.llm_predictor, verbose=True\n",
                 ")"
             ]
         },
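
Since callers now hold an llm rather than a hand-built LLMPredictor, the predictor that DecomposeQueryTransform expects is reached through the service context, which constructs one internally. The pattern in isolation:

    from llama_index import ServiceContext
    from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
    from llama_index.llms.openai import OpenAI

    service_context = ServiceContext.from_defaults(
        llm=OpenAI(temperature=0, model="gpt-3.5-turbo")
    )
    decompose_transform = DecomposeQueryTransform(
        service_context.llm_predictor, verbose=True
    )
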
diff --git a/docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb b/docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb
index a844cc24c9..fc2d8116e8 100644
--- a/docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb
+++ b/docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb
@@ -67,10 +67,8 @@
                 "    VectorStoreIndex, \n",
                 "    SimpleKeywordTableIndex, \n",
                 "    SimpleDirectoryReader,\n",
-                "    LLMPredictor,\n",
                 "    ServiceContext\n",
-                ")\n",
-                "from langchain.llms.openai import OpenAIChat"
+                ")"
             ]
         },
         {
@@ -182,15 +180,17 @@
                 }
             ],
             "source": [
-                "# # LLM Predictor (gpt-3.5-turbo)\n",
-                "llm_predictor_chatgpt = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
+                "from llama_index.llms import OpenAI\n",
+                "\n",
+                "\n",
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
                 "service_context = ServiceContext.from_defaults(\n",
-                "    llm_predictor=llm_predictor_chatgpt, chunk_size=1024\n",
+                "    llm=chatgpt, chunk_size=1024\n",
                 ")\n",
                 "\n",
-                "llm_predictor_gpt4 = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-4\"))\n",
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
                 "service_context = ServiceContext.from_defaults(\n",
-                "    llm_predictor=llm_predictor_gpt4, chunk_size=1024\n",
+                "    llm=gpt4, chunk_size=1024\n",
                 ")"
             ]
         },
diff --git a/docs/examples/composable_indices/city_analysis/City_Analysis.ipynb b/docs/examples/composable_indices/city_analysis/City_Analysis.ipynb
index 40531466fd..e7e3fe1009 100644
--- a/docs/examples/composable_indices/city_analysis/City_Analysis.ipynb
+++ b/docs/examples/composable_indices/city_analysis/City_Analysis.ipynb
@@ -1,6 +1,7 @@
 {
     "cells": [
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "cfb64210-9c6b-47d7-81f4-67dbdab68e4c",
             "metadata": {
@@ -213,11 +214,12 @@
                 "    LLMPredictor,\n",
                 "    ServiceContext\n",
                 ")\n",
-                "from langchain.llms.openai import OpenAIChat, OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "import requests"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "49e0d841-680f-4a0c-b455-788b54978ebf",
             "metadata": {
@@ -292,6 +294,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "f1782198-c0de-4679-8951-1297c21b8639",
             "metadata": {
@@ -312,12 +315,12 @@
             "outputs": [],
             "source": [
                 "# LLM Predictor (text-davinci-003)\n",
-                "llm_predictor_davinci = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-                "service_context_davinci = ServiceContext.from_defaults(llm_predictor=llm_predictor_davinci)\n",
+                "davinci = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+                "service_context_davinci = ServiceContext.from_defaults(llm=davinci)\n",
                 "\n",
                 "# # LLM Predictor (gpt-3.5-turbo)\n",
-                "llm_predictor_chatgpt = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt)"
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt)"
             ]
         },
         {
@@ -410,6 +413,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d4d3cd8b-4134-4cfa-8002-e0a34694d2e1",
             "metadata": {
@@ -463,6 +467,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "49c900ee-a31f-4fcd-bb44-ff2cd12a41eb",
             "metadata": {
@@ -473,6 +478,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "e0a8fa6a-e96e-4341-bb43-7547415f766e",
             "metadata": {
@@ -594,6 +600,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "018d0a51-3a3f-4dc5-9e1d-f2e79eb0cc43",
             "metadata": {
@@ -669,6 +676,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d3cb4d7b-7bcc-46bf-b7d6-d0230c3d7fdd",
             "metadata": {
@@ -744,6 +752,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "53f527c8-0d53-4b29-8f1f-7b5bf22ca55e",
             "metadata": {
@@ -817,6 +826,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "7b299ebe-cdbd-4abf-9015-4894f6aa94ba",
             "metadata": {
@@ -2296,4 +2306,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb b/docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb
index b91a4acdef..c35233df74 100644
--- a/docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb
+++ b/docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb
@@ -1,6 +1,7 @@
 {
     "cells": [
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "cfb64210-9c6b-47d7-81f4-67dbdab68e4c",
             "metadata": {
@@ -78,10 +79,11 @@
                 "    ServiceContext\n",
                 ")\n",
                 "from llama_index.vector_stores import PineconeVectorStore\n",
-                "from langchain.llms.openai import OpenAIChat"
+                "from llama_index.llms import OpenAI"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "49e0d841-680f-4a0c-b455-788b54978ebf",
             "metadata": {
@@ -160,6 +162,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "84bfcaa1-db15-45ba-8af1-fee548354965",
             "metadata": {},
@@ -213,14 +216,12 @@
             },
             "outputs": [],
             "source": [
-                "# LLM Predictor (gpt-3.5-turbo)\n",
-                "llm_predictor_chatgpt = LLMPredictor(\n",
-                "    llm=OpenAIChat(temperature=0, model_name=\"gpt-3.5-turbo\")\n",
-                ")\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt)"
+                "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context = ServiceContext.from_defaults(llm=llm)"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "b373db78",
             "metadata": {},
@@ -258,6 +259,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "b1d69b03",
             "metadata": {},
@@ -315,6 +317,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "17605939-09ce-4405-a92a-8f296c941893",
             "metadata": {},
@@ -350,6 +353,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d4d3cd8b-4134-4cfa-8002-e0a34694d2e1",
             "metadata": {
@@ -426,6 +430,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "49c900ee-a31f-4fcd-bb44-ff2cd12a41eb",
             "metadata": {
@@ -436,6 +441,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "e0a8fa6a-e96e-4341-bb43-7547415f766e",
             "metadata": {
@@ -1883,4 +1889,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/docs/examples/customization/llms/AzureOpenAI.ipynb b/docs/examples/customization/llms/AzureOpenAI.ipynb
index 84b1fd8afd..d5765cf47f 100644
--- a/docs/examples/customization/llms/AzureOpenAI.ipynb
+++ b/docs/examples/customization/llms/AzureOpenAI.ipynb
@@ -36,7 +36,6 @@
                 "from llama_index import (\n",
                 "    VectorStoreIndex,\n",
                 "    SimpleDirectoryReader, \n",
-                "    LLMPredictor,\n",
                 "    ServiceContext\n",
                 ")\n",
                 "import logging\n",
@@ -91,7 +90,6 @@
                 "    \"api_type\": openai.api_type,\n",
                 "    \"api_version\": openai.api_version,\n",
                 "})\n",
-                "llm_predictor = LLMPredictor(llm=llm)\n",
                 "\n",
                 "# You need to deploy your own embedding model as well as your own chat completion model\n",
                 "embedding_llm = LangchainEmbedding(\n",
@@ -117,7 +115,7 @@
             "outputs": [],
             "source": [
                 "service_context = ServiceContext.from_defaults(\n",
-                "    llm_predictor=llm_predictor,\n",
+                "    llm=llm,\n",
                 "    embed_model=embedding_llm,\n",
                 ")"
             ]
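
The Azure notebook keeps its langchain AzureOpenAI instance and now passes it to ServiceContext.from_defaults directly, which wraps non-native models internally. Roughly, with placeholder deployment names:

    from langchain.llms import AzureOpenAI
    from langchain.embeddings import OpenAIEmbeddings
    from llama_index import LangchainEmbedding, ServiceContext

    # Placeholder Azure deployments, substitute your own resources.
    llm = AzureOpenAI(deployment_name="my-davinci", model_name="text-davinci-003")
    embedding_llm = LangchainEmbedding(OpenAIEmbeddings(deployment="my-embedding"))

    # The langchain LLM is accepted as-is; llm_predictor is gone.
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding_llm)
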
diff --git a/docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb b/docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb
index 7b20933081..ae4662258e 100644
--- a/docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb
+++ b/docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
    "metadata": {},
@@ -9,6 +10,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
    "metadata": {},
@@ -32,7 +34,7 @@
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -58,9 +60,9 @@
    },
    "outputs": [],
    "source": [
-    "# LLM Predictor (gpt-3.5-turbo) + service context\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)"
+    "# setup service context\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)"
    ]
   },
   {
@@ -77,6 +79,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
    "metadata": {},
@@ -85,6 +88,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "83e2905e-3789-4793-82b9-0ac488246824",
    "metadata": {},
@@ -203,6 +207,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "88ca1808-d112-4c28-b110-b65dcc9b7207",
    "metadata": {},
@@ -235,6 +240,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "6cb664e8-f53f-4d6c-a086-1f2784cc1dc8",
    "metadata": {},
@@ -288,63 +294,6 @@
     "response.print_response_stream()"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "2e024521-97b5-417f-8c27-950983f52cda",
-   "metadata": {},
-   "source": [
-    "### [Beta] Use ChatGPTLLMPredictor\n",
-    "\n",
-    "Very simple GPT-Index-native ChatGPT wrapper. Note: this is a beta feature. If this doesn't work please\n",
-    "use the suggested flow above."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a49d9a1b-21fb-4153-ad24-191a13513d64",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# use ChatGPT [beta]\n",
-    "from llama_index.llm_predictor.chatgpt import ChatGPTLLMPredictor\n",
-    "from langchain.prompts.chat import SystemMessagePromptTemplate\n",
-    "\n",
-    "llm_predictor = ChatGPTLLMPredictor()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "596af2aa-7ddf-41f2-801b-4a24a4980dd8",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "query_engine = index.as_query_engine(\n",
-    "    service_context=service_context,\n",
-    "    streaming=True,\n",
-    ")\n",
-    "response = query_engine.query(\n",
-    "    \"What did the author do during his time at RISD?\", \n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "771e20ba-ccba-447e-89d6-8d731accc6f3",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "response.print_response_stream()"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_camel.ipynb b/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_camel.ipynb
index 659ec809fa..f1af55393a 100644
--- a/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_camel.ipynb
+++ b/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_camel.ipynb
@@ -51,7 +51,7 @@
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
-    "from llama_index.llm_predictor import HuggingFaceLLMPredictor"
+    "from llama_index.llms import HuggingFaceLLM"
    ]
   },
   {
@@ -104,7 +104,7 @@
    ],
    "source": [
     "import torch\n",
-    "hf_predictor = HuggingFaceLLMPredictor(\n",
+    "llm = HuggingFaceLLM(\n",
     "    context_window=2048, \n",
     "    max_new_tokens=256,\n",
     "    temperature=0.25,\n",
@@ -117,7 +117,7 @@
     "    # uncomment this if using CUDA to reduce memory usage\n",
     "    # model_kwargs={\"torch_dtype\": torch.float16}\n",
     ")\n",
-    "service_context = ServiceContext.from_defaults(chunk_size=512, llm_predictor=hf_predictor)"
+    "service_context = ServiceContext.from_defaults(chunk_size=512, llm=llm)"
    ]
   },
   {
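
HuggingFaceLLM takes the same keyword arguments as HuggingFaceLLMPredictor did (see the unchanged context lines above), so both HuggingFace notebooks change only the class name and the service-context kwarg. Condensed (model name assumed from the camel notebook; prompt-template kwargs omitted):

    from llama_index import ServiceContext
    from llama_index.llms import HuggingFaceLLM

    llm = HuggingFaceLLM(
        context_window=2048,
        max_new_tokens=256,
        temperature=0.25,
        tokenizer_name="Writer/camel-5b-hf",
        model_name="Writer/camel-5b-hf",
        device_map="auto",
    )
    service_context = ServiceContext.from_defaults(chunk_size=512, llm=llm)
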
diff --git a/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_stablelm.ipynb b/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_stablelm.ipynb
index 613ef0b774..eb464e755c 100644
--- a/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_stablelm.ipynb
+++ b/docs/examples/customization/llms/SimpleIndexDemo-Huggingface_stablelm.ipynb
@@ -51,7 +51,7 @@
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
-    "from llama_index.llm_predictor import HuggingFaceLLMPredictor"
+    "from llama_index.llms import HuggingFaceLLM"
    ]
   },
   {
@@ -106,7 +106,7 @@
    ],
    "source": [
     "import torch\n",
-    "hf_predictor = HuggingFaceLLMPredictor(\n",
+    "llm = HuggingFaceLLM(\n",
     "    context_window=4096, \n",
     "    max_new_tokens=256,\n",
     "    temperature=0.7,\n",
@@ -121,7 +121,7 @@
     "    # uncomment this if using CUDA to reduce memory usage\n",
     "    # model_kwargs={\"torch_dtype\": torch.float16}\n",
     ")\n",
-    "service_context = ServiceContext.from_defaults(chunk_size=1024, llm_predictor=hf_predictor)"
+    "service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm)"
    ]
   },
   {
diff --git a/docs/examples/customization/streaming/SimpleIndexDemo-streaming.ipynb b/docs/examples/customization/streaming/SimpleIndexDemo-streaming.ipynb
index bf9e1258f1..cfe629e1e0 100644
--- a/docs/examples/customization/streaming/SimpleIndexDemo-streaming.ipynb
+++ b/docs/examples/customization/streaming/SimpleIndexDemo-streaming.ipynb
@@ -52,8 +52,7 @@
     "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
-    "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
-    "from langchain import OpenAI"
+    "from llama_index import VectorStoreIndex, SimpleDirectoryReader"
    ]
   },
   {
diff --git a/docs/examples/customization/streaming/chat_engine_condense_question_stream_response.ipynb b/docs/examples/customization/streaming/chat_engine_condense_question_stream_response.ipynb
index 8584d3fe97..798fcd1daa 100644
--- a/docs/examples/customization/streaming/chat_engine_condense_question_stream_response.ipynb
+++ b/docs/examples/customization/streaming/chat_engine_condense_question_stream_response.ipynb
@@ -32,6 +32,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "INFO:numexpr.utils:Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
+      "Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
       "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
       "NumExpr defaulting to 8 threads.\n"
      ]
@@ -40,8 +42,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/opt/homebrew/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
+      "/Users/suo/miniconda3/envs/llama/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.7) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
      ]
     }
    ],
@@ -52,13 +54,12 @@
     "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
-    "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
-    "from langchain import OpenAI"
+    "from llama_index import VectorStoreIndex, SimpleDirectoryReader"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "id": "c4a6b55f",
    "metadata": {},
    "outputs": [],
@@ -69,21 +70,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "fa67c282",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "index = VectorStoreIndex.from_documents(documents)"
    ]
@@ -99,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "id": "825b5bb3-37ff-4886-be2c-264584ca9eab",
    "metadata": {
     "tags": []
@@ -110,15 +100,7 @@
      "output_type": "stream",
      "text": [
       "INFO:llama_index.chat_engine.condense_question:Querying with: What was the next step in Paul Graham's career after his involvement with Y Combinator?\n",
-      "Querying with: What was the next step in Paul Graham's career after his involvement with Y Combinator?\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 19 tokens\n",
-      "> [retrieve] Total embedding token usage: 19 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens\n",
-      "> [get_response] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
+      "Querying with: What was the next step in Paul Graham's career after his involvement with Y Combinator?\n"
      ]
     }
    ],
@@ -132,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "id": "d8fa4310-4dc5-4787-a073-755d2e0b4887",
    "metadata": {
     "tags": []
@@ -143,7 +125,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Paul Graham's next step in his career after his involvement with Y Combinator was to become an angel investor. He and his partner Robert Morris decided to start their own investment firm and make seed investments in startups. They also wanted to provide the same kind of help to founders that Julian had provided to them, such as setting up a company with bylaws and stock."
+      "Paul Graham's next step in his career after his involvement with Y Combinator was to take up painting. He spent most of the rest of 2014 painting and then in March 2015 he started working on Lisp again.\n"
      ]
     }
    ],
@@ -162,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "id": "f6181319-5d76-48c4-a5d4-23c6e9bc5ccb",
    "metadata": {
     "tags": []
@@ -172,16 +154,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "INFO:llama_index.chat_engine.condense_question:Querying with: What was Paul Graham's next step in his career after founding Y Combinator and becoming an angel investor with his partner Robert Morris?\n",
-      "Querying with: What was Paul Graham's next step in his career after founding Y Combinator and becoming an angel investor with his partner Robert Morris?\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 27 tokens\n",
-      "> [retrieve] Total embedding token usage: 27 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens\n",
-      "> [get_response] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
+      "INFO:llama_index.chat_engine.condense_question:Querying with: What did Paul Graham do after he started working on Lisp again in March 2015?\n",
+      "Querying with: What did Paul Graham do after he started working on Lisp again in March 2015?\n"
      ]
     }
    ],
@@ -191,7 +165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "id": "95045f5b-7964-4872-bc91-809d9debf1f5",
    "metadata": {
     "tags": []
@@ -202,7 +176,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Paul Graham's next step in his career after founding Y Combinator and becoming an angel investor with his partner Robert Morris was to continue working on Y Combinator and writing essays. He also started working on a new version of Arc with Robert Morris and wrote Hacker News in it. He gradually stopped working on Arc and focused more on Y Combinator and writing essays. He was eventually encouraged by Robert Morris to make sure Y Combinator wasn't the last cool thing he did, which set him thinking about his future career path."
+      "Paul Graham spent the rest of 2015 writing essays and working on the new dialect of Lisp he called Arc. He also looked for an apartment to buy and started to plan a second still life painting from the same objects.\n"
      ]
     }
    ],
@@ -212,7 +186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "id": "72cc02dd-90b7-4d63-bdb2-e4c4666f87ef",
    "metadata": {
     "tags": []
@@ -222,16 +196,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "INFO:llama_index.chat_engine.condense_question:Querying with: What did Paul Graham do after founding Y Combinator and becoming an angel investor with his partner Robert Morris?\n",
-      "Querying with: What did Paul Graham do after founding Y Combinator and becoming an angel investor with his partner Robert Morris?\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 22 tokens\n",
-      "> [retrieve] Total embedding token usage: 22 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens\n",
-      "> [get_response] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
+      "INFO:llama_index.chat_engine.condense_question:Querying with: What did Paul Graham do after he started working on the new dialect of Lisp he called Arc in 2015?\n",
+      "Querying with: What did Paul Graham do after he started working on the new dialect of Lisp he called Arc in 2015?\n"
      ]
     }
    ],
@@ -241,7 +207,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "id": "d4f8efbb-fcb0-4c58-b92b-d2264a7e7103",
    "metadata": {
     "tags": []
@@ -252,7 +218,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Paul Graham continued to write essays and work on Y Combinator. He also started working on a new version of Arc with Robert Morris, and wrote Hacker News in it. He also worked on other projects, such as helping startups with their incorporation and other legal matters. In 2010, Robert Morris suggested that Paul Graham should make sure Y Combinator wasn't the last cool thing he did, which set Paul Graham thinking about his future."
+      "Paul Graham worked on the new dialect of Lisp he called Arc for four years, from March 26, 2015 to October 12, 2019. During this time, he wrote the new Lisp, called Bel, in Arc. He also wrote essays and took his children to the coast on a sunny day in 2015. In the summer of 2016, he and his family moved to England. Finally, in the fall of 2019, he finished the project.\n"
      ]
     }
    ],
@@ -271,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "id": "d13cf082-1a91-43c5-8bad-76fa45be96f9",
    "metadata": {
     "tags": []
@@ -283,7 +249,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "id": "627de435-d195-4dad-b314-a68e731979a9",
    "metadata": {
     "tags": []
@@ -294,15 +260,7 @@
      "output_type": "stream",
      "text": [
       "INFO:llama_index.chat_engine.condense_question:Querying with: What happens after the current situation?\n",
-      "Querying with: What happens after the current situation?\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 7 tokens\n",
-      "> [retrieve] Total embedding token usage: 7 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens\n",
-      "> [get_response] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
+      "Querying with: What happens after the current situation?\n"
      ]
     }
    ],
@@ -312,7 +270,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "id": "75ef9e31-3cdb-4129-92f7-e61be201ea36",
    "metadata": {
     "tags": []
@@ -323,7 +281,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "After the current situation, the narrator continues to look for an apartment to buy and to experiment with a new kind of still life painting. He also continues to talk to founders who are leaving after selling their companies and gives them advice to take a vacation. He also continues to talk to alumni and new startups working on things he is interested in."
+      "After the current situation, the narrator resumes painting and experimenting with a new kind of still life. He also resumes his old life in New York, now that he is rich. He is able to take taxis and eat in restaurants, which is exciting for a while. He also starts to connect with other people who are trying to paint in New York.\n"
      ]
     }
    ],
@@ -348,7 +306,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.9.16"
   }
  },
  "nbformat": 4,
diff --git a/docs/examples/docstore/MongoDocstoreDemo.ipynb b/docs/examples/docstore/MongoDocstoreDemo.ipynb
index f5aeaa3cf2..b047b86f49 100644
--- a/docs/examples/docstore/MongoDocstoreDemo.ipynb
+++ b/docs/examples/docstore/MongoDocstoreDemo.ipynb
@@ -45,11 +45,12 @@
                 "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext\n",
                 "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
                 "from llama_index.composability import ComposableGraph\n",
-                "from langchain.chat_models import ChatOpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.response.notebook_utils import display_response"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
             "metadata": {},
@@ -72,6 +73,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
             "metadata": {},
@@ -94,6 +96,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
             "metadata": {},
@@ -150,6 +153,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
             "metadata": {},
@@ -213,6 +217,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "365a025b",
             "metadata": {},
@@ -267,6 +272,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d3bf6aaf-3375-4212-8323-777969a918f7",
             "metadata": {},
@@ -284,8 +290,8 @@
             },
             "outputs": [],
             "source": [
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
             ]
         },
         {
@@ -395,4 +401,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
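
The unchanged cells between these hunks parse the documents into nodes and register them in a Mongo-backed docstore, roughly (the connection string is assumed to come from the environment, as in the demo):

    import os
    from llama_index import StorageContext
    from llama_index.node_parser import SimpleNodeParser
    from llama_index.storage.docstore import MongoDocumentStore

    nodes = SimpleNodeParser().get_nodes_from_documents(documents)

    docstore = MongoDocumentStore.from_uri(uri=os.environ["MONGO_URI"])
    docstore.add_documents(nodes)

    # Multiple indices can then share one storage context,
    # and hence a single copy of the nodes.
    storage_context = StorageContext.from_defaults(docstore=docstore)
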
diff --git a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
index c62a1c1d41..cd5f6b9087 100644
--- a/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
+++ b/docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f35495ae",
    "metadata": {},
@@ -72,11 +73,12 @@
     "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext\n",
     "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
     "from llama_index.composability import ComposableGraph\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from llama_index.response.notebook_utils import display_response"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
    "metadata": {},
@@ -99,6 +101,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
    "metadata": {},
@@ -121,6 +124,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
    "metadata": {},
@@ -207,6 +211,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
    "metadata": {},
@@ -318,6 +323,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "365a025b",
    "metadata": {},
@@ -400,6 +406,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d3bf6aaf-3375-4212-8323-777969a918f7",
    "metadata": {},
@@ -421,8 +428,8 @@
    },
    "outputs": [],
    "source": [
-    "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
    ]
   },
   {
diff --git a/docs/examples/evaluation/QuestionGeneration.ipynb b/docs/examples/evaluation/QuestionGeneration.ipynb
index 6ec242c28c..76f7505b09 100644
--- a/docs/examples/evaluation/QuestionGeneration.ipynb
+++ b/docs/examples/evaluation/QuestionGeneration.ipynb
@@ -11,7 +11,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 13,
+            "execution_count": 38,
             "id": "9080b39e",
             "metadata": {},
             "outputs": [],
@@ -26,32 +26,32 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 22,
+            "execution_count": 39,
             "id": "8d0b2364-4806-4656-81e7-3f6e4b910b5b",
             "metadata": {},
             "outputs": [],
             "source": [
                 "from llama_index.evaluation import DatasetGenerator, QueryResponseEvaluator\n",
                 "from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, LLMPredictor, Response\n",
-                "from langchain.chat_models import ChatOpenAI"
+                "from llama_index.llms import OpenAI"
             ]
         },
         {
             "cell_type": "code",
-            "execution_count": 3,
+            "execution_count": 40,
             "id": "834f4c8c-8c10-4f8d-bf43-444aaa1234b1",
             "metadata": {
                 "tags": []
             },
             "outputs": [],
             "source": [
-                "reader = SimpleDirectoryReader(\"../paul_graham_essay/data\")\n",
+                "reader = SimpleDirectoryReader(\"../data/paul_graham/\")\n",
                 "documents = reader.load_data()"
             ]
         },
         {
             "cell_type": "code",
-            "execution_count": 4,
+            "execution_count": 41,
             "id": "9cc71140-d614-4696-9ade-d5bdc251d398",
             "metadata": {
                 "tags": []
@@ -61,8 +61,11 @@
                     "name": "stdout",
                     "output_type": "stream",
                     "text": [
-                        "WARNING:llama_index.llm_predictor.base:Unknown max input size for gpt-3.5-turbo, using defaults.\n",
-                        "Unknown max input size for gpt-3.5-turbo, using defaults.\n"
+                        "WARNING:llama_index.indices.service_context:chunk_size_limit is deprecated, please specify chunk_size instead\n",
+                        "chunk_size_limit is deprecated, please specify chunk_size instead\n",
+                        "chunk_size_limit is deprecated, please specify chunk_size instead\n",
+                        "chunk_size_limit is deprecated, please specify chunk_size instead\n",
+                        "chunk_size_limit is deprecated, please specify chunk_size instead\n"
                     ]
                 }
             ],
@@ -72,7 +75,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 42,
             "id": "f481b532-9be2-4ec3-b551-fd44060099bd",
             "metadata": {
                 "tags": []
@@ -84,7 +87,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 6,
+            "execution_count": 43,
             "id": "63720bd6-c060-4cc2-8a60-a39e935ee3e6",
             "metadata": {
                 "tags": []
@@ -94,78 +97,78 @@
                     "data": {
                         "text/plain": [
                             "['What were the two main things the author worked on before college?',\n",
-                            " 'What language did the author use to write programs on the IBM 1401?',\n",
-                            " \"What was the author's first microcomputer and what did they use it for?\",\n",
-                            " 'What did the author study in college before switching to AI?',\n",
-                            " 'What made the author want to work on AI?',\n",
-                            " 'What language was commonly used for AI in the mid-1980s?',\n",
+                            " 'How did the author describe their early attempts at writing short stories?',\n",
+                            " 'What type of computer did the author first work on for programming?',\n",
+                            " 'What language did the author use for programming on the IBM 1401?',\n",
+                            " \"What was the author's experience with programming on the 1401?\",\n",
+                            " 'What type of computer did the author eventually get for themselves?',\n",
+                            " \"What was the author's initial plan for college?\",\n",
+                            " 'What made the author change their mind about studying philosophy?',\n",
+                            " \"What sparked the author's interest in AI?\",\n",
                             " 'What did the author realize about AI during their first year of grad school?',\n",
-                            " 'What did the author decide to focus on instead of AI?',\n",
-                            " 'What was the problem with systems work according to the author?',\n",
-                            " 'What did the author realize about making art while looking at a painting at the Carnegie Institute?',\n",
-                            " \"What was the topic of the author's dissertation?\",\n",
-                            " 'Which art schools did the author apply to and which one did they end up attending?',\n",
-                            " \"What was the author's experience like at the Accademia di Belli Arti in Florence?\",\n",
-                            " 'What did the author learn about low-end software while working at Interleaf?',\n",
-                            " \"What is the author's opinion on signature styles in painting?\",\n",
-                            " 'How did the author manage to save enough money to pay off their college loans?',\n",
+                            " 'What were the two art schools that the author applied to?',\n",
+                            " 'How did the author end up at RISD?',\n",
+                            " 'What was the purpose of the foundation classes at RISD?',\n",
+                            " 'How did the author manage to pass the entrance exam for the Accademia di Belli Arti?',\n",
+                            " 'What was the arrangement between the students and faculty at the Accademia?',\n",
+                            " \"What was the author's experience painting still lives in Florence?\",\n",
+                            " 'What did the author learn about visual perception while painting still lives?',\n",
+                            " 'Why did the author decide to leave the Accademia and return to the US?',\n",
                             " 'What did the author learn about technology companies while working at Interleaf?',\n",
-                            " \"What was the author's experience like in the color class they took at RISD?\",\n",
-                            " 'What did the author learn about painting still lives?',\n",
-                            " \"What was the author's experience like at RISD and why did they end up dropping out?\",\n",
-                            " 'What is the difference between the tribe of signature style seekers and the earnest students at RISD?',\n",
-                            " 'How did the author end up in New York?',\n",
-                            " \"What was the author's initial plan to make money after dropping out of RISD?\",\n",
-                            " 'Who was Idelle Weber and how did she help the author?',\n",
-                            " \"What was the author's startup idea and why did it fail?\",\n",
-                            " 'What was the main goal of an online store builder and why was it important?',\n",
-                            " 'How did Viaweb differentiate itself from its competitors?',\n",
-                            " 'What did the author learn about retail while building stores for users?',\n",
-                            " \"How did the author's attitude towards business change after getting users?\",\n",
-                            " \"What was the author's vision for software development in the future?\",\n",
-                            " \"What lesson did the author learn about scanning images of men's shirts?\",\n",
-                            " 'Why did the author initially think they needed a \"business person\" to be in charge?',\n",
-                            " 'What is the ultimate test of a startup according to the author?',\n",
-                            " 'Why did the author hire lots more people for their startup?',\n",
-                            " \"What was the author's experience like working at Yahoo after their company was bought?\",\n",
-                            " 'What advice does the author give to founders who are leaving after selling their companies?',\n",
-                            " \"What was the author's idea for a new company after leaving Yahoo?\",\n",
-                            " 'Why did the author decide to build a new dialect of Lisp?',\n",
-                            " 'How did the author realize the potential of publishing essays on the web?',\n",
-                            " \"What was the author's plan for writing essays on the web?\",\n",
-                            " \"What was the author's involvement with building the infrastructure of the web?\",\n",
-                            " 'In the print era, who were the only people allowed to publish essays?',\n",
-                            " 'What did the author realize about online essays and their social perception?',\n",
-                            " 'According to the author, what is a danger for the ambitious?',\n",
-                            " 'What was the turning point for the author in figuring out what to work on?',\n",
-                            " \"What was the idea behind the big party at the author's house in October 2003?\",\n",
-                            " \"What was Jessica Livingston's job before she started compiling a book of interviews with startup founders?\",\n",
-                            " 'What was the most distinctive thing about Y Combinator?',\n",
-                            " 'How did YC solve one of the biggest problems faced by founders?',\n",
-                            " \"What was the author's original plan for YC and how did it change over time?\",\n",
-                            " 'What is the \"YC GDP\" and how has it evolved over time?',\n",
-                            " 'What was the original intention for Y Combinator and how did it change over time?',\n",
-                            " 'What was the purpose of Hacker News and how did it impact YC?',\n",
-                            " 'What were some of the challenges faced by Paul Graham while working at YC?',\n",
-                            " 'What advice did Robert Morris give to Paul Graham and how did it impact his decision to leave YC?',\n",
-                            " 'How did Paul Graham decide to spend his time after leaving YC?',\n",
-                            " 'What is Lisp and how did it originate?',\n",
-                            " 'What was the goal of creating Bel and how was it achieved?',\n",
-                            " \"How did working on Bel impact Paul Graham's life?\",\n",
-                            " 'Why did Paul Graham and his family move to England in 2016?',\n",
-                            " \"What was the author's experience with time-sharing machines?\",\n",
-                            " 'Where did the author live while attending the Accademia in Florence?',\n",
-                            " \"What is the significance of the Y combinator in the author's work?\",\n",
-                            " 'What is Bel and how was it developed?',\n",
-                            " \"What was the author's experience with writing essays in 2020?\",\n",
-                            " 'How did the author choose what to work on in the past?',\n",
-                            " 'What is the difference between rent-controlled and rent-stabilized apartments?',\n",
-                            " \"What was the author's experience with launching an online store builder?\",\n",
-                            " \"What is the author's opinion on customs in rapidly changing fields?\",\n",
-                            " \"What was the author's experience with leaving Y Combinator?\"]"
+                            " 'What lesson did the author learn about the low end and high end in the software industry?',\n",
+                            " \"What was the author's motivation for writing another book on Lisp?\",\n",
+                            " 'How did the author come up with the idea for starting a company to put art galleries online?',\n",
+                            " 'What was the initial reaction of art galleries to the idea of being online?',\n",
+                            " 'How did the author and his team come up with the concept of a web app?',\n",
+                            " 'What were the three main parts of the software developed by the author and his team?',\n",
+                            " 'How did the author and his team learn about retail and improve their software based on user feedback?',\n",
+                            " 'Why did the author initially believe that the absolute number of users was the most important factor for a startup?',\n",
+                            " \"What was the growth rate of the author's company and why was it significant?\",\n",
+                            " \"How did the author's decision to hire more people impact the financial stability of the company?\",\n",
+                            " \"What was the outcome of the company's acquisition by Yahoo in 1998?\",\n",
+                            " \"What was the author's initial reaction when Yahoo bought their startup?\",\n",
+                            " \"How did the author's lifestyle change after Yahoo bought their startup?\",\n",
+                            " 'Why did the author leave Yahoo and what did they plan to do?',\n",
+                            " \"What was the author's experience like when they returned to New York after becoming rich?\",\n",
+                            " 'What idea did the author have in the spring of 2000 and why did they decide to start a new company?',\n",
+                            " \"Why did the author decide to build a subset of the new company's vision as an open source project?\",\n",
+                            " \"How did the author's perception of publishing essays change with the advent of the internet?\",\n",
+                            " \"What is the author's perspective on working on things that are not prestigious?\",\n",
+                            " 'What other projects did the author work on besides writing essays?',\n",
+                            " 'What type of building did the author buy in Cambridge?',\n",
+                            " \"What was the concept behind the big party at the narrator's house in October 2003?\",\n",
+                            " \"How did Jessica Livingston's perception of startups change after meeting friends of the narrator?\",\n",
+                            " 'What were some of the ideas that the narrator shared with Jessica about fixing venture capital?',\n",
+                            " 'How did the idea of starting their own investment firm come about for the narrator and Jessica?',\n",
+                            " 'What was the Summer Founders Program and how did it attract applicants?',\n",
+                            " \"How did Y Combinator's batch model help solve the problem of isolation for startup founders?\",\n",
+                            " \"What advantages did YC's scale bring, both in terms of community and customer acquisition?\",\n",
+                            " 'Why did the narrator consider Hacker News to be a source of stress?',\n",
+                            " \"How did the narrator's role in YC differ from other types of work they had done?\",\n",
+                            " 'What advice did Robert Morris offer the narrator during his visit in 2010?',\n",
+                            " 'What was the advice given to the author by Rtm regarding their involvement with Y Combinator?',\n",
+                            " 'Why did the author decide to hand over Y Combinator to someone else?',\n",
+                            " \"What event in the author's personal life prompted them to reevaluate their priorities?\",\n",
+                            " 'How did the author spend most of 2014?',\n",
+                            " 'What project did the author work on from March 2015 to October 2019?',\n",
+                            " 'How did the author manage to write an interpreter for Lisp in itself?',\n",
+                            " \"What was the author's experience like living in England?\",\n",
+                            " \"When was the author's project, Bel, finally finished?\",\n",
+                            " 'What did the author do during the fall of 2019?',\n",
+                            " \"How would you describe the author's journey and decision-making process throughout the document?\",\n",
+                            " \"How did the author's experience with editing Lisp expressions differ from traditional app editing?\",\n",
+                            " 'Why did the author receive negative comments when claiming that Lisp was better than other languages?',\n",
+                            " 'What is the difference between putting something online and publishing it online?',\n",
+                            " 'How did the customs of venture capital practice and essay writing reflect outdated constraints?',\n",
+                            " 'Why did Y Combinator change its name to avoid a regional association?',\n",
+                            " \"What was the significance of the orange color chosen for Y Combinator's logo?\",\n",
+                            " 'Why did Y Combinator become a fund for a couple of years before returning to self-funding?',\n",
+                            " 'What is the purpose of Y Combinator in relation to the concept of \"deal flow\"?',\n",
+                            " 'How did the combination of running a forum and writing essays lead to a problem for the author?',\n",
+                            " \"What was the author's biggest regret about leaving Y Combinator?\"]"
                         ]
                     },
-                    "execution_count": 6,
+                    "execution_count": 43,
                     "metadata": {},
                     "output_type": "execute_result"
                 }
@@ -176,7 +179,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 15,
+            "execution_count": 44,
             "id": "b9b98f89-d5b8-4d29-92f6-ad76d5060e9f",
             "metadata": {
                 "tags": []
@@ -184,13 +187,13 @@
             "outputs": [],
             "source": [
                 "# gpt-4\n",
-                "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
             ]
         },
         {
             "cell_type": "code",
-            "execution_count": 23,
+            "execution_count": 45,
             "id": "8eb3e616-64e5-4bf4-a67b-661e9b3657e7",
             "metadata": {
                 "tags": []
@@ -202,21 +205,10 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 9,
+            "execution_count": 46,
             "id": "41f0e53f-77a6-40d5-94ae-3f81b01af75c",
             "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-                        "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-                        "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n",
-                        "> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n"
-                    ]
-                }
-            ],
+            "outputs": [],
             "source": [
                 "# create vector index\n",
                 "vector_index = VectorStoreIndex.from_documents(\n",
@@ -227,7 +219,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 28,
+            "execution_count": 50,
             "id": "af730b2e-6949-4865-b7af-bb2bc60a9173",
             "metadata": {
                 "tags": []
@@ -240,7 +232,7 @@
                 "        {\n",
                 "            \"Query\": query,\n",
                 "            \"Response\": str(response), \n",
-                "            \"Source\": response.source_nodes[0].source_text[:1000] + \"...\",\n",
+                "            \"Source\": response.source_nodes[0].node.get_content()[:1000] + \"...\",\n",
                 "            \"Evaluation Result\": eval_result\n",
                 "        },\n",
                 "        index=[0]\n",
@@ -257,7 +249,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": null,
+            "execution_count": 48,
             "id": "180a5d2e-9286-477b-9cd0-a5976d18d845",
             "metadata": {
                 "tags": []
@@ -271,7 +263,7 @@
         },
         {
             "cell_type": "code",
-            "execution_count": 29,
+            "execution_count": 51,
             "id": "c764b8b3-69b1-4ac8-b88b-3f9e204b8bfb",
             "metadata": {
                 "tags": []
@@ -281,29 +273,27 @@
                     "data": {
                         "text/html": [
                             "<style type=\"text/css\">\n",
-                            "#T_f2f69_row0_col1, #T_f2f69_row0_col2 {\n",
+                            "#T_138fa_row0_col1, #T_138fa_row0_col2 {\n",
                             "  inline-size: 600px;\n",
                             "  overflow-wrap: break-word;\n",
                             "}\n",
                             "</style>\n",
-                            "<table id=\"T_f2f69\">\n",
+                            "<table id=\"T_138fa\">\n",
                             "  <thead>\n",
                             "    <tr>\n",
                             "      <th class=\"blank level0\" >&nbsp;</th>\n",
-                            "      <th id=\"T_f2f69_level0_col0\" class=\"col_heading level0 col0\" >Query</th>\n",
-                            "      <th id=\"T_f2f69_level0_col1\" class=\"col_heading level0 col1\" >Response</th>\n",
-                            "      <th id=\"T_f2f69_level0_col2\" class=\"col_heading level0 col2\" >Source</th>\n",
-                            "      <th id=\"T_f2f69_level0_col3\" class=\"col_heading level0 col3\" >Evaluation Result</th>\n",
+                            "      <th id=\"T_138fa_level0_col0\" class=\"col_heading level0 col0\" >Query</th>\n",
+                            "      <th id=\"T_138fa_level0_col1\" class=\"col_heading level0 col1\" >Response</th>\n",
+                            "      <th id=\"T_138fa_level0_col2\" class=\"col_heading level0 col2\" >Source</th>\n",
+                            "      <th id=\"T_138fa_level0_col3\" class=\"col_heading level0 col3\" >Evaluation Result</th>\n",
                             "    </tr>\n",
                             "  </thead>\n",
                             "  <tbody>\n",
                             "    <tr>\n",
-                            "      <th id=\"T_f2f69_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
-                            "      <td id=\"T_f2f69_row0_col0\" class=\"data row0 col0\" >What language did the author use to write programs on the IBM 1401?</td>\n",
-                            "      <td id=\"T_f2f69_row0_col1\" class=\"data row0 col1\" >The author used an early version of Fortran to write programs on the IBM 1401.</td>\n",
-                            "      <td id=\"T_f2f69_row0_col2\" class=\"data row0 col2\" >\t\t\n",
-                            "\n",
-                            "What I Worked On\n",
+                            "      <th id=\"T_138fa_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
+                            "      <td id=\"T_138fa_row0_col0\" class=\"data row0 col0\" >How did the author describe their early attempts at writing short stories?</td>\n",
+                            "      <td id=\"T_138fa_row0_col1\" class=\"data row0 col1\" >The author described their early attempts at writing short stories as awful. They mentioned that their stories had hardly any plot and were mostly about characters with strong feelings, which they thought made the stories deep.</td>\n",
+                            "      <td id=\"T_138fa_row0_col2\" class=\"data row0 col2\" >What I Worked On\n",
                             "\n",
                             "February 2021\n",
                             "\n",
@@ -311,14 +301,14 @@
                             "\n",
                             "The first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.\n",
                             "\n",
-                            "The language we used was an early version of Fortran. You had to type programs on punch cards, then stack them in...</td>\n",
-                            "      <td id=\"T_f2f69_row0_col3\" class=\"data row0 col3\" >YES</td>\n",
+                            "The language we used was an early version of Fortran. You had to type programs on punch cards, then stack them in the...</td>\n",
+                            "      <td id=\"T_138fa_row0_col3\" class=\"data row0 col3\" >YES</td>\n",
                             "    </tr>\n",
                             "  </tbody>\n",
                             "</table>\n"
                         ],
                         "text/plain": [
-                            "<pandas.io.formats.style.Styler at 0x281d38100>"
+                            "<pandas.io.formats.style.Styler at 0x7fcb78d7f130>"
                         ]
                     },
                     "metadata": {},
@@ -340,9 +330,9 @@
     ],
     "metadata": {
         "kernelspec": {
-            "display_name": "llama_index",
+            "display_name": "llama",
             "language": "python",
-            "name": "llama_index"
+            "name": "python3"
         },
         "language_info": {
             "codemirror_mode": {
@@ -354,7 +344,7 @@
             "name": "python",
             "nbconvert_exporter": "python",
             "pygments_lexer": "ipython3",
-            "version": "3.10.10"
+            "version": "3.9.16"
         }
     },
     "nbformat": 4,
diff --git a/docs/examples/evaluation/TestNYC-Evaluation-Query.ipynb b/docs/examples/evaluation/TestNYC-Evaluation-Query.ipynb
index fa322b65e6..cb5bc0ddfa 100644
--- a/docs/examples/evaluation/TestNYC-Evaluation-Query.ipynb
+++ b/docs/examples/evaluation/TestNYC-Evaluation-Query.ipynb
@@ -38,8 +38,7 @@
                 "    ServiceContext,\n",
                 "    Response\n",
                 ")\n",
-                "from langchain.chat_models import ChatOpenAI\n",
-                "from langchain.llms import OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.evaluation import QueryResponseEvaluator\n",
                 "import pandas as pd\n",
                 "pd.set_option('display.max_colwidth', 0)"
@@ -55,12 +54,12 @@
             "outputs": [],
             "source": [
                 "# gpt-3 (davinci)\n",
-                "llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-                "service_context_gpt3 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt3)\n",
+                "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+                "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3)\n",
                 "\n",
                 "# gpt-4\n",
-                "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
             ]
         },
         {
@@ -144,6 +143,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "4780e16a-aa6c-4143-978d-4a93a4357130",
             "metadata": {},
@@ -492,6 +492,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "0ee6a336-8fd0-46b3-bb8f-7f47a8781c60",
             "metadata": {
diff --git a/docs/examples/evaluation/TestNYC-Evaluation.ipynb b/docs/examples/evaluation/TestNYC-Evaluation.ipynb
index cc87dc616e..a2c215862a 100644
--- a/docs/examples/evaluation/TestNYC-Evaluation.ipynb
+++ b/docs/examples/evaluation/TestNYC-Evaluation.ipynb
@@ -38,8 +38,7 @@
                 "    ServiceContext,\n",
                 "    Response\n",
                 ")\n",
-                "from langchain.chat_models import ChatOpenAI\n",
-                "from langchain.llms import OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.evaluation import ResponseEvaluator\n",
                 "import pandas as pd\n",
                 "pd.set_option('display.max_colwidth', 0)"
@@ -55,12 +54,12 @@
             "outputs": [],
             "source": [
                 "# gpt-3 (davinci)\n",
-                "llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-                "service_context_gpt3 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt3)\n",
+                "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+                "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3)\n",
                 "\n",
                 "# gpt-4\n",
-                "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
             ]
         },
         {
diff --git a/docs/examples/index_structs/doc_summary/DocSummary.ipynb b/docs/examples/index_structs/doc_summary/DocSummary.ipynb
index 9e70e6ce16..5593ac0a39 100644
--- a/docs/examples/index_structs/doc_summary/DocSummary.ipynb
+++ b/docs/examples/index_structs/doc_summary/DocSummary.ipynb
@@ -73,7 +73,7 @@
     "    get_response_synthesizer\n",
     ")\n",
     "from llama_index.indices.document_summary import DocumentSummaryIndex\n",
-    "from langchain.chat_models import ChatOpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
@@ -176,8 +176,8 @@
    "outputs": [],
    "source": [
     "# # LLM Predictor (gpt-3.5-turbo)\n",
-    "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
    ]
   },
   {
diff --git a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb
index 81b100fbb8..abcd3b549f 100644
--- a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb
+++ b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb
@@ -72,7 +72,7 @@
     "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, KnowledgeGraphIndex\n",
     "from llama_index.graph_stores import SimpleGraphStore\n",
     "\n",
-    "from langchain import OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -98,8 +98,8 @@
     "# define LLM\n",
     "# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors\n",
     "\n",
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)"
+    "llm = OpenAI(temperature=0, model=\"text-davinci-002\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)"
    ]
   },
   {
diff --git a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.ipynb b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.ipynb
index 29e13bf69f..2f05592c61 100644
--- a/docs/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.ipynb
+++ b/docs/examples/index_structs/knowledge_graph/KnowledgeGraphIndex_vs_VectorStoreIndex_vs_CustomIndex_combined.ipynb
@@ -104,7 +104,6 @@
     "\n",
     "from llama_index import (\n",
     "    KnowledgeGraphIndex,\n",
-    "    LLMPredictor,\n",
     "    ServiceContext,\n",
     "    SimpleDirectoryReader,\n",
     ")\n",
@@ -112,14 +111,14 @@
     "from llama_index.graph_stores import NebulaGraphStore\n",
     "\n",
     "\n",
-    "from langchain import OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display\n",
     "\n",
     "\n",
     "# define LLM\n",
     "# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors\n",
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size_limit=512)"
+    "llm = OpenAI(temperature=0, model=\"text-davinci-002\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size_limit=512)"
    ]
   },
   {
@@ -174,7 +173,6 @@
     "        \"api_version\": openai.api_version,\n",
     "    }\n",
     ")\n",
-    "llm_predictor = LLMPredictor(llm=llm)\n",
     "\n",
     "# You need to deploy your own embedding model as well as your own chat completion model\n",
     "embedding_llm = LangchainEmbedding(\n",
@@ -191,6 +189,6 @@
     "\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=llm_predictor,\n",
+    "    llm=llm,\n",
     "    embed_model=embedding_llm,\n",
     ")\n"
    ]
diff --git a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb
index f006d4b7f9..041a857dd0 100644
--- a/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb
+++ b/docs/examples/index_structs/knowledge_graph/NebulaGraphKGIndexDemo.ipynb
@@ -70,7 +70,7 @@
     "from llama_index.graph_stores import NebulaGraphStore\n",
     "\n",
     "\n",
-    "from langchain import OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -95,8 +95,8 @@
    "source": [
     "# define LLM\n",
     "# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors\n",
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size_limit=512)"
+    "llm = OpenAI(temperature=0, model=\"text-davinci-002\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size_limit=512)"
    ]
   },
   {
diff --git a/docs/examples/index_structs/struct_indices/SQLIndexDemo.ipynb b/docs/examples/index_structs/struct_indices/SQLIndexDemo.ipynb
index a40a806b7c..112eedd660 100644
--- a/docs/examples/index_structs/struct_indices/SQLIndexDemo.ipynb
+++ b/docs/examples/index_structs/struct_indices/SQLIndexDemo.ipynb
@@ -109,8 +109,7 @@
          "outputs": [],
          "source": [
             "from llama_index import SQLDatabase, ServiceContext\n",
-            "from langchain import OpenAI\n",
-            "from llama_index import LLMPredictor"
+            "from llama_index.llms import OpenAI"
          ]
       },
       {
@@ -122,8 +121,8 @@
          },
          "outputs": [],
          "source": [
-            "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
-            "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+            "llm = OpenAI(temperature=0, model=\"text-davinci-002\")\n",
+            "service_context = ServiceContext.from_defaults(llm=llm)"
          ]
       },
       {
diff --git a/docs/examples/llm/langchain.ipynb b/docs/examples/llm/langchain.ipynb
new file mode 100644
index 0000000000..703bb2bf4f
--- /dev/null
+++ b/docs/examples/llm/langchain.ipynb
@@ -0,0 +1,110 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "343d5ee5-c78d-493d-9df9-2d170dbd71d9",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## LangChain LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "703e0586-723b-4a32-81c3-96c1054a998d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.llms import OpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "911bb011-40b3-44d7-9d99-6a0e8a193753",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from llama_index.llms import LangChainLLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "1470203e-80fd-4013-af89-b2b5ad80a14d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "llm = LangChainLLM(llm=OpenAI())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "59ae3b49-4f62-4248-b2d8-4ec3ab04f7e4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "response_gen = llm.stream_complete('Hi this is')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "8cb35c1e-a04b-4526-ad8c-ddf6e37e9fd2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " a test\n",
+      "\n",
+      "Hello! Welcome to the test. What would you like to learn about?"
+     ]
+    }
+   ],
+   "source": [
+    "for delta in response_gen:\n",
+    "    print(delta.delta, end='')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/node_postprocessor/LLMReranker-Gatsby.ipynb b/docs/examples/node_postprocessor/LLMReranker-Gatsby.ipynb
index 978768b24d..284cb5d952 100644
--- a/docs/examples/node_postprocessor/LLMReranker-Gatsby.ipynb
+++ b/docs/examples/node_postprocessor/LLMReranker-Gatsby.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f1b23ffc-22d8-4414-b2e4-66d45a03523d",
    "metadata": {
@@ -45,11 +46,12 @@
     "from llama_index.indices.postprocessor import (\n",
     "    LLMRerank\n",
     ")\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "8a5b0ac6-ec9c-40e1-9120-d20e33c37f80",
    "metadata": {
@@ -78,8 +80,8 @@
    ],
    "source": [
     "# LLM Predictor (gpt-3.5-turbo) + service context\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)"
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)"
    ]
   },
   {
@@ -128,6 +130,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "805847d9-a2c1-4930-98a9-98126e730000",
    "metadata": {
@@ -600,6 +603,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "23dcdac6-f4dd-469e-9f47-d030f27bacda",
    "metadata": {},
diff --git a/docs/examples/node_postprocessor/LLMReranker-Lyft-10k.ipynb b/docs/examples/node_postprocessor/LLMReranker-Lyft-10k.ipynb
index 6772658cfc..4e93e0dbce 100644
--- a/docs/examples/node_postprocessor/LLMReranker-Lyft-10k.ipynb
+++ b/docs/examples/node_postprocessor/LLMReranker-Lyft-10k.ipynb
@@ -45,7 +45,7 @@
     "    LLMRerank\n",
     ")\n",
     "\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -68,13 +68,13 @@
    "outputs": [],
    "source": [
     "# LLM Predictor (gpt-3.5-turbo) + service context\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
     "\n",
     "chunk_overlap = 0\n",
     "chunk_size = 128\n",
     "\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=llm_predictor, \n",
+    "    llm=llm,\n",
     "    chunk_size=chunk_size,\n",
     "    chunk_overlap=chunk_overlap,\n",
     ")"
diff --git a/docs/examples/node_postprocessor/PII.ipynb b/docs/examples/node_postprocessor/PII.ipynb
index 708e9b6c00..d93b8d20bc 100644
--- a/docs/examples/node_postprocessor/PII.ipynb
+++ b/docs/examples/node_postprocessor/PII.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c04ffe8e-6573-470f-aef5-348522a0de15",
    "metadata": {},
@@ -43,7 +44,7 @@
     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
     "\n",
     "from llama_index.indices.postprocessor import PIINodePostprocessor, NERPIINodePostprocessor\n",
-    "from llama_index.llm_predictor import HuggingFaceLLMPredictor\n",
+    "from llama_index.llms import HuggingFaceLLM\n",
     "from llama_index import ServiceContext, Document, VectorStoreIndex\n",
     "from llama_index.schema import TextNode"
    ]
@@ -66,6 +67,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "24495d69-d568-4cc7-9445-87692bf77863",
    "metadata": {},
@@ -166,6 +168,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "06ca1e50-eeee-4079-bec6-3621cb760f98",
    "metadata": {},
@@ -253,6 +256,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "3444d895-e2fd-4af9-834a-64acf49f74f8",
    "metadata": {},
diff --git a/docs/examples/output_parsing/df_program.ipynb b/docs/examples/output_parsing/df_program.ipynb
index 2b3d4d2b2a..ef70bb835d 100644
--- a/docs/examples/output_parsing/df_program.ipynb
+++ b/docs/examples/output_parsing/df_program.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "530c973e-916d-4c9e-9365-e2d5306d7e3d",
    "metadata": {
@@ -11,6 +12,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "18461ba1-6978-4b5b-861e-6dceec36857b",
    "metadata": {
@@ -27,6 +29,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "fb240ba9-2f23-4686-8097-4f1d7bdf02cb",
    "metadata": {},
@@ -51,7 +54,7 @@
    "outputs": [],
    "source": [
     "from llama_index.program import OpenAIPydanticProgram, DFFullProgram, DataFrame, DataFrameRowsOnly\n",
-    "from langchain.chat_models import ChatOpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
@@ -65,7 +68,7 @@
    "source": [
     "program = OpenAIPydanticProgram.from_defaults(\n",
     "    output_cls=DataFrame,\n",
-    "    llm=ChatOpenAI(temperature=0, model_name=\"gpt-4-0613\"),\n",
+    "    llm=OpenAI(temperature=0, model=\"gpt-4-0613\"),\n",
     "    prompt_template_str=(\n",
     "        \"Please extract the following query into a structured data according to: {input_str}.\"\n",
     "        \"Please extract both the set of column names and a set of rows.\"\n",
@@ -159,7 +162,7 @@
    "source": [
     "program = OpenAIPydanticProgram.from_defaults(\n",
     "    output_cls=DataFrameRowsOnly,\n",
-    "    llm=ChatOpenAI(temperature=0, model_name=\"gpt-4-0613\"),\n",
+    "    llm=OpenAI(temperature=0, model=\"gpt-4-0613\"),\n",
     "    prompt_template_str=(\n",
     "        \"Please extract the following text into a structured data: {input_str}. \"\n",
     "        \"The column names are the following: ['Name', 'Age', 'City', 'Favorite Sport']. \"\n",
@@ -223,6 +226,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "acbcbca8-78f1-47cd-9507-81b2c78ba6fe",
    "metadata": {},
diff --git a/docs/examples/output_parsing/evaporate_program.ipynb b/docs/examples/output_parsing/evaporate_program.ipynb
index d38398fc0b..742840ef29 100644
--- a/docs/examples/output_parsing/evaporate_program.ipynb
+++ b/docs/examples/output_parsing/evaporate_program.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "8cd3f128-866a-4857-a00a-df19f926c952",
    "metadata": {
@@ -43,11 +44,11 @@
-    "    LLMPredictor\n",
     ")\n",
     "from llama_index.program.predefined import DFEvaporateProgram, EvaporateExtractor, MultiValueEvaporateProgram\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "import requests"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "da19d340-57b5-439f-9cb1-5ba9576ec304",
    "metadata": {
@@ -60,6 +62,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "a299cad8-af81-4974-a3de-ed43877d3490",
    "metadata": {},
@@ -132,6 +135,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e7310883-2aeb-4a4d-b101-b3279e670ea8",
    "metadata": {},
@@ -149,10 +153,9 @@
    "outputs": [],
    "source": [
     "# setup service context\n",
-    "# llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=llm_predictor, chunk_size=512\n",
+    "    llm=llm, chunk_size=512\n",
     ")"
    ]
   },
@@ -174,6 +177,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "bb369a78-e634-43f4-805e-52f6ea0f3588",
    "metadata": {},
@@ -197,6 +201,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c548768e-9d4a-4708-9c84-9266503edf01",
    "metadata": {},
@@ -259,6 +264,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "508a442c-d7d8-4a27-8add-1d58f1ecc66b",
    "metadata": {},
@@ -302,6 +308,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9465ba41-8318-40bb-a202-49df6e3c16e3",
    "metadata": {},
@@ -322,9 +329,9 @@
    },
    "outputs": [],
    "source": [
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-4\")\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=llm_predictor, chunk_size=1024, chunk_overlap=0\n",
+    "    llm=llm, chunk_size=1024, chunk_overlap=0\n",
     ")"
    ]
   },
@@ -659,6 +666,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "820768fe-aa23-4999-bcc1-102e6fc817e5",
    "metadata": {},
diff --git a/docs/examples/output_parsing/guidance_sub_question.ipynb b/docs/examples/output_parsing/guidance_sub_question.ipynb
index 46c9395883..ffd0f6ff01 100644
--- a/docs/examples/output_parsing/guidance_sub_question.ipynb
+++ b/docs/examples/output_parsing/guidance_sub_question.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c58e17b3-ec09-4e07-8e2e-d19a8e24dd40",
    "metadata": {
@@ -11,6 +12,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d5637f97-60c3-40bb-840f-fc4e217940a7",
    "metadata": {
@@ -21,6 +23,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "bd3d24c8-5b2b-4acf-a9de-53134453c186",
    "metadata": {},
@@ -30,6 +33,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "afa2db97-2a46-4629-a201-d4eb99480f3d",
    "metadata": {},
@@ -38,6 +42,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "3977e961-fb19-495f-89c5-6a283596b459",
    "metadata": {},
@@ -71,6 +76,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "04039c8c-72df-495d-915c-09d04321bb96",
    "metadata": {},
@@ -147,6 +153,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "69c65ed8-e095-4ad9-9e7b-c18c58b45723",
    "metadata": {},
@@ -155,6 +162,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d59ca5bb-604f-402b-ac53-fc093141cc52",
    "metadata": {
@@ -176,7 +184,6 @@
    "source": [
     "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, VectorStoreIndex\n",
     "from llama_index.response.pprint_utils import pprint_response\n",
-    "from langchain import OpenAI\n",
     "\n",
     "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
     "from llama_index.query_engine import SubQuestionQueryEngine"
@@ -222,6 +229,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "15e5f0da-3d3e-41fb-816f-893f7b71587e",
    "metadata": {},
@@ -250,7 +258,7 @@
     "]\n",
     "\n",
     "s_engine = SubQuestionQueryEngine.from_defaults(\n",
-    "    question_gen=question_gen  # use guidance based question_gen defined above\n",
+    "    question_gen=question_gen,  # use guidance based question_gen defined above\n",
     "    query_engine_tools=query_engine_tools, \n",
     ")"
    ]
diff --git a/docs/examples/query_engine/JointQASummary.ipynb b/docs/examples/query_engine/JointQASummary.ipynb
index acb228d0b6..44d034ca4a 100644
--- a/docs/examples/query_engine/JointQASummary.ipynb
+++ b/docs/examples/query_engine/JointQASummary.ipynb
@@ -50,7 +50,7 @@
                 "from llama_index.composability.joint_qa_summary import QASummaryQueryEngineBuilder\n",
                 "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
                 "from llama_index.response.notebook_utils import display_response\n",
-                "from langchain.chat_models import ChatOpenAI"
+                "from llama_index.llms import OpenAI"
             ]
         },
         {
@@ -84,11 +84,11 @@
                 }
             ],
             "source": [
-                "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4, chunk_size=1024)\n",
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4, chunk_size=1024)\n",
                 "\n",
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
             ]
         },
         {
diff --git a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb
index a309f6652d..26f036ec2a 100644
--- a/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb
+++ b/docs/examples/query_engine/SQLAutoVectorQueryEngine.ipynb
@@ -137,12 +137,12 @@
     "from llama_index.storage import StorageContext\n",
     "from llama_index.vector_stores import PineconeVectorStore\n",
     "from llama_index.langchain_helpers.text_splitter import TokenTextSplitter\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "# define node parser and LLM\n",
     "chunk_size = 1024\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm_predictor=llm_predictor)\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n",
+    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n",
     "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n",
     "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n",
     "\n",
diff --git a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb
index 78dda3465a..af12c5a070 100644
--- a/docs/examples/query_engine/SQLJoinQueryEngine.ipynb
+++ b/docs/examples/query_engine/SQLJoinQueryEngine.ipynb
@@ -117,12 +117,12 @@
     "from llama_index.storage import StorageContext\n",
     "from llama_index.vector_stores import PineconeVectorStore\n",
     "from llama_index.langchain_helpers.text_splitter import TokenTextSplitter\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "# define node parser and LLM\n",
     "chunk_size = 1024\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm_predictor=llm_predictor)\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-4\", streaming=True)\n",
+    "service_context = ServiceContext.from_defaults(chunk_size=chunk_size, llm=llm)\n",
     "text_splitter = TokenTextSplitter(chunk_size=chunk_size)\n",
     "node_parser = SimpleNodeParser(text_splitter=text_splitter)\n",
     "\n",
diff --git a/docs/examples/query_engine/citation_query_engine.ipynb b/docs/examples/query_engine/citation_query_engine.ipynb
index 0c7704b1e8..0a646c826f 100644
--- a/docs/examples/query_engine/citation_query_engine.ipynb
+++ b/docs/examples/query_engine/citation_query_engine.ipynb
@@ -39,8 +39,7 @@
    ],
    "source": [
     "import os\n",
-    "from langchain.llms import OpenAI\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from llama_index.query_engine import CitationQueryEngine\n",
     "from llama_index.retrievers import VectorIndexRetriever\n",
     "from llama_index import (\n",
@@ -61,7 +60,7 @@
    "outputs": [],
    "source": [
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=LLMPredictor(llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0))\n",
+    "    llm = OpenAI(model='gpt-3.5-turbo', temperature=0)\n",
     ")"
    ]
   },
diff --git a/docs/examples/query_engine/flare_query_engine.ipynb b/docs/examples/query_engine/flare_query_engine.ipynb
index 2b31712bcf..6858a56dd2 100644
--- a/docs/examples/query_engine/flare_query_engine.ipynb
+++ b/docs/examples/query_engine/flare_query_engine.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f866cc0a-460e-4dbf-91b7-f541a4c0eda6",
    "metadata": {
@@ -33,8 +34,7 @@
    ],
    "source": [
     "import os\n",
-    "from langchain.llms import OpenAI\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from llama_index.query_engine import FLAREInstructQueryEngine\n",
     "from llama_index import (\n",
     "    VectorStoreIndex,\n",
@@ -55,8 +55,7 @@
    "outputs": [],
    "source": [
     "service_context = ServiceContext.from_defaults(\n",
-    "    # llm_predictor=LLMPredictor(llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)),\n",
-    "    llm_predictor=LLMPredictor(llm=ChatOpenAI(model_name='gpt-4', temperature=0)),\n",
+    "    llm=OpenAI(model='gpt-4', temperature=0),\n",
     "    chunk_size=512\n",
     ")"
    ]
diff --git a/docs/examples/query_engine/json_query_engine.ipynb b/docs/examples/query_engine/json_query_engine.ipynb
index 6b16dc403a..9adbdd2135 100644
--- a/docs/examples/query_engine/json_query_engine.ipynb
+++ b/docs/examples/query_engine/json_query_engine.ipynb
@@ -233,10 +233,10 @@
             ],
             "source": [
                 "from llama_index.indices.service_context import ServiceContext\n",
-                "from langchain.llms.openai import OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.indices.struct_store import JSONQueryEngine\n",
                 "\n",
-                "llm = OpenAI(model_name=\"text-davinci-003\")\n",
+                "llm = OpenAI(model=\"text-davinci-003\")\n",
                 "service_context = ServiceContext.from_defaults(llm=llm)\n",
                 "nl_query_engine = JSONQueryEngine(json_value=json_value, json_schema=json_schema, service_context=service_context)\n",
                 "raw_query_engine = JSONQueryEngine(json_value=json_value, json_schema=json_schema, service_context=service_context, synthesize_response=False)"
diff --git a/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb b/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb
index a092424b9d..30eaa54cf4 100644
--- a/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb
+++ b/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb
@@ -42,8 +42,6 @@
-                "    LLMPredictor,\n",
                 "    ServiceContext\n",
                 ")\n",
-                "from langchain.chat_models import ChatOpenAI\n",
-                "from langchain.llms import OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from IPython.display import Markdown, display"
             ]
         },
@@ -57,12 +56,12 @@
             "outputs": [],
             "source": [
                 "# LLM Predictor (gpt-3)\n",
-                "llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-                "service_context_gpt3 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt3)\n",
+                "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+                "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3)\n",
                 "\n",
                 "# LLMPredictor (gpt-4)\n",
-                "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
             ]
         },
         {
diff --git a/docs/examples/tools/OnDemandLoaderTool.ipynb b/docs/examples/tools/OnDemandLoaderTool.ipynb
index b52bc329a1..63f267b5d0 100644
--- a/docs/examples/tools/OnDemandLoaderTool.ipynb
+++ b/docs/examples/tools/OnDemandLoaderTool.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "318d3e6d-8155-4f86-ac16-bdacaf041bb9",
    "metadata": {},
@@ -31,6 +32,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e167e7ed-92ba-4662-b6f6-24dc36ea4805",
    "metadata": {},
@@ -73,6 +75,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0f8e6bb2-b019-4b23-9b4b-bd5e6f436bcb",
    "metadata": {},
@@ -135,6 +138,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "2bac4007-7121-4ab4-91e3-6706e59de6b7",
    "metadata": {},
@@ -189,6 +193,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "56b88449-1291-470c-8b7c-cb49d2edbede",
    "metadata": {},
diff --git a/docs/examples/usecases/10k_sub_question.ipynb b/docs/examples/usecases/10k_sub_question.ipynb
index e152a9e287..800b17dca4 100644
--- a/docs/examples/usecases/10k_sub_question.ipynb
+++ b/docs/examples/usecases/10k_sub_question.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0f67904b-5fd6-443f-bf10-d49a69b25fcd",
    "metadata": {
@@ -13,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1,
    "id": "cdf1e9b6-0f51-49df-a11d-50ba6a014f4d",
    "metadata": {
     "tags": []
@@ -26,22 +27,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 2,
    "id": "09fbec4c-1864-4d76-9dbf-3d213ba58fc8",
    "metadata": {
     "tags": []
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/suo/miniconda3/envs/llama/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.7) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
    "source": [
-    "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, VectorStoreIndex\n",
-    "from llama_index.response.pprint_utils import pprint_response\n",
-    "from langchain import OpenAI\n",
+    "from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
     "from llama_index.query_engine import SubQuestionQueryEngine"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "c743f504-f28c-4802-89b6-ad152b74b0eb",
    "metadata": {
@@ -53,18 +63,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "c4ec8b0a-d5fa-4f74-a2cc-5cc52e009bc6",
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\", max_tokens=-1, streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+    "llm = OpenAI(temperature=0, model=\"text-davinci-003\", max_tokens=-1)\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "71fddd07-ff4c-44d4-82af-64e2e416e853",
    "metadata": {},
@@ -74,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "dd0ba028-1e70-4164-8af1-5f1df0ea76a9",
    "metadata": {
     "tags": []
@@ -86,6 +97,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "fd122d0d-2da6-4f46-aa2a-8a0049ad8694",
    "metadata": {},
@@ -95,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "1e0b6e4c-2255-42cf-be88-0fe75a945d85",
    "metadata": {
     "tags": []
@@ -107,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "a606df9c-ed2d-46fb-943e-ac47d24ba412",
    "metadata": {
     "tags": []
@@ -118,6 +130,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "668dffa8-1eb3-4209-913a-ed7debe7bee8",
    "metadata": {
@@ -129,7 +142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "82466534-c3d8-4619-ab1b-4abcd05c8ba7",
    "metadata": {
     "tags": []
@@ -141,7 +154,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "ff449977-2c7c-433f-b303-ff1d7b66c7b3",
    "metadata": {
     "tags": []
@@ -153,7 +166,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 9,
    "id": "8775650f-b164-478c-8129-9a8e6a0cdc97",
    "metadata": {
     "tags": []
@@ -175,6 +188,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "6981caf5-38bb-4d5e-9068-b4874c62bfc9",
    "metadata": {
@@ -186,7 +200,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 10,
    "id": "edd4bbb7-eef9-4b53-b05d-f91033635ac2",
    "metadata": {
     "tags": []
@@ -198,17 +212,18 @@
      "text": [
       "Generated 4 sub questions.\n",
       "\u001b[36;1m\u001b[1;3m[uber_10k] Q: What customer segments grew the fastest for Uber\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: \n",
-      "Uber's Delivery offering grew the fastest, as it allowed consumers to search for and discover local restaurants, order a meal, and either pick-up at the restaurant or have the meal delivered. Additionally, Delivery also includes offerings for grocery, alcohol and convenience store delivery as well as select other goods.\n",
       "\u001b[0m\u001b[33;1m\u001b[1;3m[uber_10k] Q: What geographies grew the fastest for Uber\n",
-      "\u001b[0m\u001b[33;1m\u001b[1;3m[uber_10k] A: \n",
-      "Uber experienced the fastest growth in Mobility Gross Bookings in five metropolitan areas—Chicago, Miami, and New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom. Additionally, Uber experienced strong competition in large metropolitan areas, which led to the offering of significant Driver incentives and consumer discounts and promotions in these large metropolitan areas.\n",
       "\u001b[0m\u001b[38;5;200m\u001b[1;3m[lyft_10k] Q: What customer segments grew the fastest for Lyft\n",
-      "\u001b[0m\u001b[38;5;200m\u001b[1;3m[lyft_10k] A: \n",
-      "The customer segments that grew the fastest for Lyft were riders and drivers. Riders are diverse and dynamic, representing all adult age groups and backgrounds, and drivers are active members of their communities. Both riders and drivers benefit from Lyft's technology, insurance, community standards, and support.\n",
       "\u001b[0m\u001b[32;1m\u001b[1;3m[lyft_10k] Q: What geographies grew the fastest for Lyft\n",
+      "\u001b[0m\u001b[33;1m\u001b[1;3m[uber_10k] A: \n",
+      "Uber experienced the fastest growth in five metropolitan areas—Chicago, Miami, and New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom. Additionally, Uber experienced growth in suburban and rural areas, though the network is smaller and less liquid in these areas.\n",
+      "\u001b[0m\u001b[38;5;200m\u001b[1;3m[lyft_10k] A: \n",
+      "Lyft has seen the fastest growth in its ridesharing marketplace, Express Drive, Lyft Rentals, Light Vehicles, Public Transit, and Lyft Autonomous customer segments. These customer segments have seen increased demand due to the convenience and high-quality experience they offer drivers and riders, as well as the investments Lyft has made in its proprietary technology, M&A and strategic partnerships, and brand and marketing efforts.\n",
       "\u001b[0m\u001b[32;1m\u001b[1;3m[lyft_10k] A: \n",
-      "Lyft has grown rapidly in the United States and Canada. It has also expanded its offerings to select cities, such as access to a network of shared bikes and scooters (“Light Vehicles”) for shorter rides and first-mile and last-mile legs of multimodal trips, information about nearby public transit routes, and Lyft Rentals, an offering for renters who want to rent a car for a fixed period of time for personal use. It is likely that these cities have seen the fastest growth for Lyft.\n",
+      "Lyft has grown rapidly in cities across the United States and in select cities in Canada. The ridesharing market grew rapidly prior to the COVID-19 pandemic, and it is uncertain to what extent market acceptance will continue to grow after the pandemic. The market for Lyft's other offerings, such as its network of Light Vehicles, is also new and unproven, and it is uncertain whether demand for bike and scooter sharing will continue to grow.\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: in 2021?\n",
+      "\n",
+      "The customer segments that grew the fastest for Uber in 2021 were Riders and Eaters, who use the platform for ridesharing services and meal preparation, grocery, and other delivery services, respectively. Additionally, Uber One, Uber Pass, Eats Pass, and Rides Pass membership programs grew significantly in 2021, with over 6 million members.\n",
       "\u001b[0m"
      ]
     }
@@ -219,7 +234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 11,
    "id": "b631d68b-dd17-4afd-9ed7-da0131041c8b",
    "metadata": {
     "tags": []
@@ -230,9 +245,13 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Uber experienced the fastest growth in Mobility Gross Bookings in five metropolitan areas—Chicago, Miami, New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom. The customer segments that grew the fastest for Lyft were riders and drivers. Lyft has grown rapidly in the United States and Canada, and has also expanded its offerings to select cities. \n",
+      "Uber and Lyft both experienced the fastest growth in their respective customer segments and geographies in 2021. \n",
+      "\n",
+      "For Uber, the fastest growing customer segments were Riders and Eaters, who use the platform for ridesharing services and meal preparation, grocery, and other delivery services, respectively. Additionally, Uber One, Uber Pass, Eats Pass, and Rides Pass membership programs grew significantly in 2021, with over 6 million members. Uber experienced the fastest growth in five metropolitan areas—Chicago, Miami, and New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom. Additionally, Uber experienced growth in suburban and rural areas, though the network is smaller and less liquid in these areas.\n",
+      "\n",
+      "For Lyft, the fastest growing customer segments were ridesharing, Express Drive, Lyft Rentals, Light Vehicles, Public Transit, and Lyft Autonomous. Lyft has grown rapidly in cities across the United States and in select cities in Canada. The ridesharing market grew rapidly prior to the COVID-19 pandemic, and it is uncertain to what extent market acceptance will continue to grow after the pandemic. The market for Lyft's other offerings, such as its network of Light Vehicles, is also new and unproven, and it is uncertain whether demand for bike and scooter sharing will continue to grow.\n",
       "\n",
-      "Both Uber and Lyft have experienced the fastest growth in metropolitan areas, however, Uber's growth has been concentrated in five cities while Lyft's growth has been more widespread across the United States and Canada. Additionally, Uber's customer segments that grew the fastest were related to delivery services, while Lyft's customer segments that grew the fastest were riders and drivers.\n"
+      "Overall, Uber and Lyft experienced the fastest growth in different customer segments and geographies. Uber experienced the fastest growth in Riders and Eaters, as well as in five metropolitan areas, while Lyft experienced the fastest growth in ridesharing, Express Drive, Lyft Rentals, Light Vehicles, Public Transit, and Lyft Autonomous, as well as in cities across the United States and in select cities in Canada.\n"
      ]
     }
    ],
@@ -242,7 +261,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 12,
    "id": "6bbbdd5b-0076-48c8-b233-e2ba43d7a6de",
    "metadata": {},
    "outputs": [
@@ -252,11 +271,11 @@
      "text": [
       "Generated 2 sub questions.\n",
       "\u001b[36;1m\u001b[1;3m[uber_10k] Q: What is the revenue growth of Uber from 2020 to 2021\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: \n",
-      "The revenue growth of Uber from 2020 to 2021 was 57%, or 54% on a constant currency basis.\n",
       "\u001b[0m\u001b[33;1m\u001b[1;3m[lyft_10k] Q: What is the revenue growth of Lyft from 2020 to 2021\n",
       "\u001b[0m\u001b[33;1m\u001b[1;3m[lyft_10k] A: \n",
-      "The revenue growth of Lyft from 2020 to 2021 was 36.7%, increasing from $2,364,681 thousand to $3,208,323 thousand.\n",
+      "The revenue of Lyft grew by 36% from 2020 to 2021.\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: \n",
+      "The revenue growth of Uber from 2020 to 2021 was 57%, or 54% on a constant currency basis.\n",
       "\u001b[0m"
      ]
     }
@@ -267,7 +286,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 13,
    "id": "fadf421e-5938-4031-81df-cfbfd347b674",
    "metadata": {
     "tags": []
@@ -278,7 +297,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "The revenue growth of Uber from 2020 to 2021 was higher than that of Lyft, at 57% or 54% on a constant currency basis, compared to Lyft's 36.7%.\n"
+      "The revenue growth of Uber from 2020 to 2021 was 57%, or 54% on a constant currency basis, while the revenue of Lyft grew by 36% from 2020 to 2021. Therefore, Uber had a higher revenue growth than Lyft from 2020 to 2021.\n"
      ]
     }
    ],
diff --git a/docs/examples/usecases/10q_fn_agent-react-compare.ipynb b/docs/examples/usecases/10q_fn_agent-react-compare.ipynb
index cf398b239c..2ded3cbb18 100644
--- a/docs/examples/usecases/10q_fn_agent-react-compare.ipynb
+++ b/docs/examples/usecases/10q_fn_agent-react-compare.ipynb
@@ -51,8 +51,7 @@
    "source": [
     "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, GPTVectorStoreIndex\n",
     "from llama_index.response.pprint_utils import pprint_response\n",
-    "from langchain import OpenAI\n",
-    "from langchain.chat_models import ChatOpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
@@ -65,10 +64,10 @@
    "outputs": [],
    "source": [
     "# llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\", max_tokens=-1, streaming=True))\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\", streaming=True))\n",
-    "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\", streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)\n",
-    "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+    "service_context = ServiceContext.from_defaults(llm=chatgpt)\n",
+    "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
    ]
   },
   {
@@ -143,8 +142,8 @@
    "outputs": [],
    "source": [
     "# define agent LLM\n",
+    "from langchain.chat_models import ChatOpenAI\n",
     "\n",
-    "# llm = OpenAI(temperature=0., max_tokens=-1)\n",
     "llm = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\")\n",
     "llm_gpt4 = ChatOpenAI(temperature=0, model_name=\"gpt-4\")"
    ]
diff --git a/docs/examples/usecases/10q_sub_question.ipynb b/docs/examples/usecases/10q_sub_question.ipynb
index ed4f8989e4..a7ee2458cc 100644
--- a/docs/examples/usecases/10q_sub_question.ipynb
+++ b/docs/examples/usecases/10q_sub_question.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "64971178-177d-48ff-9c11-201f3306a6b6",
    "metadata": {
@@ -33,15 +34,16 @@
    },
    "outputs": [],
    "source": [
-    "from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, VectorStoreIndex\n",
+    "from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex\n",
     "from llama_index.response.pprint_utils import pprint_response\n",
-    "from langchain import OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
     "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
     "from llama_index.query_engine import SubQuestionQueryEngine"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f2ffdb6c-b7df-4649-84d5-2b4ce32ad8a9",
    "metadata": {
@@ -60,11 +62,12 @@
    },
    "outputs": [],
    "source": [
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\", max_tokens=-1, streaming=True))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+    "llm = OpenAI(temperature=0, model=\"text-davinci-003\", max_tokens=-1)\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e034ed70-9d74-4ded-9797-13f2cc5c5ac8",
    "metadata": {
@@ -89,6 +92,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "210ec44c-f559-48cb-9217-1efaca5e5362",
    "metadata": {},
@@ -111,6 +115,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "04fd6587-db6d-4782-83ad-ffc2fcd0b3a1",
    "metadata": {
@@ -172,6 +177,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0888e0b1-bcc8-4ea0-8162-f4a3eea97475",
    "metadata": {
diff --git a/docs/examples/vector_stores/SimpleIndexDemoMMR.ipynb b/docs/examples/vector_stores/SimpleIndexDemoMMR.ipynb
index 6a5db122b0..1ca1257bd3 100644
--- a/docs/examples/vector_stores/SimpleIndexDemoMMR.ipynb
+++ b/docs/examples/vector_stores/SimpleIndexDemoMMR.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -8,6 +9,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -79,6 +81,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -123,6 +126,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -144,10 +148,10 @@
    "source": [
     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
     "from llama_index.response.notebook_utils import display_source_node\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "\n",
-    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size_limit=64)"
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size_limit=64)"
    ]
   },
   {
diff --git a/docs/how_to/customization/llms_migration_guide.md b/docs/how_to/customization/llms_migration_guide.md
index f74510a9af..7d1bd5312b 100644
--- a/docs/how_to/customization/llms_migration_guide.md
+++ b/docs/how_to/customization/llms_migration_guide.md
@@ -2,7 +2,7 @@
 
-We have made some changes to the configuration of LLMs in LLamaIndex to improve its functionality and ease of use.
+We have made some changes to the configuration of LLMs in LlamaIndex to improve its functionality and ease of use.
 
-Previously, the primary abstraction for an LLM was the `LLMPredictor`. However, we have upgraded to a new abstraction called `LLM`, which offers a cleaner and more user-friendly interface.
+Previously, the primary abstraction for an LLM was the `LLMPredictor`. Now, we have upgraded to a new abstraction called `LLM`, which offers a cleaner and more user-friendly interface.
 
 These changes will only affect you if you were using the `ChatGPTLLMPredictor`, `HuggingFaceLLMPredictor`, or a custom implementation subclassing `LLMPredictor`.
 
@@ -10,7 +10,17 @@ These changes will only affect you if you were using the `ChatGPTLLMPredictor`,
 We have removed the `ChatGPTLLMPredictor`, but you can still achieve the same functionality using our new `OpenAI` class.
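+
+For example, a minimal sketch of the equivalent setup with the new `OpenAI` class (the same pattern used in the notebook updates in this change):
+
+```python
+from llama_index import ServiceContext
+from llama_index.llms import OpenAI
+
+llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
+service_context = ServiceContext.from_defaults(llm=llm)
+```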
 
 ## If you were using `HuggingFaceLLMPredictor`:
-We have updated the Hugging Face support to utilize the latest `LLM` abstraction through `HuggingFaceLLM`. To use it, initialize the `HuggingFaceLLM` in the same way as before. Instead of passing it as the `llm_predictor` argument to the service context, you now need to pass it as the `llm` argument.
+We have updated the HuggingFace support to utilize the latest `LLM` abstraction through `HuggingFaceLLM`. To use it, initialize the `HuggingFaceLLM` in the same way as before. Instead of passing it as the `llm_predictor` argument to the service context, you now need to pass it as the `llm` argument.
 
 Old:
 ```python
@@ -25,7 +25,7 @@ service_context = ServiceContext.from_defaults(llm=llm)
 ```
 
 ## If you were subclassing `LLMPredictor`:
-We have refactored the `LLMPredictor` class and removed some outdated logic, which may impact your custom class. The recommended approach now is to implement the `llama_index.llms.base.LLM` interface when defining a custom LLM. Alternatively, you can subclass the simpler `llama_index.llms.custom.CustomLLM` interface.
+We have refactored the `LLMPredictor` class and removed some outdated logic, which may impact your custom class. The recommended approach now is to implement the `llama_index.llms.base.LLM` interface when defining a custom LLM. Alternatively, you can subclass the `llama_index.llms.custom.CustomLLM` base class for a simpler implementation.
 
 Here's an example:
 
@@ -47,7 +47,7 @@ class YourLLM(CustomLLM):
         # completion endpoint
         pass
 
-    def stream_complete(self, prompt: str, **kwargs: Any) -> StreamCompletionResponse:
+    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         # streaming completion endpoint
         pass
 ```
diff --git a/examples/async/AsyncComposableIndicesSEC.ipynb b/examples/async/AsyncComposableIndicesSEC.ipynb
index eb3cda210d..21551454af 100644
--- a/examples/async/AsyncComposableIndicesSEC.ipynb
+++ b/examples/async/AsyncComposableIndicesSEC.ipynb
@@ -171,7 +171,7 @@
             "outputs": [],
             "source": [
                 "from llama_index import ListIndex, LLMPredictor\n",
-                "from langchain import OpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.composability import ComposableGraph"
             ]
         },
@@ -200,8 +200,8 @@
             "outputs": [],
             "source": [
                 "# set number of output tokens\n",
-                "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, max_tokens=512))\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+                "llm = OpenAI(temperature=0, max_tokens=512)\n",
+                "service_context = ServiceContext.from_defaults(llm=llm)"
             ]
         },
         {
diff --git a/examples/docstore/DocstoreDemo.ipynb b/examples/docstore/DocstoreDemo.ipynb
index d1173258f6..a78f6e2a36 100644
--- a/examples/docstore/DocstoreDemo.ipynb
+++ b/examples/docstore/DocstoreDemo.ipynb
@@ -41,10 +41,11 @@
                 "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
                 "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
                 "from llama_index.composability import ComposableGraph\n",
-                "from langchain.chat_models import ChatOpenAI"
+                "from llama_index.llms import OpenAI"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
             "metadata": {},
@@ -66,6 +67,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
             "metadata": {},
@@ -87,6 +89,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
             "metadata": {},
@@ -109,6 +112,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
             "metadata": {},
@@ -161,6 +165,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d3bf6aaf-3375-4212-8323-777969a918f7",
             "metadata": {},
@@ -186,8 +191,8 @@
                 }
             ],
             "source": [
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+                "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=llm, chunk_size=1024)"
             ]
         },
         {
@@ -277,4 +282,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/examples/docstore/DynamoDBDocstoreDemo.ipynb b/examples/docstore/DynamoDBDocstoreDemo.ipynb
index d184d8ca59..d28a650eba 100644
--- a/examples/docstore/DynamoDBDocstoreDemo.ipynb
+++ b/examples/docstore/DynamoDBDocstoreDemo.ipynb
@@ -42,11 +42,12 @@
                 "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext\n",
                 "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
                 "from llama_index.composability import ComposableGraph\n",
-                "from langchain.chat_models import ChatOpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.response.notebook_utils import display_response"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
             "metadata": {},
@@ -68,6 +69,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
             "metadata": {},
@@ -89,6 +91,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
             "metadata": {},
@@ -145,6 +148,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
             "metadata": {},
@@ -204,6 +208,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "1059ec3c",
             "metadata": {
@@ -265,6 +270,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "c5bc40a7",
             "metadata": {
@@ -283,8 +289,8 @@
             },
             "outputs": [],
             "source": [
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
             ]
         },
         {
@@ -384,4 +390,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/examples/docstore/MongoDocstoreDemo.ipynb b/examples/docstore/MongoDocstoreDemo.ipynb
index f5aeaa3cf2..0c5736b2f3 100644
--- a/examples/docstore/MongoDocstoreDemo.ipynb
+++ b/examples/docstore/MongoDocstoreDemo.ipynb
@@ -42,14 +42,15 @@
             },
             "outputs": [],
             "source": [
-                "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext\n",
+                "from llama_index import SimpleDirectoryReader, ServiceContext, StorageContext\n",
                 "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
                 "from llama_index.composability import ComposableGraph\n",
-                "from langchain.chat_models import ChatOpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.response.notebook_utils import display_response"
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
             "metadata": {},
@@ -72,6 +73,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
             "metadata": {},
@@ -94,6 +96,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
             "metadata": {},
@@ -150,6 +153,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
             "metadata": {},
@@ -213,6 +217,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "365a025b",
             "metadata": {},
@@ -267,6 +272,7 @@
             ]
         },
         {
+            "attachments": {},
             "cell_type": "markdown",
             "id": "d3bf6aaf-3375-4212-8323-777969a918f7",
             "metadata": {},
@@ -284,8 +290,8 @@
             },
             "outputs": [],
             "source": [
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-                "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+                "chat_gpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+                "service_context_chatgpt = ServiceContext.from_defaults(llm=chat_gpt, chunk_size=1024)"
             ]
         },
         {
@@ -395,4 +401,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb b/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
index bbb92dce55..14095e5851 100644
--- a/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
+++ b/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f35495ae",
    "metadata": {},
@@ -72,11 +73,12 @@
     "from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext\n",
     "from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex\n",
     "from llama_index.composability import ComposableGraph\n",
-    "from langchain.chat_models import ChatOpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "from llama_index.response.notebook_utils import display_response"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f6dd9d5f-a601-4097-894e-fe98a0c35a5b",
    "metadata": {},
@@ -99,6 +101,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "bae82b55-5c9f-432a-9e06-1fccb6f9fc7f",
    "metadata": {},
@@ -121,6 +124,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "aff4c8e1-b2ba-4ea6-a8df-978c2788fedc",
    "metadata": {},
@@ -198,6 +202,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "528149c1-5bde-4eba-b75a-e8fa1da17d7c",
    "metadata": {},
@@ -309,6 +314,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "365a025b",
    "metadata": {},
@@ -391,6 +397,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d3bf6aaf-3375-4212-8323-777969a918f7",
    "metadata": {},
@@ -412,8 +419,8 @@
    },
    "outputs": [],
    "source": [
-    "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
-    "service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)"
+    "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
+    "service_context_chatgpt = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)"
    ]
   },
   {
diff --git a/examples/experimental/Evaporate.ipynb b/examples/experimental/Evaporate.ipynb
index fb823d7d45..669c2ee496 100644
--- a/examples/experimental/Evaporate.ipynb
+++ b/examples/experimental/Evaporate.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "8cd3f128-866a-4857-a00a-df19f926c952",
    "metadata": {
@@ -25,11 +26,12 @@
     "    LLMPredictor\n",
     ")\n",
     "from llama_index.experimental.evaporate import EvaporateExtractor\n",
-    "from langchain.llms.openai import OpenAIChat, OpenAI\n",
+    "from llama_index.llms import OpenAI\n",
     "import requests"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "a299cad8-af81-4974-a3de-ed43877d3490",
    "metadata": {},
@@ -108,10 +110,9 @@
    },
    "outputs": [],
    "source": [
-    "# llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-4\"))\n",
-    "llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=\"gpt-3.5-turbo\"))\n",
+    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
     "service_context = ServiceContext.from_defaults(\n",
-    "    llm_predictor=llm_predictor, chunk_size=512\n",
+    "    llm=llm, chunk_size=512\n",
     ")"
    ]
   },
@@ -147,6 +148,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "bb369a78-e634-43f4-805e-52f6ea0f3588",
    "metadata": {},
@@ -193,6 +195,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "35173e7a-8e89-4897-a59b-3e31a7ef61e1",
    "metadata": {},
@@ -275,6 +278,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "73bf1c9d-1946-4e6d-992f-b71d2c8ed562",
    "metadata": {},
@@ -460,6 +464,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f3c15344-77ea-4641-ae7a-50b7b239fd75",
    "metadata": {},
@@ -530,6 +535,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "800c3a9b-5661-4653-b4d8-4db0a54b45fb",
    "metadata": {},
diff --git a/examples/paul_graham_essay/DavinciComparison.ipynb b/examples/paul_graham_essay/DavinciComparison.ipynb
index d42b58d5f5..1b1c1a3881 100644
--- a/examples/paul_graham_essay/DavinciComparison.ipynb
+++ b/examples/paul_graham_essay/DavinciComparison.ipynb
@@ -13,6 +13,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "7096589b-daaf-440a-b89d-b4956f2db4b2",
    "metadata": {
@@ -25,6 +26,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d8cfbe6f-4c50-4c4f-90f9-03bb91201ef5",
    "metadata": {},
@@ -50,8 +52,8 @@
     }
    ],
    "source": [
-    "from llama_index import KeywordTableIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext\n",
-    "from langchain import OpenAI\n",
+    "from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext\n",
+    "from llama_index.llms import OpenAI\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -63,8 +65,8 @@
    "outputs": [],
    "source": [
     "# create index\n",
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)\n",
+    "llm = OpenAI(temperature=0, model=\"text-davinci-002\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)\n",
     "\n",
     "documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()\n",
     "index = KeywordTableIndex.from_documents(documents=documents, service_context=service_context)"
@@ -109,6 +111,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "5ad95ede-b0ef-46e9-b6d7-cd397d54afc3",
    "metadata": {},
@@ -134,8 +137,7 @@
     }
    ],
    "source": [
-    "from llama_index import KeywordTableIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext\n",
-    "from langchain import OpenAI\n",
+    "from llama_index import KeywordTableIndex, SimpleDirectoryReader, ServiceContext\n",
     "from IPython.display import Markdown, display"
    ]
   },
@@ -147,8 +149,8 @@
    "outputs": [],
    "source": [
     "# load index\n",
-    "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-    "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)\n",
+    "llm = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+    "service_context = ServiceContext.from_defaults(llm=llm)\n",
     "\n",
     "documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()\n",
     "index = KeywordTableIndex.from_documents(documents=documents, service_context=service_context)"
diff --git a/examples/paul_graham_essay/GPT4Comparison.ipynb b/examples/paul_graham_essay/GPT4Comparison.ipynb
index 83ee37f178..2a78d560b5 100644
--- a/examples/paul_graham_essay/GPT4Comparison.ipynb
+++ b/examples/paul_graham_essay/GPT4Comparison.ipynb
@@ -9,8 +9,7 @@
             "source": [
                 "from llama_index import ListIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext\n",
                 "from llama_index.response.notebook_utils import display_response\n",
-                "from langchain import OpenAI\n",
-                "from langchain.chat_models import ChatOpenAI\n",
+                "from llama_index.llms import OpenAI\n",
                 "from IPython.display import Markdown, display"
             ]
         },
@@ -40,8 +39,8 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+                "llm = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+                "service_context = ServiceContext.from_defaults(llm=llm)"
             ]
         },
         {
@@ -375,8 +374,8 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+                "llm = OpenAI(temperature=0, model=\"gpt-4\")\n",
+                "service_context = ServiceContext.from_defaults(llm=llm)"
             ]
         },
         {
@@ -606,8 +605,8 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4-32k\"))\n",
-                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
+                "llm = OpenAI(temperature=0, model_name=\"gpt-4-32k\")\n",
+                "service_context = ServiceContext.from_defaults(llm=llm)"
             ]
         },
         {
@@ -617,6 +616,9 @@
             "metadata": {},
             "outputs": [],
             "source": [
+                "from llama_index import VectorStoreIndex\n",
+                "\n",
+                "\n",
                 "gpt4_32k_index = VectorStoreIndex.from_documents(documents, service_context=service_context)"
             ]
         },
diff --git a/examples/test_wiki/TestNYC-Benchmark-GPT4.ipynb b/examples/test_wiki/TestNYC-Benchmark-GPT4.ipynb
index f5affc6f9a..7e695e4453 100644
--- a/examples/test_wiki/TestNYC-Benchmark-GPT4.ipynb
+++ b/examples/test_wiki/TestNYC-Benchmark-GPT4.ipynb
@@ -48,9 +48,8 @@
             "source": [
                 "from llama_index import TreeIndex, SimpleDirectoryReader, LLMPredictor, VectorStoreIndex, ListIndex, Prompt, ServiceContext\n",
                 "from llama_index.indices.base import BaseIndex\n",
-                "from llama_index.langchain_helpers.text_splitter import TokenTextSplitter\n",
-                "from langchain.chat_models import ChatOpenAI\n",
-                "from langchain.llms import OpenAI\n",
+                "from llama_index.llms.base import LLM\n",
+                "from llama_index.llms import OpenAI\n",
                 "from llama_index.response.schema import Response\n",
                 "import pandas as pd\n",
                 "from typing import Tuple"
@@ -192,9 +191,9 @@
                 "    def __init__(self, tests: List[TestCase]) -> None:\n",
                 "        self._tests = tests\n",
                 "    \n",
-                "    def test(self, index: BaseIndex, llm_predictor: LLMPredictor, **kwargs) -> List[TestOutcome]:\n",
+                "    def test(self, index: BaseIndex, llm: LLM, **kwargs) -> List[TestOutcome]:\n",
                 "        outcomes: List[TestOutcome] = []\n",
-                "        service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)\n",
+                "        service_context = ServiceContext.from_defaults(llm=llm)\n",
                 "        for test in self._tests:\n",
                 "            query_engine = index.as_query_engine(\n",
                 "                service_context=service_context,\n",
@@ -415,9 +414,7 @@
             "outputs": [],
             "source": [
                 "# gpt-4\n",
-                "llm_predictor_gpt4 = LLMPredictor(\n",
-                "    llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\")\n",
-                ")"
+                "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")"
             ]
         },
         {
@@ -428,7 +425,7 @@
             "outputs": [],
             "source": [
                 "# gpt-3 (text-davinci-003)\n",
-                "llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))"
+                "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")"
             ]
         },
         {
@@ -439,7 +436,7 @@
             "outputs": [],
             "source": [
                 "# chatgpt (gpt-3.5-turbo)\n",
-                "llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\"))"
+                "chatgpt = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")"
             ]
         },
         {
@@ -467,7 +464,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_tree_gpt4 = bm.test(tree_index, llm_predictor_gpt4)"
+                "outcomes_tree_gpt4 = bm.test(tree_index, gpt4)"
             ]
         },
         {
@@ -578,7 +575,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_tree_gpt3 = bm.test(tree_index, llm_predictor_gpt3)"
+                "outcomes_tree_gpt3 = bm.test(tree_index, gpt3)"
             ]
         },
         {
@@ -691,7 +688,7 @@
             },
             "outputs": [],
             "source": [
-                "outcomes_list_gpt4 = bm.test(list_index, llm_predictor_gpt4, response_mode=\"tree_summarize\", use_async=True)"
+                "outcomes_list_gpt4 = bm.test(list_index, gpt4, response_mode=\"tree_summarize\", use_async=True)"
             ]
         },
         {
@@ -802,7 +799,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_list_gpt3 = bm.test(list_index, llm_predictor_gpt3, response_mode=\"tree_summarize\", use_async=True)"
+                "outcomes_list_gpt3 = bm.test(list_index, gpt3, response_mode=\"tree_summarize\", use_async=True)"
             ]
         },
         {
@@ -913,7 +910,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_list_chatgpt = bm.test(list_index, llm_predictor_chatgpt, response_mode=\"tree_summarize\", use_async=True)"
+                "outcomes_list_chatgpt = bm.test(list_index, chatgpt, response_mode=\"tree_summarize\", use_async=True)"
             ]
         },
         {
@@ -1024,7 +1021,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_vector_gpt4 = bm.test(vector_index, llm_predictor_gpt4)"
+                "outcomes_vector_gpt4 = bm.test(vector_index, gpt4)"
             ]
         },
         {
@@ -1135,7 +1132,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "outcomes_vector_gpt3 = bm.test(vector_index, llm_predictor_gpt3)"
+                "outcomes_vector_gpt3 = bm.test(vector_index, gpt3)"
             ]
         },
         {
@@ -1338,7 +1335,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "eval_gpt4 = analyze_outcome_llm(outcomes_vector_gpt3, llm_predictor_gpt4)"
+                "eval_gpt4 = analyze_outcome_llm(outcomes_vector_gpt3, gpt4)"
             ]
         },
         {
@@ -1446,7 +1443,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "eval_chatgpt = analyze_outcome_llm(outcomes_vector_gpt3, llm_predictor_chatgpt)"
+                "eval_chatgpt = analyze_outcome_llm(outcomes_vector_gpt3, chatgpt)"
             ]
         },
         {
@@ -1556,7 +1553,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "eval_gpt3 = analyze_outcome_llm(outcomes_vector_gpt3, llm_predictor_gpt3)"
+                "eval_gpt3 = analyze_outcome_llm(outcomes_vector_gpt3, gpt3)"
             ]
         },
         {
diff --git a/examples/test_wiki/TestNYC-Tree-GPT4.ipynb b/examples/test_wiki/TestNYC-Tree-GPT4.ipynb
index 87d80f27bd..354bede546 100644
--- a/examples/test_wiki/TestNYC-Tree-GPT4.ipynb
+++ b/examples/test_wiki/TestNYC-Tree-GPT4.ipynb
@@ -73,8 +73,7 @@
    "source": [
     "from llama_index import TreeIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext\n",
     "from llama_index.logger import LlamaLogger\n",
-    "from langchain.chat_models import ChatOpenAI\n",
-    "from langchain.llms import OpenAI"
+    "from llama_index.llms import OpenAI"
    ]
   },
   {
@@ -87,12 +86,12 @@
    "outputs": [],
    "source": [
     "# gpt-3 (davinci)\n",
-    "llm_predictor_gpt3 = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
-    "service_context_gpt3 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt3)\n",
+    "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n",
+    "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3)\n",
     "\n",
     "# gpt-4\n",
-    "llm_predictor_gpt4 = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
-    "service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor_gpt4)"
+    "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n",
+    "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)"
    ]
   },
   {
diff --git a/experimental/cli/configuration.py b/experimental/cli/configuration.py
index a13c98e6cd..cb0835c8b8 100644
--- a/experimental/cli/configuration.py
+++ b/experimental/cli/configuration.py
@@ -2,8 +2,6 @@ import os
 from configparser import ConfigParser, SectionProxy
 from typing import Any, Type
 from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.bridge.langchain import OpenAI
-from llama_index.bridge.langchain import BaseLanguageModel
 from llama_index.indices.base import BaseIndex
 from llama_index.embeddings.base import BaseEmbedding
 from llama_index import (
@@ -14,6 +12,8 @@ from llama_index import (
 )
 from llama_index.indices.loading import load_index_from_storage
 from llama_index.llm_predictor import StructuredLLMPredictor
+from llama_index.llms.base import LLM
+from llama_index.llms.openai import OpenAI
 from llama_index.storage.storage_context import StorageContext
 
 
@@ -102,7 +102,7 @@ def _load_llm_predictor(config: ConfigParser) -> LLMPredictor:
         raise KeyError("llm_predictor.type")
 
 
-def _load_llm(section: SectionProxy) -> BaseLanguageModel:
+def _load_llm(section: SectionProxy) -> LLM:
     if "engine" in section:
         return OpenAI(engine=section["engine"])
     else:
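
`_load_llm` now returns the native `LLM` type and, per the visible branch, maps an `engine` key (an Azure-style deployment name) to `OpenAI(engine=...)`. A sketch of driving it from a parsed config; the section and key names other than `engine` are assumptions, since only part of `configuration.py` appears in this hunk:

```python
# Hypothetical config-driven construction mirroring _load_llm's branch:
# an "engine" entry selects an Azure-style deployment; otherwise the
# default OpenAI model is used.
from configparser import ConfigParser

from llama_index.llms.openai import OpenAI

config = ConfigParser()
config.read_string("[llm_predictor]\ntype = default\nengine = my-deployment\n")

section = config["llm_predictor"]
llm = OpenAI(engine=section["engine"]) if "engine" in section else OpenAI()
```
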
diff --git a/llama_index/agent/context_retriever_agent.py b/llama_index/agent/context_retriever_agent.py
index 71293555f4..eddae07b95 100644
--- a/llama_index/agent/context_retriever_agent.py
+++ b/llama_index/agent/context_retriever_agent.py
@@ -43,7 +43,7 @@ class ContextRetrieverOpenAIAgent(BaseOpenAIAgent):
         retriever (BaseRetriever): A retriever.
         qa_prompt (Optional[QuestionAnswerPrompt]): A QA prompt.
         context_separator (str): A context separator.
-        llm (Optional[OpenAI]): An LLM.
+        llm (Optional[OpenAI]): An OpenAI LLM.
         chat_history (Optional[List[ChatMessage]]): A chat history.
         verbose (bool): Whether to print debug statements.
         max_function_calls (int): Maximum number of function calls.
@@ -94,7 +94,7 @@ class ContextRetrieverOpenAIAgent(BaseOpenAIAgent):
             retriever (BaseRetriever): A retriever.
             qa_prompt (Optional[QuestionAnswerPrompt]): A QA prompt.
             context_separator (str): A context separator.
-            llm (Optional[ChatOpenAI]): An LLM.
+            llm (Optional[OpenAI]): An OpenAI LLM.
             chat_history (Optional[ChatMessageHistory]): A chat history.
             verbose (bool): Whether to print debug statements.
             max_function_calls (int): Maximum number of function calls.
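
Since only the docstrings change here, a construction sketch may help tie them to usage; `from_tools_and_retriever` and the argument names below are assumptions inferred from the documented parameters, not shown in this hunk:

```python
# Sketch only: building the agent with the native OpenAI LLM.
from llama_index import Document, VectorStoreIndex
from llama_index.agent import ContextRetrieverOpenAIAgent
from llama_index.llms import OpenAI

index = VectorStoreIndex.from_documents([Document(text="Acme 2022 revenue: $10M.")])
agent = ContextRetrieverOpenAIAgent.from_tools_and_retriever(
    tools=[],  # QueryEngineTool instances would normally go here
    retriever=index.as_retriever(),
    llm=OpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    verbose=True,
)
```
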
diff --git a/llama_index/evaluation/dataset_generation.py b/llama_index/evaluation/dataset_generation.py
index 8194cb8e8d..566ca81490 100644
--- a/llama_index/evaluation/dataset_generation.py
+++ b/llama_index/evaluation/dataset_generation.py
@@ -4,15 +4,14 @@ from __future__ import annotations
 import re
 from typing import List, Optional
 
-from llama_index.bridge.langchain import ChatOpenAI
 
 from llama_index import (
     Document,
     ListIndex,
-    LLMPredictor,
     QuestionAnswerPrompt,
     ServiceContext,
 )
+from llama_index.llms.openai import OpenAI
 from llama_index.schema import BaseNode, NodeWithScore, MetadataMode
 from llama_index.indices.postprocessor.node import KeywordNodePostprocessor
 
@@ -26,12 +25,8 @@ DEFAULT_QUESTION_GENERATION_PROMPT = """Context information is below.\n"
 
 def _get_default_service_context() -> ServiceContext:
     """Get default service context."""
-    llm_predictor = LLMPredictor(
-        llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
-    )
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, chunk_size_limit=3000
-    )
+    llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
+    service_context = ServiceContext.from_defaults(llm=llm, chunk_size_limit=3000)
     return service_context
 
 
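
With the default service context now built on the native OpenAI LLM, the module's usual entry point looks like the sketch below; `DatasetGenerator.from_documents` and `generate_questions_from_nodes` live in `llama_index.evaluation` but sit outside this hunk, so treat the exact call shape as an assumption:

```python
# Sketch: question generation picks up _get_default_service_context()
# (and therefore the native OpenAI gpt-3.5-turbo) when no service
# context is supplied.
from llama_index import Document
from llama_index.evaluation import DatasetGenerator

docs = [Document(text="New York City is the most populous city in the United States.")]
generator = DatasetGenerator.from_documents(docs)
questions = generator.generate_questions_from_nodes()
print(questions)
```
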
diff --git a/llama_index/indices/base.py b/llama_index/indices/base.py
index 6cfed1ecfd..1a1f9eaac4 100644
--- a/llama_index/indices/base.py
+++ b/llama_index/indices/base.py
@@ -8,8 +8,7 @@ from llama_index.data_structs.data_structs import IndexStruct
 from llama_index.indices.base_retriever import BaseRetriever
 from llama_index.indices.query.base import BaseQueryEngine
 from llama_index.indices.service_context import ServiceContext
-from llama_index.schema import Document
-from llama_index.schema import BaseNode
+from llama_index.schema import BaseNode, Document
 from llama_index.storage.docstore.types import BaseDocumentStore, RefDocInfo
 from llama_index.storage.storage_context import StorageContext
 
@@ -341,9 +340,10 @@ class BaseIndex(Generic[IS], ABC):
             from llama_index.chat_engine import CondenseQuestionChatEngine
 
             query_engine = self.as_query_engine(**kwargs)
+            if "service_context" not in kwargs:
+                kwargs["service_context"] = self._service_context
             return CondenseQuestionChatEngine.from_defaults(
                 query_engine=query_engine,
-                service_context=self.service_context,
                 **kwargs,
             )
         elif chat_mode == ChatMode.REACT:
@@ -351,9 +351,10 @@ class BaseIndex(Generic[IS], ABC):
             from llama_index.chat_engine import ReActChatEngine
 
             query_engine = self.as_query_engine(**kwargs)
+            if "service_context" not in kwargs:
+                kwargs["service_context"] = self._service_context
             return ReActChatEngine.from_query_engine(
                 query_engine=query_engine,
-                service_context=self.service_context,
                 **kwargs,
             )
         else:
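
The kwargs handling above is subtle: the index's own service context is injected only when the caller has not supplied one, so the query engine and chat engine share it by default while an explicit override still wins. A sketch of both paths (model names illustrative):

```python
# Default path: the index's service context flows into the chat engine.
from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms import OpenAI

ctx = ServiceContext.from_defaults(llm=OpenAI(temperature=0))
index = VectorStoreIndex.from_documents(
    [Document(text="hello world")], service_context=ctx
)
chat_engine = index.as_chat_engine(chat_mode="condense_question")

# Override path: a caller-supplied service_context is left untouched,
# because the injection above runs only if it is absent from kwargs.
gpt4_ctx = ServiceContext.from_defaults(llm=OpenAI(model="gpt-4"))
chat_engine_gpt4 = index.as_chat_engine(
    chat_mode="condense_question", service_context=gpt4_ctx
)
```
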
diff --git a/llama_index/indices/service_context.py b/llama_index/indices/service_context.py
index c705308d1b..ccf3135810 100644
--- a/llama_index/indices/service_context.py
+++ b/llama_index/indices/service_context.py
@@ -3,8 +3,6 @@ import logging
 from dataclasses import dataclass
 from typing import Optional
 
-from llama_index.bridge.langchain import BaseLanguageModel
-
 import llama_index
 from llama_index.callbacks.base import CallbackManager
 from llama_index.embeddings.base import BaseEmbedding
@@ -12,6 +10,7 @@ from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.indices.prompt_helper import PromptHelper
 from llama_index.llm_predictor import LLMPredictor
 from llama_index.llm_predictor.base import BaseLLMPredictor, LLMMetadata
+from llama_index.llms.base import LLM
 from llama_index.llms.utils import LLMType
 from llama_index.logger import LlamaLogger
 from llama_index.node_parser.interface import NodeParser
@@ -166,7 +165,7 @@ class ServiceContext:
         cls,
         service_context: "ServiceContext",
         llm_predictor: Optional[BaseLLMPredictor] = None,
-        llm: Optional[BaseLanguageModel] = None,
+        llm: Optional[LLM] = None,
         prompt_helper: Optional[PromptHelper] = None,
         embed_model: Optional[BaseEmbedding] = None,
         node_parser: Optional[NodeParser] = None,
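
With `from_service_context` now typed against the native `LLM`, swapping models on an existing context is a one-liner; a minimal sketch:

```python
# from_service_context clones an existing context, overriding only the
# fields passed in, so moving from gpt-3.5-turbo to gpt-4 no longer
# means rebuilding an LLMPredictor by hand.
from llama_index import ServiceContext
from llama_index.llms import OpenAI

base_ctx = ServiceContext.from_defaults(llm=OpenAI(temperature=0, model="gpt-3.5-turbo"))
gpt4_ctx = ServiceContext.from_service_context(base_ctx, llm=OpenAI(model="gpt-4"))
```
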
diff --git a/llama_index/program/llm_program.py b/llama_index/program/llm_program.py
index 780bd6cf42..96c71f8c10 100644
--- a/llama_index/program/llm_program.py
+++ b/llama_index/program/llm_program.py
@@ -1,13 +1,11 @@
 from typing import Any, Dict, Optional, Type, Union
 
-from llama_index.bridge.langchain import ChatOpenAI
 
 from pydantic import BaseModel
+from llama_index.llms.base import LLM
+from llama_index.llms.openai import OpenAI
 from llama_index.program.base_program import BasePydanticProgram
 from llama_index.prompts.base import Prompt
-from llama_index.bridge.langchain import (
-    BaseLanguageModel,
-)
 from llama_index.output_parsers.pydantic import PydanticOutputParser
 
 
@@ -23,7 +21,7 @@ class LLMTextCompletionProgram(BasePydanticProgram[BaseModel]):
         self,
         output_parser: PydanticOutputParser,
         prompt: Prompt,
-        llm: BaseLanguageModel,
+        llm: LLM,
         function_call: Union[str, Dict[str, Any]],
         verbose: bool = False,
     ) -> None:
@@ -38,12 +36,12 @@ class LLMTextCompletionProgram(BasePydanticProgram[BaseModel]):
         cls,
         output_parser: PydanticOutputParser,
         prompt_template_str: str,
-        llm: Optional[BaseLanguageModel] = None,
+        llm: Optional[LLM] = None,
         verbose: bool = False,
         function_call: Optional[Union[str, Dict[str, Any]]] = None,
         **kwargs: Any,
     ) -> "LLMTextCompletionProgram":
-        llm = llm or ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0613")
+        llm = llm or OpenAI(temperature=0, model="gpt-3.5-turbo-0613")
         prompt = Prompt(prompt_template_str)
         function_call = function_call or {
             "name": output_parser.output_cls.schema()["title"]
@@ -72,6 +70,7 @@ class LLMTextCompletionProgram(BasePydanticProgram[BaseModel]):
 
         formatted_prompt = prompt_with_parse_instrs.format(**kwargs)
 
-        raw_output = self._llm.predict(formatted_prompt)
+        response = self._llm.complete(formatted_prompt)
+        raw_output = response.text
         model_output = self._output_parser.parse(raw_output)
         return model_output
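
End to end, the program now defaults to the native OpenAI class and reads `.text` off the `CompletionResponse` before parsing. A usage sketch (the `Song` model and prompt are illustrative):

```python
# Sketch: from_defaults picks OpenAI(gpt-3.5-turbo-0613) when no llm is
# given; __call__ formats the prompt, calls llm.complete, and parses the
# response text into the pydantic model.
from pydantic import BaseModel

from llama_index.output_parsers.pydantic import PydanticOutputParser
from llama_index.program.llm_program import LLMTextCompletionProgram


class Song(BaseModel):
    title: str
    artist: str


program = LLMTextCompletionProgram.from_defaults(
    output_parser=PydanticOutputParser(output_cls=Song),
    prompt_template_str="Generate a song about {topic}.",
)
song = program(topic="the sea")  # returns a validated Song instance
```
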
diff --git a/tests/program/test_llm_program.py b/tests/program/test_llm_program.py
index b532f7e96b..a0fdb0d6fe 100644
--- a/tests/program/test_llm_program.py
+++ b/tests/program/test_llm_program.py
@@ -1,16 +1,20 @@
 """Test LLM program."""
 
-from llama_index.program.llm_program import LLMTextCompletionProgram
-from llama_index.output_parsers.pydantic import PydanticOutputParser
+import json
 from unittest.mock import MagicMock
+
 from pydantic import BaseModel
-import json
+
+from llama_index.llms.base import CompletionResponse
+from llama_index.output_parsers.pydantic import PydanticOutputParser
+from llama_index.program.llm_program import LLMTextCompletionProgram
 
 
 class MockLLM(MagicMock):
-    def predict(self, prompt: str) -> str:
+    def complete(self, prompt: str) -> CompletionResponse:
         test_object = {"hello": "world"}
-        return json.dumps(test_object)
+        text = json.dumps(test_object)
+        return CompletionResponse(text=text)
 
 
 class TestModel(BaseModel):
-- 
GitLab