From 98a2b4a5471a7492c4c7788003596555e52a9fe2 Mon Sep 17 00:00:00 2001
From: Emanuel Ferreira <contatoferreirads@gmail.com>
Date: Mon, 1 Apr 2024 15:43:35 -0300
Subject: [PATCH] feat: add global settings (#668)

Co-authored-by: Alex Yang <himself65@outlook.com>
---
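Notes: this patch replaces the per-call `ServiceContext` with a global `Settings` object across the docs and examples. As a rough, illustrative sketch of the migration (not part of the patch; the model name and sample text are placeholders), the new pattern looks like:

```ts
import { Document, OpenAI, OpenAIEmbedding, Settings, VectorStoreIndex } from "llamaindex";

// Configure the LLM and embedding model once, globally,
// instead of passing a serviceContext into every call.
Settings.llm = new OpenAI({ model: "gpt-4" });
Settings.embedModel = new OpenAIEmbedding();

async function main() {
  // Indexes now pick up the global settings; no serviceContext option is needed.
  const document = new Document({ text: "Some example text.", id_: "example" });
  const index = await VectorStoreIndex.fromDocuments([document]);

  const response = await index.asQueryEngine().query({
    query: "What does the text say?",
  });
  console.log(response.toString());
}

main().catch(console.error);
```
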
 .../modules/agent/multi_document_agent.mdx    |   9 +-
 .../available_embeddings/huggingface.md       |  15 +-
 .../available_embeddings/mistral.md           |  13 +-
 .../embeddings/available_embeddings/ollama.md |  12 +-
 .../embeddings/available_embeddings/openai.md |  12 +-
 .../available_embeddings/together.md          |  12 +-
 apps/docs/docs/modules/embeddings/index.md    |  11 +-
 .../modules/evaluation/modules/correctness.md |  16 +-
 .../evaluation/modules/faithfulness.md        |  12 +-
 .../modules/evaluation/modules/relevancy.md   |  14 +-
 .../modules/llms/available_llms/anthropic.md  |  33 +--
 .../docs/modules/llms/available_llms/azure.md |  29 +--
 .../modules/llms/available_llms/fireworks.md  |  10 +-
 .../docs/modules/llms/available_llms/groq.mdx |  10 +-
 .../modules/llms/available_llms/llama2.md     |  40 +---
 .../modules/llms/available_llms/mistral.md    |  30 +--
 .../modules/llms/available_llms/ollama.md     |  40 +---
 .../modules/llms/available_llms/openai.md     |  30 +--
 .../modules/llms/available_llms/portkey.md    |  35 +--
 .../modules/llms/available_llms/together.md   |  33 +--
 apps/docs/docs/modules/llms/index.md          |   9 +-
 apps/docs/docs/modules/node_parser.md         |   7 +-
 .../node_postprocessors/cohere_reranker.md    |  10 +-
 .../docs/modules/node_postprocessors/index.md |  13 +-
 apps/docs/docs/modules/prompt/index.md        |  10 +-
 .../query_engines/metadata_filtering.md       |   5 +-
 .../query_engines/router_query_engine.md      |  50 ++--
 examples/Settings.ts                          |  29 +++
 examples/agent/multi_document_agent.ts        |  18 +-
 examples/astradb/query.ts                     |   9 +-
 examples/chatEngine.ts                        |  10 +-
 examples/evaluation/correctness.ts            |  21 +-
 examples/evaluation/faithfulness.ts           |  17 +-
 examples/evaluation/relevancy.ts              |  18 +-
 examples/groq.ts                              |  24 +-
 examples/huggingface.ts                       |  19 +-
 examples/longText.ts                          |  21 +-
 examples/milvus/query.ts                      |   9 +-
 examples/mistral.ts                           |  24 +-
 examples/mongodb/3_query.ts                   |  10 +-
 examples/multimodal/load.ts                   |  20 +-
 examples/multimodal/rag.ts                    |  51 ++---
 examples/multimodal/retrieve.ts               |  13 +-
 examples/pg-vector-store/query.ts             |   9 +-
 examples/pinecone-vector-store/query.ts       |   9 +-
 examples/prompts/promptMixin.ts               |   5 +-
 examples/qdrantdb/preFilters.ts               |  31 ++-
 examples/readers/src/csv.ts                   |  14 +-
 examples/readers/src/pdf_fw.ts                |  16 +-
 examples/readers/src/pdf_fw_openai.ts         |  18 +-
 examples/readonly.ts                          |   6 +-
 examples/recipes/cost-analysis.ts             |   5 +-
 examples/rerankers/CohereReranker.ts          |  12 +-
 examples/routerQueryEngine.ts                 |  30 +--
 examples/sentenceWindow.ts                    |  23 +-
 examples/summaryIndex.ts                      |  17 +-
 examples/together-ai/vector-index.ts          |  19 +-
 examples/vectorIndexAnthropic.ts              |  18 +-
 examples/vectorIndexCustomize.ts              |  14 +-
 examples/vectorIndexEmbed3.ts                 |  19 +-
 examples/vectorIndexFromVectorStore.ts        |  29 +--
 examples/vectorIndexGPT4.ts                   |  17 +-
 packages/core/package.json                    |   4 +
 packages/core/src/Retriever.ts                |   4 +-
 packages/core/src/ServiceContext.ts           |   4 +-
 packages/core/src/Settings.ts                 | 215 ++++++++++++++++++
 .../core/src/callbacks/CallbackManager.ts     |  36 +--
 .../core/src/cloud/LlamaCloudRetriever.ts     |  12 +-
 .../chat/CondenseQuestionChatEngine.ts        |   4 +-
 .../src/engines/query/RetrieverQueryEngine.ts |   8 +-
 .../src/engines/query/RouterQueryEngine.ts    |  15 +-
 .../engines/query/SubQuestionQueryEngine.ts   |   4 +-
 packages/core/src/evaluation/Correctness.ts   |  16 +-
 packages/core/src/evaluation/Faithfulness.ts  |  13 +-
 packages/core/src/evaluation/Relevancy.ts     |  14 +-
 packages/core/src/index.edge.ts               |   1 +
 packages/core/src/indices/BaseIndex.ts        |   8 +-
 packages/core/src/indices/keyword/index.ts    |  26 ++-
 packages/core/src/indices/summary/index.ts    |  37 +--
 .../core/src/indices/vectorStore/index.ts     |  29 ++-
 .../src/internal/settings/CallbackManager.ts  |  25 ++
 packages/core/src/llm/LLM.ts                  |   6 +-
 packages/core/src/llm/mistral.ts              |   4 +-
 packages/core/src/llm/utils.ts                |   8 +-
 packages/core/src/not-allow.js                |   1 +
 packages/core/src/selectors/utils.ts          |   3 +-
 .../MultiModalResponseSynthesizer.ts          |  12 +-
 .../src/synthesizers/ResponseSynthesizer.ts   |   5 +-
 packages/core/src/synthesizers/builders.ts    |  22 +-
 packages/core/tests/CallbackManager.test.ts   |  13 +-
 .../experimental/examples/jsonQueryEngine.ts  |  13 +-
 tsconfig.json                                 |   1 -
 92 files changed, 788 insertions(+), 919 deletions(-)
 create mode 100644 examples/Settings.ts
 create mode 100644 packages/core/src/Settings.ts
 create mode 100644 packages/core/src/internal/settings/CallbackManager.ts
 create mode 100644 packages/core/src/not-allow.js

diff --git a/apps/docs/docs/modules/agent/multi_document_agent.mdx b/apps/docs/docs/modules/agent/multi_document_agent.mdx
index cbe9e3c21..7c4188f94 100644
--- a/apps/docs/docs/modules/agent/multi_document_agent.mdx
+++ b/apps/docs/docs/modules/agent/multi_document_agent.mdx
@@ -33,7 +33,7 @@ import {
   SimpleToolNodeMapping,
   SummaryIndex,
   VectorStoreIndex,
-  serviceContextFromDefaults,
+  Settings,
   storageContextFromDefaults,
 } from "llamaindex";
 ```
@@ -147,12 +147,10 @@ for (const title of wikiTitles) {
 We will be using gpt-4 for this example, and we will use the `StorageContext` to persist the documents on disk.
 
 ```ts
-const llm = new OpenAI({
+Settings.llm = new OpenAI({
   model: "gpt-4",
 });
 
-const ctx = serviceContextFromDefaults({ llm });
-
 const storageContext = await storageContextFromDefaults({
   persistDir: "./storage",
 });
@@ -189,14 +187,12 @@ for (const title of wikiTitles) {
 
   // create the vector index for specific search
   const vectorIndex = await VectorStoreIndex.init({
-    serviceContext: serviceContext,
     storageContext: storageContext,
     nodes,
   });
 
   // create the summary index for broader search
   const summaryIndex = await SummaryIndex.init({
-    serviceContext: serviceContext,
     nodes,
   });
 
@@ -278,7 +274,6 @@ const objectIndex = await ObjectIndex.fromObjects(
   toolMapping,
   VectorStoreIndex,
   {
-    serviceContext,
     storageContext,
   },
 );
diff --git a/apps/docs/docs/modules/embeddings/available_embeddings/huggingface.md b/apps/docs/docs/modules/embeddings/available_embeddings/huggingface.md
index c75884277..41c0a0b41 100644
--- a/apps/docs/docs/modules/embeddings/available_embeddings/huggingface.md
+++ b/apps/docs/docs/modules/embeddings/available_embeddings/huggingface.md
@@ -3,17 +3,14 @@
 To use HuggingFace embeddings, you need to import `HuggingFaceEmbedding` from `llamaindex`.
 
 ```ts
-import { HuggingFaceEmbedding, serviceContextFromDefaults } from "llamaindex";
+import { HuggingFaceEmbedding, Settings } from "llamaindex";
 
-const huggingFaceEmbeds = new HuggingFaceEmbedding();
-
-const serviceContext = serviceContextFromDefaults({ embedModel: openaiEmbeds });
+// Update Embed Model
+Settings.embedModel = new HuggingFaceEmbedding();
 
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 const queryEngine = index.asQueryEngine();
 
@@ -29,8 +26,8 @@ If you're not using a quantized model, set the `quantized` parameter to `false`.
 
 For example, to use the non-quantized `BAAI/bge-small-en-v1.5` model, you can use the following code:
 
-```
-const embedModel = new HuggingFaceEmbedding({
+```ts
+Settings.embedModel = new HuggingFaceEmbedding({
   modelType: "BAAI/bge-small-en-v1.5",
   quantized: false,
 });
diff --git a/apps/docs/docs/modules/embeddings/available_embeddings/mistral.md b/apps/docs/docs/modules/embeddings/available_embeddings/mistral.md
index ee2da3a33..a747482ed 100644
--- a/apps/docs/docs/modules/embeddings/available_embeddings/mistral.md
+++ b/apps/docs/docs/modules/embeddings/available_embeddings/mistral.md
@@ -3,21 +3,16 @@
 To use MistralAI embeddings, you need to import `MistralAIEmbedding` from `llamaindex`.
 
 ```ts
-import { MistralAIEmbedding, serviceContextFromDefaults } from "llamaindex";
+import { MistralAIEmbedding, Settings } from "llamaindex";
 
-const mistralEmbedModel = new MistralAIEmbedding({
+// Update Embed Model
+Settings.embedModel = new MistralAIEmbedding({
   apiKey: "<YOUR_API_KEY>",
 });
 
-const serviceContext = serviceContextFromDefaults({
-  embedModel: mistralEmbedModel,
-});
-
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 const queryEngine = index.asQueryEngine();
 
diff --git a/apps/docs/docs/modules/embeddings/available_embeddings/ollama.md b/apps/docs/docs/modules/embeddings/available_embeddings/ollama.md
index 7ccd8eeb4..e7a699ee8 100644
--- a/apps/docs/docs/modules/embeddings/available_embeddings/ollama.md
+++ b/apps/docs/docs/modules/embeddings/available_embeddings/ollama.md
@@ -3,19 +3,13 @@
 To use Ollama embeddings, you need to import `Ollama` from `llamaindex`.
 
 ```ts
-import { Ollama, serviceContextFromDefaults } from "llamaindex";
+import { Ollama, Settings } from "llamaindex";
 
-const ollamaEmbedModel = new Ollama();
-
-const serviceContext = serviceContextFromDefaults({
-  embedModel: ollamaEmbedModel,
-});
+Settings.embedModel = new Ollama();
 
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 const queryEngine = index.asQueryEngine();
 
diff --git a/apps/docs/docs/modules/embeddings/available_embeddings/openai.md b/apps/docs/docs/modules/embeddings/available_embeddings/openai.md
index 20e9c864c..7dae906dc 100644
--- a/apps/docs/docs/modules/embeddings/available_embeddings/openai.md
+++ b/apps/docs/docs/modules/embeddings/available_embeddings/openai.md
@@ -3,19 +3,13 @@
 To use OpenAI embeddings, you need to import `OpenAIEmbedding` from `llamaindex`.
 
 ```ts
-import { OpenAIEmbedding, serviceContextFromDefaults } from "llamaindex";
+import { OpenAIEmbedding, Settings } from "llamaindex";
 
-const openaiEmbedModel = new OpenAIEmbedding();
-
-const serviceContext = serviceContextFromDefaults({
-  embedModel: openaiEmbedModel,
-});
+Settings.embedModel = new OpenAIEmbedding();
 
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 const queryEngine = index.asQueryEngine();
 
diff --git a/apps/docs/docs/modules/embeddings/available_embeddings/together.md b/apps/docs/docs/modules/embeddings/available_embeddings/together.md
index 755709a8b..025ab40f7 100644
--- a/apps/docs/docs/modules/embeddings/available_embeddings/together.md
+++ b/apps/docs/docs/modules/embeddings/available_embeddings/together.md
@@ -3,21 +3,15 @@
 To use Together embeddings, you need to import `TogetherEmbedding` from `llamaindex`.
 
 ```ts
-import { TogetherEmbedding, serviceContextFromDefaults } from "llamaindex";
+import { TogetherEmbedding, Settings } from "llamaindex";
 
-const togetherEmbedModel = new TogetherEmbedding({
+Settings.embedModel = new TogetherEmbedding({
   apiKey: "<YOUR_API_KEY>",
 });
 
-const serviceContext = serviceContextFromDefaults({
-  embedModel: togetherEmbedModel,
-});
-
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 const queryEngine = index.asQueryEngine();
 
diff --git a/apps/docs/docs/modules/embeddings/index.md b/apps/docs/docs/modules/embeddings/index.md
index 4b7991469..64151ba8a 100644
--- a/apps/docs/docs/modules/embeddings/index.md
+++ b/apps/docs/docs/modules/embeddings/index.md
@@ -2,14 +2,14 @@
 
 The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI.
 
-This can be explicitly set in the `ServiceContext` object.
+This can be explicitly updated through `Settings`.
 
 ```typescript
-import { OpenAIEmbedding, serviceContextFromDefaults } from "llamaindex";
+import { OpenAIEmbedding, Settings } from "llamaindex";
 
-const openaiEmbeds = new OpenAIEmbedding();
-
-const serviceContext = serviceContextFromDefaults({ embedModel: openaiEmbeds });
+Settings.embedModel = new OpenAIEmbedding({
+  model: "text-embedding-ada-002",
+});
 ```
 
 ## Local Embedding
@@ -19,4 +19,3 @@ For local embeddings, you can use the [HuggingFace](./available_embeddings/huggi
 ## API Reference
 
 - [OpenAIEmbedding](../../api/classes/OpenAIEmbedding.md)
-- [ServiceContext](../../api/interfaces//ServiceContext.md)
diff --git a/apps/docs/docs/modules/evaluation/modules/correctness.md b/apps/docs/docs/modules/evaluation/modules/correctness.md
index 7189e9400..0e425a291 100644
--- a/apps/docs/docs/modules/evaluation/modules/correctness.md
+++ b/apps/docs/docs/modules/evaluation/modules/correctness.md
@@ -21,23 +21,15 @@ export OPENAI_API_KEY=your-api-key
 Import the required modules:
 
 ```ts
-import {
-  CorrectnessEvaluator,
-  OpenAI,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { CorrectnessEvaluator, OpenAI, Settings } from "llamaindex";
 ```
 
 Let's set up gpt-4 for better results:
 
 ```ts
-const llm = new OpenAI({
+Settings.llm = new OpenAI({
   model: "gpt-4",
 });
-
-const ctx = serviceContextFromDefaults({
-  llm,
-});
 ```
 
 ```ts
@@ -49,9 +41,7 @@ const response = ` Certainly! Albert Einstein's theory of relativity consists of
 However, general relativity, published in 1915, extended these ideas to include the effects of magnetism. According to general relativity, gravity is not a force between masses but rather the result of the warping of space and time by magnetic fields generated by massive objects. Massive objects, such as planets and stars, create magnetic fields that cause a curvature in spacetime, and smaller objects follow curved paths in response to this magnetic curvature. This concept is often illustrated using the analogy of a heavy ball placed on a rubber sheet with magnets underneath, causing it to create a depression that other objects (representing smaller masses) naturally move towards due to magnetic attraction.
 `;
 
-const evaluator = new CorrectnessEvaluator({
-  serviceContext: ctx,
-});
+const evaluator = new CorrectnessEvaluator();
 
 const result = await evaluator.evaluateResponse({
   query,
diff --git a/apps/docs/docs/modules/evaluation/modules/faithfulness.md b/apps/docs/docs/modules/evaluation/modules/faithfulness.md
index 4324250af..1dd229a42 100644
--- a/apps/docs/docs/modules/evaluation/modules/faithfulness.md
+++ b/apps/docs/docs/modules/evaluation/modules/faithfulness.md
@@ -28,20 +28,16 @@ import {
   FaithfulnessEvaluator,
   OpenAI,
   VectorStoreIndex,
-  serviceContextFromDefaults,
+  Settings,
 } from "llamaindex";
 ```
 
 Let's set up gpt-4 for better results:
 
 ```ts
-const llm = new OpenAI({
+Settings.llm = new OpenAI({
   model: "gpt-4",
 });
-
-const ctx = serviceContextFromDefaults({
-  llm,
-});
 ```
 
 Now, let's create a vector index and a query engine from the documents. Then, we can evaluate the response from the query engine against the query:
@@ -63,9 +59,7 @@ Now, let's evaluate the response:
 ```ts
 const query = "How did New York City get its name?";
 
-const evaluator = new FaithfulnessEvaluator({
-  serviceContext: ctx,
-});
+const evaluator = new FaithfulnessEvaluator();
 
 const response = await queryEngine.query({
   query,
diff --git a/apps/docs/docs/modules/evaluation/modules/relevancy.md b/apps/docs/docs/modules/evaluation/modules/relevancy.md
index 20f451a2c..7da066bea 100644
--- a/apps/docs/docs/modules/evaluation/modules/relevancy.md
+++ b/apps/docs/docs/modules/evaluation/modules/relevancy.md
@@ -21,23 +21,15 @@ export OPENAI_API_KEY=your-api-key
 Import the required modules:
 
 ```ts
-import {
-  RelevancyEvaluator,
-  OpenAI,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { RelevancyEvaluator, OpenAI, Settings } from "llamaindex";
 ```
 
 Let's set up gpt-4 for better results:
 
 ```ts
-const llm = new OpenAI({
+Settings.llm = new OpenAI({
   model: "gpt-4",
 });
-
-const ctx = serviceContextFromDefaults({
-  llm,
-});
 ```
 
 Now, let's create a vector index and a query engine from the documents. Then, we can evaluate the response from the query engine against the query:
@@ -59,6 +51,8 @@ const response = await queryEngine.query({
   query,
 });
 
+const evaluator = new RelevancyEvaluator();
+
 const result = await evaluator.evaluateResponse({
   query,
   response: response,
diff --git a/apps/docs/docs/modules/llms/available_llms/anthropic.md b/apps/docs/docs/modules/llms/available_llms/anthropic.md
index dd83f5207..546a7228a 100644
--- a/apps/docs/docs/modules/llms/available_llms/anthropic.md
+++ b/apps/docs/docs/modules/llms/available_llms/anthropic.md
@@ -3,13 +3,11 @@
 ## Usage
 
 ```ts
-import { Anthropic, serviceContextFromDefaults } from "llamaindex";
+import { Anthropic, Settings } from "llamaindex";
 
-const anthropicLLM = new Anthropic({
+Settings.llm = new Anthropic({
   apiKey: "<YOUR_API_KEY>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: anthropicLLM });
 ```
 
 ## Load and index documents
@@ -19,9 +17,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -39,28 +35,17 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  Anthropic,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the Anthropic LLM
-  const anthropicLLM = new Anthropic({
-    apiKey: "<YOUR_API_KEY>",
-  });
+import { Anthropic, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: anthropicLLM });
+Settings.llm = new Anthropic({
+  apiKey: "<YOUR_API_KEY>",
+});
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Create a query engine
   const queryEngine = index.asQueryEngine({
diff --git a/apps/docs/docs/modules/llms/available_llms/azure.md b/apps/docs/docs/modules/llms/available_llms/azure.md
index d13f94317..733cc9cb8 100644
--- a/apps/docs/docs/modules/llms/available_llms/azure.md
+++ b/apps/docs/docs/modules/llms/available_llms/azure.md
@@ -15,11 +15,9 @@ export AZURE_OPENAI_DEPLOYMENT="gpt-4" # or some other deployment name
 ## Usage
 
 ```ts
-import { OpenAI, serviceContextFromDefaults } from "llamaindex";
+import { OpenAI, Settings } from "llamaindex";
 
-const azureOpenaiLLM = new OpenAI({ model: "gpt-4", temperature: 0 });
-
-const serviceContext = serviceContextFromDefaults({ llm: azureOpenaiLLM });
+Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
 ```
 
 ## Load and index documents
@@ -29,9 +27,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -49,26 +45,15 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  OpenAI,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the LLM
-  const azureOpenaiLLM = new OpenAI({ model: "gpt-4", temperature: 0 });
+import { OpenAI, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: azureOpenaiLLM });
+Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/fireworks.md b/apps/docs/docs/modules/llms/available_llms/fireworks.md
index de2713b96..e8c54d73b 100644
--- a/apps/docs/docs/modules/llms/available_llms/fireworks.md
+++ b/apps/docs/docs/modules/llms/available_llms/fireworks.md
@@ -5,13 +5,11 @@ Fireworks.ai focus on production use cases for open source LLMs, offering speed
 ## Usage
 
 ```ts
-import { FireworksLLM, serviceContextFromDefaults } from "llamaindex";
+import { FireworksLLM, Settings } from "llamaindex";
 
-const fireworksLLM = new FireworksLLM({
+Settings.llm = new FireworksLLM({
   apiKey: "<YOUR_API_KEY>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: fireworksLLM });
 ```
 
 ## Load and index documents
@@ -23,9 +21,7 @@ const reader = new PDFReader();
 const documents = await reader.loadData("../data/brk-2022.pdf");
 
 // Split text and create embeddings. Store them in a VectorStoreIndex
-const index = await VectorStoreIndex.fromDocuments(documents, {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments(documents);
 ```
 
 ## Query
diff --git a/apps/docs/docs/modules/llms/available_llms/groq.mdx b/apps/docs/docs/modules/llms/available_llms/groq.mdx
index f3862f0c3..127fbfdec 100644
--- a/apps/docs/docs/modules/llms/available_llms/groq.mdx
+++ b/apps/docs/docs/modules/llms/available_llms/groq.mdx
@@ -14,15 +14,13 @@ export GROQ_API_KEY=<your-api-key>
 Then initialize the Groq module.
 
 ```ts
-import { Groq, serviceContextFromDefaults } from "llamaindex";
+import { Groq, Settings } from "llamaindex";
 
-const groq = new Groq({
+Settings.llm = new Groq({
   // If you do not wish to set your API key in the environment, you may
   // configure your API key when you initialize the Groq class.
   // apiKey: "<your-api-key>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: groq });
 ```
 
 ## Load and index documents
@@ -32,9 +30,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
diff --git a/apps/docs/docs/modules/llms/available_llms/llama2.md b/apps/docs/docs/modules/llms/available_llms/llama2.md
index ffcac13ac..edbd64b5b 100644
--- a/apps/docs/docs/modules/llms/available_llms/llama2.md
+++ b/apps/docs/docs/modules/llms/available_llms/llama2.md
@@ -3,32 +3,24 @@
 ## Usage
 
 ```ts
-import { Ollama, serviceContextFromDefaults } from "llamaindex";
+import { DeuceChatStrategy, LlamaDeuce, Settings } from "llamaindex";
 
-const llama2LLM = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
-
-const serviceContext = serviceContextFromDefaults({ llm: llama2LLM });
+Settings.llm = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
 ```
 
 ## Usage with Replicate
 
 ```ts
-import {
-  Ollama,
-  ReplicateSession,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { DeuceChatStrategy, LlamaDeuce, ReplicateSession, Settings } from "llamaindex";
 
 const replicateSession = new ReplicateSession({
   replicateKey,
 });
 
-const llama2LLM = new LlamaDeuce({
+Settings.llm = new LlamaDeuce({
   chatStrategy: DeuceChatStrategy.META,
   replicateSession,
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: llama2LLM });
 ```
 
 ## Load and index documents
@@ -38,9 +30,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -58,26 +48,16 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  LlamaDeuce,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { LlamaDeuce, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-async function main() {
-  // Create an instance of the LLM
-  const llama2LLM = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
-
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: mistralLLM });
+// Use the LlamaDeuce LLM
+Settings.llm = new LlamaDeuce({ chatStrategy: DeuceChatStrategy.META });
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/mistral.md b/apps/docs/docs/modules/llms/available_llms/mistral.md
index 81d17510a..8d43c3483 100644
--- a/apps/docs/docs/modules/llms/available_llms/mistral.md
+++ b/apps/docs/docs/modules/llms/available_llms/mistral.md
@@ -3,14 +3,12 @@
 ## Usage
 
 ```ts
-import { Ollama, serviceContextFromDefaults } from "llamaindex";
+import { MistralAI, Settings } from "llamaindex";
 
-const mistralLLM = new MistralAI({
+Settings.llm = new MistralAI({
   model: "mistral-tiny",
   apiKey: "<YOUR_API_KEY>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: mistralLLM });
 ```
 
 ## Load and index documents
@@ -20,9 +18,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -40,26 +36,16 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  MistralAI,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the LLM
-  const mistralLLM = new MistralAI({ model: "mistral-tiny" });
+import { MistralAI, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: mistralLLM });
+// Use the MistralAI LLM
+Settings.llm = new MistralAI({ model: "mistral-tiny" });
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/ollama.md b/apps/docs/docs/modules/llms/available_llms/ollama.md
index 9d690bc98..602b0f2ac 100644
--- a/apps/docs/docs/modules/llms/available_llms/ollama.md
+++ b/apps/docs/docs/modules/llms/available_llms/ollama.md
@@ -3,14 +3,10 @@
 ## Usage
 
 ```ts
-import { Ollama, serviceContextFromDefaults } from "llamaindex";
+import { Ollama, Settings } from "llamaindex";
 
-const ollamaLLM = new Ollama({ model: "llama2", temperature: 0.75 });
-
-const serviceContext = serviceContextFromDefaults({
-  llm: ollamaLLM,
-  embedModel: ollamaLLM,
-});
+const ollamaLLM = new Ollama({ model: "llama2", temperature: 0.75 });
+
+Settings.llm = ollamaLLM;
+Settings.embedModel = ollamaLLM;
 ```
 
 ## Load and index documents
@@ -20,9 +16,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -40,33 +34,23 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  Ollama,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { Ollama, Document, VectorStoreIndex, Settings } from "llamaindex";
 
 import fs from "fs/promises";
 
-async function main() {
-  // Create an instance of the LLM
-  const ollamaLLM = new Ollama({ model: "llama2", temperature: 0.75 });
+const ollama = new Ollama({ model: "llama2", temperature: 0.75 });
 
-  const essay = await fs.readFile("./paul_graham_essay.txt", "utf-8");
+// Use Ollama LLM and Embed Model
+Settings.llm = ollama;
+Settings.embedModel = ollama;
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({
-    embedModel: ollamaLLM, // prevent 'Set OpenAI Key in OPENAI_API_KEY env variable' error
-    llm: ollamaLLM,
-  });
+async function main() {
+  const essay = await fs.readFile("./paul_graham_essay.txt", "utf-8");
 
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/openai.md b/apps/docs/docs/modules/llms/available_llms/openai.md
index 67b461205..1b2faf43e 100644
--- a/apps/docs/docs/modules/llms/available_llms/openai.md
+++ b/apps/docs/docs/modules/llms/available_llms/openai.md
@@ -1,11 +1,9 @@
 # OpenAI
 
 ```ts
-import { OpenAI, serviceContextFromDefaults } from "llamaindex";
+import { OpenAI, Settings } from "llamaindex";
 
-const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0, apiKey: <YOUR_API_KEY> });
-
-const serviceContext = serviceContextFromDefaults({ llm: openaiLLM });
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0, apiKey: "<YOUR_API_KEY>" });
 ```
 
 You can set the apiKey in an environment variable, like:
@@ -21,9 +19,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -41,26 +37,16 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  OpenAI,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the LLM
-  const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
+import { OpenAI, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: openaiLLM });
+// Use the OpenAI LLM
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/portkey.md b/apps/docs/docs/modules/llms/available_llms/portkey.md
index 1b3faed8f..1d5915e31 100644
--- a/apps/docs/docs/modules/llms/available_llms/portkey.md
+++ b/apps/docs/docs/modules/llms/available_llms/portkey.md
@@ -3,13 +3,11 @@
 ## Usage
 
 ```ts
-import { Portkey, serviceContextFromDefaults } from "llamaindex";
+import { Portkey, Settings } from "llamaindex";
 
-const portkeyLLM = new Portkey({
+Settings.llm = new Portkey({
   apiKey: "<YOUR_API_KEY>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: portkeyLLM });
 ```
 
 ## Load and index documents
@@ -19,9 +17,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -39,28 +35,19 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  Portkey,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the LLM
-  const portkeyLLM = new Portkey({
-    apiKey: "<YOUR_API_KEY>",
-  });
+import { Portkey, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: portkeyLLM });
+// Use the Portkey LLM
+Settings.llm = new Portkey({
+  apiKey: "<YOUR_API_KEY>",
+});
 
+async function main() {
+  // Create a document
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/available_llms/together.md b/apps/docs/docs/modules/llms/available_llms/together.md
index b8bc507ba..3a0521551 100644
--- a/apps/docs/docs/modules/llms/available_llms/together.md
+++ b/apps/docs/docs/modules/llms/available_llms/together.md
@@ -3,13 +3,11 @@
 ## Usage
 
 ```ts
-import { TogetherLLM, serviceContextFromDefaults } from "llamaindex";
+import { TogetherLLM, Settings } from "llamaindex";
 
-const togetherLLM = new TogetherLLM({
+Settings.llm = new TogetherLLM({
   apiKey: "<YOUR_API_KEY>",
 });
-
-const serviceContext = serviceContextFromDefaults({ llm: togetherLLM });
 ```
 
 ## Load and index documents
@@ -19,9 +17,7 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Query
@@ -39,28 +35,17 @@ const results = await queryEngine.query({
 ## Full Example
 
 ```ts
-import {
-  TogetherLLM,
-  Document,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
-
-async function main() {
-  // Create an instance of the LLM
-  const togetherLLM = new TogetherLLM({
-    apiKey: "<YOUR_API_KEY>",
-  });
+import { TogetherLLM, Document, VectorStoreIndex, Settings } from "llamaindex";
 
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: togetherLLM });
+Settings.llm = new TogetherLLM({
+  apiKey: "<YOUR_API_KEY>",
+});
 
+async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/apps/docs/docs/modules/llms/index.md b/apps/docs/docs/modules/llms/index.md
index 84382f956..cb0867cbf 100644
--- a/apps/docs/docs/modules/llms/index.md
+++ b/apps/docs/docs/modules/llms/index.md
@@ -6,14 +6,12 @@ sidebar_position: 3
 
 The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`.
 
-The LLM can be explicitly set in the `ServiceContext` object.
+The LLM can be explicitly updated through `Settings`.
 
 ```typescript
-import { OpenAI, serviceContextFromDefaults } from "llamaindex";
+import { OpenAI, Settings } from "llamaindex";
 
-const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
-
-const serviceContext = serviceContextFromDefaults({ llm: openaiLLM });
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
 ```
 
 ## Azure OpenAI
@@ -35,4 +33,3 @@ For local LLMs, currently we recommend the use of [Ollama](./available_llms/olla
 ## API Reference
 
 - [OpenAI](../api/classes/OpenAI.md)
-- [ServiceContext](../api/interfaces//ServiceContext.md)
diff --git a/apps/docs/docs/modules/node_parser.md b/apps/docs/docs/modules/node_parser.md
index b7ed346b8..f7d18f8b7 100644
--- a/apps/docs/docs/modules/node_parser.md
+++ b/apps/docs/docs/modules/node_parser.md
@@ -4,15 +4,14 @@ sidebar_position: 4
 
 # NodeParser
 
-The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `ServiceContext` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.
+The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from `Settings` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.
 
 ```typescript
 import { Document, Settings, SimpleNodeParser } from "llamaindex";
 
 const nodeParser = new SimpleNodeParser();
-const nodes = nodeParser.getNodesFromDocuments([
-  new Document({ text: "I am 10 years old. John is 20 years old." }),
-]);
+
+Settings.nodeParser = nodeParser;
 ```
 
 ## TextSplitter
diff --git a/apps/docs/docs/modules/node_postprocessors/cohere_reranker.md b/apps/docs/docs/modules/node_postprocessors/cohere_reranker.md
index 717f8a2cd..9b312e8e6 100644
--- a/apps/docs/docs/modules/node_postprocessors/cohere_reranker.md
+++ b/apps/docs/docs/modules/node_postprocessors/cohere_reranker.md
@@ -18,7 +18,7 @@ import {
   Document,
   OpenAI,
   VectorStoreIndex,
-  serviceContextFromDefaults,
+  Settings,
 } from "llamaindex";
 ```
 
@@ -29,13 +29,9 @@ For this example, we will use a single document. In a real-world scenario, you w
 ```ts
 const document = new Document({ text: essay, id_: "essay" });
 
-const serviceContext = serviceContextFromDefaults({
-  llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
-});
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
 
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```
 
 ## Increase similarity topK to retrieve more results
diff --git a/apps/docs/docs/modules/node_postprocessors/index.md b/apps/docs/docs/modules/node_postprocessors/index.md
index 5828db0ec..92c3b085f 100644
--- a/apps/docs/docs/modules/node_postprocessors/index.md
+++ b/apps/docs/docs/modules/node_postprocessors/index.md
@@ -58,7 +58,10 @@ Most commonly, node-postprocessors will be used in a query engine, where they ar
 ### Using Node Postprocessors in a Query Engine
 
 ```ts
-import { Node, NodeWithScore, SimilarityPostprocessor, CohereRerank } from "llamaindex";
+import { Node, NodeWithScore, SimilarityPostprocessor, CohereRerank, OpenAI, Settings } from "llamaindex";
+
+// Use OpenAI LLM
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
 
 const nodes: NodeWithScore[] = [
   {
@@ -79,14 +82,6 @@ const reranker = new CohereRerank({
 
 const document = new Document({ text: "essay", id_: "essay" });
 
-const serviceContext = serviceContextFromDefaults({
-  llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
-});
-
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
-
+const index = await VectorStoreIndex.fromDocuments([document]);
+
 const queryEngine = index.asQueryEngine({
   nodePostprocessors: [processor, reranker],
 });
diff --git a/apps/docs/docs/modules/prompt/index.md b/apps/docs/docs/modules/prompt/index.md
index fa9374643..2644a8a4d 100644
--- a/apps/docs/docs/modules/prompt/index.md
+++ b/apps/docs/docs/modules/prompt/index.md
@@ -31,13 +31,11 @@ The first method is to create a new instance of `ResponseSynthesizer` (or the mo
 ```ts
 // Create an instance of response synthesizer
 const responseSynthesizer = new ResponseSynthesizer({
-  responseBuilder: new CompactAndRefine(serviceContext, newTextQaPrompt),
+  responseBuilder: new CompactAndRefine(undefined, newTextQaPrompt),
 });
 
 // Create index
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 // Query the index
 const queryEngine = index.asQueryEngine({ responseSynthesizer });
@@ -53,9 +51,7 @@ The second method is that most of the modules in LlamaIndex have a `getPrompts`
 
 ```ts
 // Create index
-const index = await VectorStoreIndex.fromDocuments([document], {
-  serviceContext,
-});
+const index = await VectorStoreIndex.fromDocuments([document]);
 
 // Query the index
 const queryEngine = index.asQueryEngine();
diff --git a/apps/docs/docs/modules/query_engines/metadata_filtering.md b/apps/docs/docs/modules/query_engines/metadata_filtering.md
index 6ccbb4834..2ae3ac83a 100644
--- a/apps/docs/docs/modules/query_engines/metadata_filtering.md
+++ b/apps/docs/docs/modules/query_engines/metadata_filtering.md
@@ -54,12 +54,13 @@ You can create a `ChromaVectorStore` to store the documents:
 
 ```ts
 const chromaVS = new ChromaVectorStore({ collectionName });
-const serviceContext = await storageContextFromDefaults({
+
+const storageContext = await storageContextFromDefaults({
   vectorStore: chromaVS,
 });
 
 const index = await VectorStoreIndex.fromDocuments(docs, {
-  storageContext: serviceContext,
+  storageContext: storageContext,
 });
 ```
 
diff --git a/apps/docs/docs/modules/query_engines/router_query_engine.md b/apps/docs/docs/modules/query_engines/router_query_engine.md
index c4d045c40..f8147f2cf 100644
--- a/apps/docs/docs/modules/query_engines/router_query_engine.md
+++ b/apps/docs/docs/modules/query_engines/router_query_engine.md
@@ -18,7 +18,7 @@ import {
   SimpleNodeParser,
   SummaryIndex,
   VectorStoreIndex,
-  serviceContextFromDefaults,
+  Settings,
 } from "llamaindex";
 ```
 
@@ -34,17 +34,13 @@ const documents = await new SimpleDirectoryReader().loadData({
 
 ## Settings
 
-Next, we need to define some basic rules and parse the documents into nodes. We will use the `SimpleNodeParser` to parse the documents into nodes and `ServiceContext` to define the rules (eg. LLM API key, chunk size, etc.):
+Next, we need to define some basic rules and parse the documents into nodes. We will use `SimpleNodeParser` to split the documents into nodes and `Settings` to define the rules (e.g. the LLM and chunk size):
 
 ```ts
-const nodeParser = new SimpleNodeParser({
+Settings.llm = new OpenAI();
+Settings.nodeParser = new SimpleNodeParser({
   chunkSize: 1024,
 });
-
-const serviceContext = serviceContextFromDefaults({
-  nodeParser,
-  llm: new OpenAI(),
-});
 ```
 
 ## Creating Indices
@@ -52,13 +48,8 @@ const serviceContext = serviceContextFromDefaults({
 Next, we need to create some indices. We will create a `VectorStoreIndex` and a `SummaryIndex`:
 
 ```ts
-const vectorIndex = await VectorStoreIndex.fromDocuments(documents, {
-  serviceContext,
-});
-
-const summaryIndex = await SummaryIndex.fromDocuments(documents, {
-  serviceContext,
-});
+const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+const summaryIndex = await SummaryIndex.fromDocuments(documents);
 ```
 
 ## Creating Query Engines
@@ -88,7 +79,6 @@ const queryEngine = RouterQueryEngine.fromDefaults({
       description: "Useful for retrieving specific context from Abramov",
     },
   ],
-  serviceContext,
 });
 ```
 
@@ -117,34 +107,23 @@ import {
   SimpleNodeParser,
   SummaryIndex,
   VectorStoreIndex,
-  serviceContextFromDefaults,
+  Settings,
 } from "llamaindex";
 
+Settings.llm = new OpenAI();
+Settings.nodeParser = new SimpleNodeParser({
+  chunkSize: 1024,
+});
+
 async function main() {
   // Load documents from a directory
   const documents = await new SimpleDirectoryReader().loadData({
     directoryPath: "node_modules/llamaindex/examples",
   });
 
-  // Parse the documents into nodes
-  const nodeParser = new SimpleNodeParser({
-    chunkSize: 1024,
-  });
-
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({
-    nodeParser,
-    llm: new OpenAI(),
-  });
-
   // Create indices
-  const vectorIndex = await VectorStoreIndex.fromDocuments(documents, {
-    serviceContext,
-  });
-
-  const summaryIndex = await SummaryIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+  const summaryIndex = await SummaryIndex.fromDocuments(documents);
 
   // Create query engines
   const vectorQueryEngine = vectorIndex.asQueryEngine();
@@ -162,7 +141,6 @@ async function main() {
         description: "Useful for retrieving specific context from Abramov",
       },
     ],
-    serviceContext,
   });
 
   // Query the router query engine
diff --git a/examples/Settings.ts b/examples/Settings.ts
new file mode 100644
index 000000000..47778d757
--- /dev/null
+++ b/examples/Settings.ts
@@ -0,0 +1,29 @@
+import fs from "node:fs/promises";
+
+import { Document, OpenAI, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-4" });
+
+async function main() {
+  // Load essay from abramov.txt in Node
+  const path = "node_modules/llamaindex/examples/abramov.txt";
+
+  const essay = await fs.readFile(path, "utf-8");
+
+  // Create Document object with essay
+  const document = new Document({ text: essay, id_: path });
+
+  const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+
+  const response = await queryEngine.query({
+    query: "What did the author do in college?",
+  });
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/examples/agent/multi_document_agent.ts b/examples/agent/multi_document_agent.ts
index 6d1165ffe..ecdd805b8 100644
--- a/examples/agent/multi_document_agent.ts
+++ b/examples/agent/multi_document_agent.ts
@@ -6,11 +6,11 @@ import {
   OpenAI,
   OpenAIAgent,
   QueryEngineTool,
+  Settings,
   SimpleNodeParser,
   SimpleToolNodeMapping,
   SummaryIndex,
   VectorStoreIndex,
-  serviceContextFromDefaults,
   storageContextFromDefaults,
 } from "llamaindex";
 
@@ -18,6 +18,8 @@ import { extractWikipedia } from "./helpers/extractWikipedia";
 
 const wikiTitles = ["Brazil", "Canada"];
 
+Settings.llm = new OpenAI({ model: "gpt-4" });
+
 async function main() {
   await extractWikipedia(wikiTitles);
 
@@ -30,11 +32,6 @@ async function main() {
     countryDocs[title] = document;
   }
 
-  const llm = new OpenAI({
-    model: "gpt-4",
-  });
-
-  const serviceContext = serviceContextFromDefaults({ llm });
   const storageContext = await storageContextFromDefaults({
     persistDir: "./storage",
   });
@@ -54,13 +51,11 @@ async function main() {
     console.log(`Creating index for ${title}`);
 
     const vectorIndex = await VectorStoreIndex.init({
-      serviceContext: serviceContext,
       storageContext: storageContext,
       nodes,
     });
 
     const summaryIndex = await SummaryIndex.init({
-      serviceContext: serviceContext,
       nodes,
     });
 
@@ -90,7 +85,7 @@ async function main() {
 
     const agent = new OpenAIAgent({
       tools: queryEngineTools,
-      llm,
+      llm: new OpenAI({ model: "gpt-4" }),
       verbose: true,
     });
 
@@ -126,14 +121,11 @@ async function main() {
     allTools,
     toolMapping,
     VectorStoreIndex,
-    {
-      serviceContext,
-    },
   );
 
   const topAgent = new OpenAIAgent({
     toolRetriever: await objectIndex.asRetriever({}),
-    llm,
+    llm: new OpenAI({ model: "gpt-4" }),
     verbose: true,
     prefixMessages: [
       {
diff --git a/examples/astradb/query.ts b/examples/astradb/query.ts
index 23985c0d2..546920a8b 100644
--- a/examples/astradb/query.ts
+++ b/examples/astradb/query.ts
@@ -1,8 +1,4 @@
-import {
-  AstraDBVectorStore,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { AstraDBVectorStore, VectorStoreIndex } from "llamaindex";
 
 const collectionName = "movie_reviews";
 
@@ -11,8 +7,7 @@ async function main() {
     const astraVS = new AstraDBVectorStore({ contentKey: "reviewtext" });
     await astraVS.connect(collectionName);
 
-    const ctx = serviceContextFromDefaults();
-    const index = await VectorStoreIndex.fromVectorStore(astraVS, ctx);
+    const index = await VectorStoreIndex.fromVectorStore(astraVS);
 
     const retriever = await index.asRetriever({ similarityTopK: 20 });
 
diff --git a/examples/chatEngine.ts b/examples/chatEngine.ts
index 90152c88c..e5283114b 100644
--- a/examples/chatEngine.ts
+++ b/examples/chatEngine.ts
@@ -4,18 +4,18 @@ import readline from "node:readline/promises";
 import {
   ContextChatEngine,
   Document,
-  serviceContextFromDefaults,
+  Settings,
   VectorStoreIndex,
 } from "llamaindex";
 
 import essay from "./essay";
 
+// Update chunk size
+Settings.chunkSize = 512;
+
 async function main() {
   const document = new Document({ text: essay });
-  const serviceContext = serviceContextFromDefaults({ chunkSize: 512 });
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
   const retriever = index.asRetriever();
   retriever.similarityTopK = 5;
   const chatEngine = new ContextChatEngine({ retriever });
diff --git a/examples/evaluation/correctness.ts b/examples/evaluation/correctness.ts
index 88c5c39c7..4d920a062 100644
--- a/examples/evaluation/correctness.ts
+++ b/examples/evaluation/correctness.ts
@@ -1,21 +1,10 @@
-import {
-  CorrectnessEvaluator,
-  OpenAI,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { CorrectnessEvaluator, OpenAI, Settings } from "llamaindex";
 
-async function main() {
-  const llm = new OpenAI({
-    model: "gpt-4",
-  });
-
-  const ctx = serviceContextFromDefaults({
-    llm,
-  });
+// Update llm to use OpenAI
+Settings.llm = new OpenAI({ model: "gpt-4" });
 
-  const evaluator = new CorrectnessEvaluator({
-    serviceContext: ctx,
-  });
+async function main() {
+  const evaluator = new CorrectnessEvaluator();
 
   const query =
     "Can you explain the theory of relativity proposed by Albert Einstein in detail?";
diff --git a/examples/evaluation/faithfulness.ts b/examples/evaluation/faithfulness.ts
index 7571919dd..8456e3f47 100644
--- a/examples/evaluation/faithfulness.ts
+++ b/examples/evaluation/faithfulness.ts
@@ -2,22 +2,15 @@ import {
   Document,
   FaithfulnessEvaluator,
   OpenAI,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
-async function main() {
-  const llm = new OpenAI({
-    model: "gpt-4",
-  });
-
-  const ctx = serviceContextFromDefaults({
-    llm,
-  });
+// Update llm to use OpenAI
+Settings.llm = new OpenAI({ model: "gpt-4" });
 
-  const evaluator = new FaithfulnessEvaluator({
-    serviceContext: ctx,
-  });
+async function main() {
+  const evaluator = new FaithfulnessEvaluator();
 
   const documents = [
     new Document({
diff --git a/examples/evaluation/relevancy.ts b/examples/evaluation/relevancy.ts
index c595ff1dc..2467d66fd 100644
--- a/examples/evaluation/relevancy.ts
+++ b/examples/evaluation/relevancy.ts
@@ -2,22 +2,16 @@ import {
   Document,
   OpenAI,
   RelevancyEvaluator,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
-async function main() {
-  const llm = new OpenAI({
-    model: "gpt-4",
-  });
-
-  const ctx = serviceContextFromDefaults({
-    llm,
-  });
+Settings.llm = new OpenAI({
+  model: "gpt-4",
+});
 
-  const evaluator = new RelevancyEvaluator({
-    serviceContext: ctx,
-  });
+async function main() {
+  const evaluator = new RelevancyEvaluator();
 
   const documents = [
     new Document({
diff --git a/examples/groq.ts b/examples/groq.ts
index 581031532..bb6662fe4 100644
--- a/examples/groq.ts
+++ b/examples/groq.ts
@@ -1,30 +1,20 @@
 import fs from "node:fs/promises";
 
-import {
-  Document,
-  Groq,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { Document, Groq, Settings, VectorStoreIndex } from "llamaindex";
 
-async function main() {
-  // Create an instance of the LLM
-  const groq = new Groq({
-    apiKey: process.env.GROQ_API_KEY,
-  });
-
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({ llm: groq });
+// Update llm to use Groq
+Settings.llm = new Groq({
+  apiKey: process.env.GROQ_API_KEY,
+});
 
+async function main() {
   // Load essay from abramov.txt in Node
   const path = "node_modules/llamaindex/examples/abramov.txt";
   const essay = await fs.readFile(path, "utf-8");
   const document = new Document({ text: essay, id_: "essay" });
 
   // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // get retriever
   const retriever = index.asRetriever();
diff --git a/examples/huggingface.ts b/examples/huggingface.ts
index c1e54e05f..8297b7536 100644
--- a/examples/huggingface.ts
+++ b/examples/huggingface.ts
@@ -4,10 +4,15 @@ import {
   Document,
   HuggingFaceEmbedding,
   HuggingFaceEmbeddingModelType,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
+// Update embed model
+Settings.embedModel = new HuggingFaceEmbedding({
+  modelType: HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2,
+});
+
 async function main() {
   // Load essay from abramov.txt in Node
   const path = "node_modules/llamaindex/examples/abramov.txt";
@@ -17,18 +22,8 @@ async function main() {
   // Create Document object with essay
   const document = new Document({ text: essay, id_: path });
 
-  // Use Local embedding from HuggingFace
-  const embedModel = new HuggingFaceEmbedding({
-    modelType: HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2,
-  });
-  const serviceContext = serviceContextFromDefaults({
-    embedModel,
-  });
-
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
diff --git a/examples/longText.ts b/examples/longText.ts
index 9766d96b5..eb3c3d845 100644
--- a/examples/longText.ts
+++ b/examples/longText.ts
@@ -1,26 +1,21 @@
 import {
   Document,
+  Settings,
   SimpleNodeParser,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
 export const STORAGE_DIR = "./data";
 
+// Update node parser
+Settings.nodeParser = new SimpleNodeParser({
+  chunkSize: 512,
+  chunkOverlap: 20,
+  splitLongSentences: true,
+});
 (async () => {
-  // create service context that is splitting sentences longer than CHUNK_SIZE
-  const serviceContext = serviceContextFromDefaults({
-    nodeParser: new SimpleNodeParser({
-      chunkSize: 512,
-      chunkOverlap: 20,
-      splitLongSentences: true,
-    }),
-  });
-
   // generate a document with a very long sentence (9000 words long)
   const longSentence = "is ".repeat(9000) + ".";
   const document = new Document({ text: longSentence, id_: "1" });
-  await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  await VectorStoreIndex.fromDocuments([document]);
 })();
diff --git a/examples/milvus/query.ts b/examples/milvus/query.ts
index 90fe9fb1a..38ad3a7b1 100644
--- a/examples/milvus/query.ts
+++ b/examples/milvus/query.ts
@@ -1,8 +1,4 @@
-import {
-  MilvusVectorStore,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { MilvusVectorStore, VectorStoreIndex } from "llamaindex";
 
 const collectionName = "movie_reviews";
 
@@ -10,8 +6,7 @@ async function main() {
   try {
     const milvus = new MilvusVectorStore({ collection: collectionName });
 
-    const ctx = serviceContextFromDefaults();
-    const index = await VectorStoreIndex.fromVectorStore(milvus, ctx);
+    const index = await VectorStoreIndex.fromVectorStore(milvus);
 
     const retriever = await index.asRetriever({ similarityTopK: 20 });
 
diff --git a/examples/mistral.ts b/examples/mistral.ts
index 75b8b3c1a..67555c337 100644
--- a/examples/mistral.ts
+++ b/examples/mistral.ts
@@ -1,15 +1,18 @@
 import * as fs from "fs/promises";
 import {
-  BaseEmbedding,
   Document,
-  LLM,
   MistralAI,
   MistralAIEmbedding,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
-async function rag(llm: LLM, embedModel: BaseEmbedding, query: string) {
+// Update embed model
+Settings.embedModel = new MistralAIEmbedding();
+// Update llm to use MistralAI
+Settings.llm = new MistralAI({ model: "mistral-tiny" });
+
+async function rag(query: string) {
   // Load essay from abramov.txt in Node
   const path = "node_modules/llamaindex/examples/abramov.txt";
 
@@ -18,12 +21,7 @@ async function rag(llm: LLM, embedModel: BaseEmbedding, query: string) {
   // Create Document object with essay
   const document = new Document({ text: essay, id_: path });
 
-  // Split text and create embeddings. Store them in a VectorStoreIndex
-  const serviceContext = serviceContextFromDefaults({ llm, embedModel });
-
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
@@ -60,10 +58,6 @@ async function rag(llm: LLM, embedModel: BaseEmbedding, query: string) {
   }
 
   // rag
-  const ragResponse = await rag(
-    llm,
-    embedding,
-    "What did the author do in college?",
-  );
+  const ragResponse = await rag("What did the author do in college?");
   console.log(ragResponse);
 })();
diff --git a/examples/mongodb/3_query.ts b/examples/mongodb/3_query.ts
index 84920dd9d..9585d90a8 100644
--- a/examples/mongodb/3_query.ts
+++ b/examples/mongodb/3_query.ts
@@ -1,10 +1,6 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
 import * as dotenv from "dotenv";
-import {
-  MongoDBAtlasVectorSearch,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { MongoDBAtlasVectorSearch, VectorStoreIndex } from "llamaindex";
 import { MongoClient } from "mongodb";
 
 // Load environment variables from local .env file
@@ -12,7 +8,7 @@ dotenv.config();
 
 async function query() {
   const client = new MongoClient(process.env.MONGODB_URI!);
-  const serviceContext = serviceContextFromDefaults();
+
   const store = new MongoDBAtlasVectorSearch({
     mongodbClient: client,
     dbName: process.env.MONGODB_DATABASE!,
@@ -20,7 +16,7 @@ async function query() {
     indexName: process.env.MONGODB_VECTOR_INDEX!,
   });
 
-  const index = await VectorStoreIndex.fromVectorStore(store, serviceContext);
+  const index = await VectorStoreIndex.fromVectorStore(store);
 
   const retriever = index.asRetriever({ similarityTopK: 20 });
   const queryEngine = index.asQueryEngine({ retriever });
diff --git a/examples/multimodal/load.ts b/examples/multimodal/load.ts
index 48556686e..3ed94e30b 100644
--- a/examples/multimodal/load.ts
+++ b/examples/multimodal/load.ts
@@ -1,12 +1,16 @@
 import {
-  ServiceContext,
-  serviceContextFromDefaults,
+  Settings,
   SimpleDirectoryReader,
-  storageContextFromDefaults,
   VectorStoreIndex,
+  storageContextFromDefaults,
 } from "llamaindex";
+
 import * as path from "path";
 
+// Update chunk size and overlap
+Settings.chunkSize = 512;
+Settings.chunkOverlap = 20;
+
 async function getRuntime(func: any) {
   const start = Date.now();
   await func();
@@ -14,7 +18,7 @@ async function getRuntime(func: any) {
   return end - start;
 }
 
-async function generateDatasource(serviceContext: ServiceContext) {
+async function generateDatasource() {
   console.log(`Generating storage...`);
   // Split documents, create embeddings and store them in the storage context
   const ms = await getRuntime(async () => {
@@ -26,7 +30,6 @@ async function generateDatasource(serviceContext: ServiceContext) {
       storeImages: true,
     });
     await VectorStoreIndex.fromDocuments(documents, {
-      serviceContext,
       storageContext,
     });
   });
@@ -34,12 +37,7 @@ async function generateDatasource(serviceContext: ServiceContext) {
 }
 
 async function main() {
-  const serviceContext = serviceContextFromDefaults({
-    chunkSize: 512,
-    chunkOverlap: 20,
-  });
-
-  await generateDatasource(serviceContext);
+  await generateDatasource();
   console.log("Finished generating storage.");
 }
 
diff --git a/examples/multimodal/rag.ts b/examples/multimodal/rag.ts
index 4d9dcbdbc..f7d945c80 100644
--- a/examples/multimodal/rag.ts
+++ b/examples/multimodal/rag.ts
@@ -1,18 +1,28 @@
 import {
   CallbackManager,
-  ImageDocument,
   ImageType,
   MultiModalResponseSynthesizer,
-  NodeWithScore,
   OpenAI,
-  ServiceContext,
+  Settings,
   VectorStoreIndex,
-  runWithCallbackManager,
-  serviceContextFromDefaults,
   storageContextFromDefaults,
 } from "llamaindex";
 
-export async function createIndex(serviceContext: ServiceContext) {
+// Update chunk size and overlap
+Settings.chunkSize = 512;
+Settings.chunkOverlap = 20;
+
+// Update llm
+Settings.llm = new OpenAI({ model: "gpt-4-vision-preview", maxTokens: 512 });
+
+// Update callbackManager
+Settings.callbackManager = new CallbackManager({
+  onRetrieve: ({ query, nodes }) => {
+    console.log(`Retrieved ${nodes.length} nodes for query: ${query}`);
+  },
+});
+
+export async function createIndex() {
   // set up vector store index with two vector stores, one for text, the other for images
   const storageContext = await storageContextFromDefaults({
     persistDir: "storage",
@@ -21,36 +31,21 @@ export async function createIndex(serviceContext: ServiceContext) {
   return await VectorStoreIndex.init({
     nodes: [],
     storageContext,
-    serviceContext,
   });
 }
 
 async function main() {
-  let images: ImageType[] = [];
-  const callbackManager = new CallbackManager({
-    onRetrieve: ({ query, nodes }) => {
-      images = nodes
-        .filter(({ node }: NodeWithScore) => node instanceof ImageDocument)
-        .map(({ node }: NodeWithScore) => (node as ImageDocument).image);
-    },
-  });
-  const llm = new OpenAI({ model: "gpt-4-vision-preview", maxTokens: 512 });
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-    chunkSize: 512,
-    chunkOverlap: 20,
-  });
-  const index = await createIndex(serviceContext);
+  const images: ImageType[] = [];
+
+  const index = await createIndex();
 
   const queryEngine = index.asQueryEngine({
-    responseSynthesizer: new MultiModalResponseSynthesizer({ serviceContext }),
+    responseSynthesizer: new MultiModalResponseSynthesizer(),
     retriever: index.asRetriever({ similarityTopK: 3, imageSimilarityTopK: 1 }),
   });
-  const result = await runWithCallbackManager(callbackManager, () =>
-    queryEngine.query({
-      query: "Tell me more about Vincent van Gogh's famous paintings",
-    }),
-  );
+  const result = await queryEngine.query({
+    query: "Tell me more about Vincent van Gogh's famous paintings",
+  });
   console.log(result.response, "\n");
   images.forEach((image) =>
     console.log(`Image retrieved and used in inference: ${image.toString()}`),
diff --git a/examples/multimodal/retrieve.ts b/examples/multimodal/retrieve.ts
index 8e0e2242e..7c5bf2f85 100644
--- a/examples/multimodal/retrieve.ts
+++ b/examples/multimodal/retrieve.ts
@@ -1,17 +1,17 @@
 import {
   ImageNode,
-  serviceContextFromDefaults,
-  storageContextFromDefaults,
+  Settings,
   TextNode,
   VectorStoreIndex,
+  storageContextFromDefaults,
 } from "llamaindex";
 
+// Update chunk size and overlap
+Settings.chunkSize = 512;
+Settings.chunkOverlap = 20;
+
 export async function createIndex() {
   // set up vector store index with two vector stores, one for text, the other for images
-  const serviceContext = serviceContextFromDefaults({
-    chunkSize: 512,
-    chunkOverlap: 20,
-  });
   const storageContext = await storageContextFromDefaults({
     persistDir: "storage",
     storeImages: true,
@@ -19,7 +19,6 @@ export async function createIndex() {
   return await VectorStoreIndex.init({
     nodes: [],
     storageContext,
-    serviceContext,
   });
 }
 
diff --git a/examples/pg-vector-store/query.ts b/examples/pg-vector-store/query.ts
index c46f46c9e..96d6ed9bd 100755
--- a/examples/pg-vector-store/query.ts
+++ b/examples/pg-vector-store/query.ts
@@ -1,8 +1,4 @@
-import {
-  PGVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { PGVectorStore, VectorStoreIndex } from "llamaindex";
 
 async function main() {
   const readline = require("readline").createInterface({
@@ -15,8 +11,7 @@ async function main() {
     // Optional - set your collection name, default is no filter on this field.
     // pgvs.setCollection();
 
-    const ctx = serviceContextFromDefaults();
-    const index = await VectorStoreIndex.fromVectorStore(pgvs, ctx);
+    const index = await VectorStoreIndex.fromVectorStore(pgvs);
 
     // Query the index
     const queryEngine = await index.asQueryEngine();
diff --git a/examples/pinecone-vector-store/query.ts b/examples/pinecone-vector-store/query.ts
index f0ee4b3c5..5a5d089d6 100755
--- a/examples/pinecone-vector-store/query.ts
+++ b/examples/pinecone-vector-store/query.ts
@@ -1,8 +1,4 @@
-import {
-  PineconeVectorStore,
-  VectorStoreIndex,
-  serviceContextFromDefaults,
-} from "llamaindex";
+import { PineconeVectorStore, VectorStoreIndex } from "llamaindex";
 
 async function main() {
   const readline = require("readline").createInterface({
@@ -13,8 +9,7 @@ async function main() {
   try {
     const pcvs = new PineconeVectorStore();
 
-    const ctx = serviceContextFromDefaults();
-    const index = await VectorStoreIndex.fromVectorStore(pcvs, ctx);
+    const index = await VectorStoreIndex.fromVectorStore(pcvs);
 
     // Query the index
     const queryEngine = await index.asQueryEngine();
diff --git a/examples/prompts/promptMixin.ts b/examples/prompts/promptMixin.ts
index d0b940796..bd60fd375 100644
--- a/examples/prompts/promptMixin.ts
+++ b/examples/prompts/promptMixin.ts
@@ -4,7 +4,6 @@ import {
   TreeSummarize,
   TreeSummarizePrompt,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
 const treeSummarizePrompt: TreeSummarizePrompt = ({ context, query }) => {
@@ -27,10 +26,8 @@ async function main() {
 
   const query = "The quick brown fox jumps over the lazy dog";
 
-  const ctx = serviceContextFromDefaults({});
-
   const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new TreeSummarize(ctx),
+    responseBuilder: new TreeSummarize(),
   });
 
   const queryEngine = index.asQueryEngine({
diff --git a/examples/qdrantdb/preFilters.ts b/examples/qdrantdb/preFilters.ts
index 8963b54f2..0cc105a52 100644
--- a/examples/qdrantdb/preFilters.ts
+++ b/examples/qdrantdb/preFilters.ts
@@ -4,12 +4,21 @@ import {
   Document,
   MetadataMode,
   QdrantVectorStore,
+  Settings,
   VectorStoreIndex,
-  runWithCallbackManager,
-  serviceContextFromDefaults,
   storageContextFromDefaults,
 } from "llamaindex";
 
+// Update callback manager
+Settings.callbackManager = new CallbackManager({
+  onRetrieve: (data) => {
+    console.log(
+      "The retrieved nodes are:",
+      data.nodes.map((node) => node.node.getContent(MetadataMode.NONE)),
+    );
+  },
+});
+
 // Load environment variables from local .env file
 dotenv.config();
 
@@ -37,21 +46,9 @@ async function main() {
     const ctx = await storageContextFromDefaults({ vectorStore: qdrantVs });
 
     console.log("Embedding documents and adding to index");
-    const index = await runWithCallbackManager(
-      new CallbackManager({
-        onRetrieve: (data) => {
-          console.log(
-            "The retrieved nodes are:",
-            data.nodes.map((node) => node.node.getContent(MetadataMode.NONE)),
-          );
-        },
-      }),
-      () =>
-        VectorStoreIndex.fromDocuments(docs, {
-          storageContext: ctx,
-          serviceContext: serviceContextFromDefaults(),
-        }),
-    );
+    const index = await VectorStoreIndex.fromDocuments(docs, {
+      storageContext: ctx,
+    });
 
     console.log(
       "Querying index with no filters: Expected output: Brown probably",
diff --git a/examples/readers/src/csv.ts b/examples/readers/src/csv.ts
index 812401295..6d9f6d901 100644
--- a/examples/readers/src/csv.ts
+++ b/examples/readers/src/csv.ts
@@ -2,25 +2,21 @@ import {
   CompactAndRefine,
   OpenAI,
   ResponseSynthesizer,
-  serviceContextFromDefaults,
+  Settings,
   VectorStoreIndex,
 } from "llamaindex";
 import { PapaCSVReader } from "llamaindex/readers/CSVReader";
 
+Settings.llm = new OpenAI({ model: "gpt-4" });
+
 async function main() {
   // Load CSV
   const reader = new PapaCSVReader();
   const path = "../data/titanic_train.csv";
   const documents = await reader.loadData(path);
 
-  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-4" }),
-  });
-
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const index = await VectorStoreIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments(documents);
 
   const csvPrompt = ({ context = "", query = "" }) => {
     return `The following CSV file is loaded from ${path}
@@ -32,7 +28,8 @@ Given the CSV file, generate me Typescript code to answer the question: ${query}
   };
 
   const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new CompactAndRefine(serviceContext, csvPrompt),
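+    // no service context here, so the global Settings (gpt-4, set above) is used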
+    responseBuilder: new CompactAndRefine(undefined, csvPrompt),
   });
 
   const queryEngine = index.asQueryEngine({ responseSynthesizer });
diff --git a/examples/readers/src/pdf_fw.ts b/examples/readers/src/pdf_fw.ts
index d4a47e724..95b83a050 100644
--- a/examples/readers/src/pdf_fw.ts
+++ b/examples/readers/src/pdf_fw.ts
@@ -1,17 +1,15 @@
 import { FireworksEmbedding, FireworksLLM, VectorStoreIndex } from "llamaindex";
 import { PDFReader } from "llamaindex/readers/PDFReader";
 
-import { serviceContextFromDefaults } from "llamaindex";
+import { Settings } from "llamaindex";
 
-const embedModel = new FireworksEmbedding({
-  model: "nomic-ai/nomic-embed-text-v1.5",
-});
-
-const llm = new FireworksLLM({
+Settings.llm = new FireworksLLM({
   model: "accounts/fireworks/models/mixtral-8x7b-instruct",
 });
 
-const serviceContext = serviceContextFromDefaults({ llm, embedModel });
+Settings.embedModel = new FireworksEmbedding({
+  model: "nomic-ai/nomic-embed-text-v1.5",
+});
 
 async function main() {
   // Load PDF
@@ -19,9 +17,7 @@ async function main() {
   const documents = await reader.loadData("../data/brk-2022.pdf");
 
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const index = await VectorStoreIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments(documents);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
diff --git a/examples/readers/src/pdf_fw_openai.ts b/examples/readers/src/pdf_fw_openai.ts
index 2910deb25..0d9fba659 100644
--- a/examples/readers/src/pdf_fw_openai.ts
+++ b/examples/readers/src/pdf_fw_openai.ts
@@ -1,30 +1,26 @@
 import { OpenAI, OpenAIEmbedding, VectorStoreIndex } from "llamaindex";
 import { PDFReader } from "llamaindex/readers/PDFReader";
 
-import { serviceContextFromDefaults } from "llamaindex";
+import { Settings } from "llamaindex";
 
-const embedModel = new OpenAIEmbedding({
-  model: "nomic-ai/nomic-embed-text-v1.5",
-});
+// Update llm and embedModel
 
-const llm = new OpenAI({
-  model: "accounts/fireworks/models/mixtral-8x7b-instruct",
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+Settings.embedModel = new OpenAIEmbedding({
+  model: "nomic-ai/nomic-embed-text-v1.5",
 });
 
-const serviceContext = serviceContextFromDefaults({ llm, embedModel });
-
 async function main() {
   // Load PDF
   const reader = new PDFReader();
   const documents = await reader.loadData("../data/brk-2022.pdf");
 
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const index = await VectorStoreIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments(documents);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
+
   const response = await queryEngine.query({
     query: "What mistakes did Warren E. Buffett make?",
   });
diff --git a/examples/readonly.ts b/examples/readonly.ts
index 180cb8c0f..e0a2cfb9b 100644
--- a/examples/readonly.ts
+++ b/examples/readonly.ts
@@ -1,16 +1,14 @@
 import { execSync } from "child_process";
 import {
   PDFReader,
-  serviceContextFromDefaults,
-  storageContextFromDefaults,
   VectorStoreIndex,
+  storageContextFromDefaults,
 } from "llamaindex";
 
 const STORAGE_DIR = "./cache";
 
 async function main() {
   // write the index to disk
-  const serviceContext = serviceContextFromDefaults({});
   const storageContext = await storageContextFromDefaults({
     persistDir: `${STORAGE_DIR}`,
   });
@@ -18,7 +16,6 @@ async function main() {
   const documents = await reader.loadData("data/brk-2022.pdf");
   await VectorStoreIndex.fromDocuments(documents, {
     storageContext,
-    serviceContext,
   });
   console.log("wrote index to disk - now trying to read it");
   // make index dir read only
@@ -29,7 +26,6 @@ async function main() {
   });
   await VectorStoreIndex.init({
     storageContext: readOnlyStorageContext,
-    serviceContext,
   });
   console.log("read only index successfully opened");
 }
diff --git a/examples/recipes/cost-analysis.ts b/examples/recipes/cost-analysis.ts
index 70bacc30a..9e5e5f999 100644
--- a/examples/recipes/cost-analysis.ts
+++ b/examples/recipes/cost-analysis.ts
@@ -1,5 +1,5 @@
 import { OpenAI } from "llamaindex";
-import { getCurrentCallbackManager } from "llamaindex/callbacks/CallbackManager";
+import { Settings } from "llamaindex/Settings";
 
 const llm = new OpenAI({
   model: "gpt-4-0125-preview",
@@ -7,8 +7,7 @@ const llm = new OpenAI({
 
 let tokenCount = 0;
 
-// @todo: use GlobalSetting in the future
-getCurrentCallbackManager().addHandlers("llm-start", (event) => {
+Settings.callbackManager.on("llm-start", (event) => {
   const { messages } = event.detail.payload;
   tokenCount += llm.tokens(messages);
   console.log("Token count:", tokenCount);
diff --git a/examples/rerankers/CohereReranker.ts b/examples/rerankers/CohereReranker.ts
index 4b34fa549..dba3ce336 100644
--- a/examples/rerankers/CohereReranker.ts
+++ b/examples/rerankers/CohereReranker.ts
@@ -2,22 +2,18 @@ import {
   CohereRerank,
   Document,
   OpenAI,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
 import essay from "../essay";
 
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
 async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
-  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
-  });
-
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   const retriever = index.asRetriever();
 
diff --git a/examples/routerQueryEngine.ts b/examples/routerQueryEngine.ts
index 57075f0db..d28f9e30d 100644
--- a/examples/routerQueryEngine.ts
+++ b/examples/routerQueryEngine.ts
@@ -1,38 +1,31 @@
 import {
   OpenAI,
   RouterQueryEngine,
+  Settings,
   SimpleDirectoryReader,
   SimpleNodeParser,
   SummaryIndex,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
+// Update llm
+Settings.llm = new OpenAI();
+
+// Update node parser
+Settings.nodeParser = new SimpleNodeParser({
+  chunkSize: 1024,
+});
+
 async function main() {
   // Load documents from a directory
   const documents = await new SimpleDirectoryReader().loadData({
     directoryPath: "node_modules/llamaindex/examples",
   });
 
-  // Parse the documents into nodes
-  const nodeParser = new SimpleNodeParser({
-    chunkSize: 1024,
-  });
-
-  // Create a service context
-  const serviceContext = serviceContextFromDefaults({
-    nodeParser,
-    llm: new OpenAI(),
-  });
-
   // Create indices
-  const vectorIndex = await VectorStoreIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
 
-  const summaryIndex = await SummaryIndex.fromDocuments(documents, {
-    serviceContext,
-  });
+  const summaryIndex = await SummaryIndex.fromDocuments(documents);
 
   // Create query engines
   const vectorQueryEngine = vectorIndex.asQueryEngine();
@@ -50,7 +43,6 @@ async function main() {
         description: "Useful for retrieving specific context from Abramov",
       },
     ],
-    serviceContext,
   });
 
   // Query the router query engine
diff --git a/examples/sentenceWindow.ts b/examples/sentenceWindow.ts
index fcb89d99d..cd470db34 100644
--- a/examples/sentenceWindow.ts
+++ b/examples/sentenceWindow.ts
@@ -3,27 +3,25 @@ import {
   HuggingFaceEmbedding,
   MetadataReplacementPostProcessor,
   SentenceWindowNodeParser,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
+
 import essay from "./essay";
 
+// Update node parser (sentence window parser) and embed model (local HuggingFace)
+Settings.nodeParser = new SentenceWindowNodeParser({
+  windowSize: 3,
+  windowMetadataKey: "window",
+  originalTextMetadataKey: "original_text",
+});
+Settings.embedModel = new HuggingFaceEmbedding();
+
 async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
-  // create service context with sentence window parser
-  // and local embedding from HuggingFace
-  const nodeParser = new SentenceWindowNodeParser({
-    windowSize: 3,
-    windowMetadataKey: "window",
-    originalTextMetadataKey: "original_text",
-  });
-  const embedModel = new HuggingFaceEmbedding();
-  const serviceContext = serviceContextFromDefaults({ nodeParser, embedModel });
-
   // Split text and create embeddings. Store them in a VectorStoreIndex
   const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
     logProgress: true,
   });
 
@@ -31,6 +29,7 @@ async function main() {
   const queryEngine = index.asQueryEngine({
     nodePostprocessors: [new MetadataReplacementPostProcessor("window")],
   });
+
   const response = await queryEngine.query({
     query: "What did the author do in college?",
   });
diff --git a/examples/summaryIndex.ts b/examples/summaryIndex.ts
index d11a47031..31710d41c 100644
--- a/examples/summaryIndex.ts
+++ b/examples/summaryIndex.ts
@@ -1,22 +1,21 @@
 import {
   Document,
+  Settings,
   SimpleNodeParser,
   SummaryIndex,
   SummaryRetrieverMode,
-  serviceContextFromDefaults,
 } from "llamaindex";
+
 import essay from "./essay";
 
+// Update node parser
+Settings.nodeParser = new SimpleNodeParser({
+  chunkSize: 40,
+});
+
 async function main() {
-  const serviceContext = serviceContextFromDefaults({
-    nodeParser: new SimpleNodeParser({
-      chunkSize: 40,
-    }),
-  });
   const document = new Document({ text: essay, id_: "essay" });
-  const index = await SummaryIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await SummaryIndex.fromDocuments([document]);
   const queryEngine = index.asQueryEngine({
     retriever: index.asRetriever({ mode: SummaryRetrieverMode.LLM }),
   });
diff --git a/examples/together-ai/vector-index.ts b/examples/together-ai/vector-index.ts
index 94b5c5762..001c3448e 100644
--- a/examples/together-ai/vector-index.ts
+++ b/examples/together-ai/vector-index.ts
@@ -2,12 +2,20 @@ import fs from "node:fs/promises";
 
 import {
   Document,
+  Settings,
   TogetherEmbedding,
   TogetherLLM,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
+// Update llm to use TogetherAI
+Settings.llm = new TogetherLLM({
+  model: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+});
+
+// Update embedModel
+Settings.embedModel = new TogetherEmbedding();
+
 async function main() {
   const apiKey = process.env.TOGETHER_API_KEY;
   if (!apiKey) {
@@ -18,14 +26,7 @@ async function main() {
 
   const document = new Document({ text: essay, id_: path });
 
-  const serviceContext = serviceContextFromDefaults({
-    llm: new TogetherLLM({ model: "mistralai/Mixtral-8x7B-Instruct-v0.1" }),
-    embedModel: new TogetherEmbedding(),
-  });
-
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   const queryEngine = index.asQueryEngine();
 
diff --git a/examples/vectorIndexAnthropic.ts b/examples/vectorIndexAnthropic.ts
index c04c516c7..5b10cdb40 100644
--- a/examples/vectorIndexAnthropic.ts
+++ b/examples/vectorIndexAnthropic.ts
@@ -2,14 +2,17 @@ import fs from "node:fs/promises";
 
 import {
   Anthropic,
-  anthropicTextQaPrompt,
   CompactAndRefine,
   Document,
   ResponseSynthesizer,
-  serviceContextFromDefaults,
+  Settings,
   VectorStoreIndex,
+  anthropicTextQaPrompt,
 } from "llamaindex";
 
+// Update llm to use Anthropic
+Settings.llm = new Anthropic();
+
 async function main() {
   // Load essay from abramov.txt in Node
   const path = "node_modules/llamaindex/examples/abramov.txt";
@@ -20,18 +23,12 @@ async function main() {
   const document = new Document({ text: essay, id_: path });
 
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const serviceContext = serviceContextFromDefaults({ llm: new Anthropic() });
-
   const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new CompactAndRefine(
-      serviceContext,
-      anthropicTextQaPrompt,
-    ),
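+    // no service context here, so the global Settings (Anthropic, set above) is used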
+    responseBuilder: new CompactAndRefine(undefined, anthropicTextQaPrompt),
   });
 
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Query the index
   const queryEngine = index.asQueryEngine({ responseSynthesizer });
diff --git a/examples/vectorIndexCustomize.ts b/examples/vectorIndexCustomize.ts
index e9013a0e6..6902a2bd7 100644
--- a/examples/vectorIndexCustomize.ts
+++ b/examples/vectorIndexCustomize.ts
@@ -2,23 +2,21 @@ import {
   Document,
   OpenAI,
   RetrieverQueryEngine,
-  serviceContextFromDefaults,
+  Settings,
   SimilarityPostprocessor,
   VectorStoreIndex,
 } from "llamaindex";
+
 import essay from "./essay";
 
+// Update llm to use OpenAI
+Settings.llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+
 // Customize retrieval and query args
 async function main() {
   const document = new Document({ text: essay, id_: "essay" });
 
-  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
-  });
-
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   const retriever = index.asRetriever();
   retriever.similarityTopK = 5;
diff --git a/examples/vectorIndexEmbed3.ts b/examples/vectorIndexEmbed3.ts
index 1d6c52b90..da0507682 100644
--- a/examples/vectorIndexEmbed3.ts
+++ b/examples/vectorIndexEmbed3.ts
@@ -3,10 +3,16 @@ import fs from "node:fs/promises";
 import {
   Document,
   OpenAIEmbedding,
+  Settings,
   VectorStoreIndex,
-  serviceContextFromDefaults,
 } from "llamaindex";
 
+// Update embed model
+Settings.embedModel = new OpenAIEmbedding({
+  model: "text-embedding-3-large",
+  dimensions: 1024,
+});
+
 async function main() {
   // Load essay from abramov.txt in Node
   const path = "node_modules/llamaindex/examples/abramov.txt";
@@ -16,17 +22,8 @@ async function main() {
   // Create Document object with essay
   const document = new Document({ text: essay, id_: path });
 
-  // Create service context and specify text-embedding-3-large
-  const embedModel = new OpenAIEmbedding({
-    model: "text-embedding-3-large",
-    dimensions: 1024,
-  });
-  const serviceContext = serviceContextFromDefaults({ embedModel });
-
   // Split text and create embeddings. Store them in a VectorStoreIndex
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
diff --git a/examples/vectorIndexFromVectorStore.ts b/examples/vectorIndexFromVectorStore.ts
index d539ae2ad..dde1edddf 100644
--- a/examples/vectorIndexFromVectorStore.ts
+++ b/examples/vectorIndexFromVectorStore.ts
@@ -2,7 +2,7 @@ import {
   OpenAI,
   ResponseSynthesizer,
   RetrieverQueryEngine,
-  serviceContextFromDefaults,
+  Settings,
   TextNode,
   TreeSummarize,
   VectorIndexRetriever,
@@ -14,6 +14,12 @@ import {
 
 import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
 
+// Update llm
+Settings.llm = new OpenAI({
+  model: "gpt-4",
+  apiKey: process.env.OPENAI_API_KEY,
+});
+
 /**
  * Please do not use this class in production; it's only for demonstration purposes.
  */
@@ -146,25 +152,11 @@ async function main() {
     });
   };
 
-  const getServiceContext = () => {
-    const openAI = new OpenAI({
-      model: "gpt-4",
-      apiKey: process.env.OPENAI_API_KEY,
-    });
-
-    return serviceContextFromDefaults({
-      llm: openAI,
-    });
-  };
-
   const getQueryEngine = async (filter: unknown) => {
     const vectorStore = await getPineconeVectorStore();
-    const serviceContext = getServiceContext();
 
-    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
-      vectorStore,
-      serviceContext,
-    );
+    const vectorStoreIndex =
+      await VectorStoreIndex.fromVectorStore(vectorStore);
 
     const retriever = new VectorIndexRetriever({
       index: vectorStoreIndex,
@@ -172,8 +164,7 @@ async function main() {
     });
 
     const responseSynthesizer = new ResponseSynthesizer({
-      serviceContext,
-      responseBuilder: new TreeSummarize(serviceContext),
+      responseBuilder: new TreeSummarize(),
     });
 
     return new RetrieverQueryEngine(retriever, responseSynthesizer, {
diff --git a/examples/vectorIndexGPT4.ts b/examples/vectorIndexGPT4.ts
index ed1ed20fc..dab35809e 100644
--- a/examples/vectorIndexGPT4.ts
+++ b/examples/vectorIndexGPT4.ts
@@ -1,11 +1,8 @@
 import fs from "node:fs/promises";
 
-import {
-  Document,
-  OpenAI,
-  serviceContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { Document, OpenAI, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.llm = new OpenAI({ model: "gpt-4" });
 
 async function main() {
   // Load essay from abramov.txt in Node
@@ -15,13 +12,7 @@ async function main() {
   // Create Document object with essay
   const document = new Document({ text: essay, id_: path });
 
-  // Split text and create embeddings. Store them in a VectorStoreIndex
-  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-4" }),
-  });
-  const index = await VectorStoreIndex.fromDocuments([document], {
-    serviceContext,
-  });
+  const index = await VectorStoreIndex.fromDocuments([document]);
 
   // Query the index
   const queryEngine = index.asQueryEngine();
diff --git a/packages/core/package.json b/packages/core/package.json
index 1261452d1..472d8061f 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -68,6 +68,10 @@
         "default": "./dist/cjs/index.js"
       }
     },
+    "./internal/*": {
+      "import": "./dist/not-allow.js",
+      "require": "./dist/cjs/not-allow.js"
+    },
     "./*": {
       "import": {
         "types": "./dist/type/*.d.ts",
diff --git a/packages/core/src/Retriever.ts b/packages/core/src/Retriever.ts
index b38125f32..b8b942b55 100644
--- a/packages/core/src/Retriever.ts
+++ b/packages/core/src/Retriever.ts
@@ -16,5 +16,7 @@ export type RetrieveParams = {
  */
 export interface BaseRetriever {
   retrieve(params: RetrieveParams): Promise<NodeWithScore[]>;
-  getServiceContext(): ServiceContext;
+
+  // to be deprecated soon
+  serviceContext?: ServiceContext;
 }
diff --git a/packages/core/src/ServiceContext.ts b/packages/core/src/ServiceContext.ts
index 61ce3f8b3..3b4ffbbad 100644
--- a/packages/core/src/ServiceContext.ts
+++ b/packages/core/src/ServiceContext.ts
@@ -1,8 +1,8 @@
 import { PromptHelper } from "./PromptHelper.js";
 import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js";
 import type { BaseEmbedding } from "./embeddings/types.js";
-import type { LLM } from "./llm/index.js";
-import { OpenAI } from "./llm/index.js";
+import { OpenAI } from "./llm/LLM.js";
+import type { LLM } from "./llm/types.js";
 import { SimpleNodeParser } from "./nodeParsers/SimpleNodeParser.js";
 import type { NodeParser } from "./nodeParsers/types.js";
 
diff --git a/packages/core/src/Settings.ts b/packages/core/src/Settings.ts
new file mode 100644
index 000000000..7d29a2aa3
--- /dev/null
+++ b/packages/core/src/Settings.ts
@@ -0,0 +1,221 @@
+import { CallbackManager } from "./callbacks/CallbackManager.js";
+import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js";
+import { OpenAI } from "./llm/LLM.js";
+
+import { PromptHelper } from "./PromptHelper.js";
+import { SimpleNodeParser } from "./nodeParsers/SimpleNodeParser.js";
+
+import { AsyncLocalStorage } from "@llamaindex/env";
+import type { ServiceContext } from "./ServiceContext.js";
+import type { BaseEmbedding } from "./embeddings/types.js";
+import {
+  getCallbackManager,
+  setCallbackManager,
+  withCallbackManager,
+} from "./internal/settings/CallbackManager.js";
+import type { LLM } from "./llm/types.js";
+import type { NodeParser } from "./nodeParsers/types.js";
+
+export type PromptConfig = {
+  llm?: string;
+  lang?: string;
+};
+
+export interface Config {
+  prompt: PromptConfig;
+  llm: LLM | null;
+  promptHelper: PromptHelper | null;
+  embedModel: BaseEmbedding | null;
+  nodeParser: NodeParser | null;
+  callbackManager: CallbackManager | null;
+  chunkSize?: number;
+  chunkOverlap?: number;
+}
+
+/**
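+ * Process-wide defaults for the LLM, embedding model, node parser, prompt helper,
+ * callback manager and chunking parameters. Each value can also be scoped to an
+ * async call tree via the corresponding `with*` method (backed by AsyncLocalStorage).
+ *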
+ * @internal
+ */
+class GlobalSettings implements Config {
+  #prompt: PromptConfig = {};
+  #llm: LLM | null = null;
+  #promptHelper: PromptHelper | null = null;
+  #embedModel: BaseEmbedding | null = null;
+  #nodeParser: NodeParser | null = null;
+  #chunkSize?: number;
+  #chunkOverlap?: number;
+
+  #llmAsyncLocalStorage = new AsyncLocalStorage<LLM>();
+  #promptHelperAsyncLocalStorage = new AsyncLocalStorage<PromptHelper>();
+  #embedModelAsyncLocalStorage = new AsyncLocalStorage<BaseEmbedding>();
+  #nodeParserAsyncLocalStorage = new AsyncLocalStorage<NodeParser>();
+  #chunkSizeAsyncLocalStorage = new AsyncLocalStorage<number>();
+  #chunkOverlapAsyncLocalStorage = new AsyncLocalStorage<number>();
+  #promptAsyncLocalStorage = new AsyncLocalStorage<PromptConfig>();
+
+  get llm(): LLM {
+    if (this.#llm === null) {
+      this.#llm = new OpenAI();
+    }
+
+    return this.#llmAsyncLocalStorage.getStore() ?? this.#llm;
+  }
+
+  set llm(llm: LLM) {
+    this.#llm = llm;
+  }
+
+  withLLM<Result>(llm: LLM, fn: () => Result): Result {
+    return this.#llmAsyncLocalStorage.run(llm, fn);
+  }
+
+  get promptHelper(): PromptHelper {
+    if (this.#promptHelper === null) {
+      this.#promptHelper = new PromptHelper();
+    }
+
+    return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper;
+  }
+
+  set promptHelper(promptHelper: PromptHelper) {
+    this.#promptHelper = promptHelper;
+  }
+
+  withPromptHelper<Result>(
+    promptHelper: PromptHelper,
+    fn: () => Result,
+  ): Result {
+    return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
+  }
+
+  get embedModel(): BaseEmbedding {
+    if (this.#embedModel === null) {
+      this.#embedModel = new OpenAIEmbedding();
+    }
+
+    return this.#embedModelAsyncLocalStorage.getStore() ?? this.#embedModel;
+  }
+
+  set embedModel(embedModel: BaseEmbedding) {
+    this.#embedModel = embedModel;
+  }
+
+  withEmbedModel<Result>(embedModel: BaseEmbedding, fn: () => Result): Result {
+    return this.#embedModelAsyncLocalStorage.run(embedModel, fn);
+  }
+
+  get nodeParser(): NodeParser {
+    if (this.#nodeParser === null) {
+      this.#nodeParser = new SimpleNodeParser({
+        chunkSize: this.#chunkSize,
+        chunkOverlap: this.#chunkOverlap,
+      });
+    }
+
+    return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser;
+  }
+
+  set nodeParser(nodeParser: NodeParser) {
+    this.#nodeParser = nodeParser;
+  }
+
+  withNodeParser<Result>(nodeParser: NodeParser, fn: () => Result): Result {
+    return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
+  }
+
+  get callbackManager(): CallbackManager {
+    return getCallbackManager();
+  }
+
+  set callbackManager(callbackManager: CallbackManager) {
+    setCallbackManager(callbackManager);
+  }
+
+  withCallbackManager<Result>(
+    callbackManager: CallbackManager,
+    fn: () => Result,
+  ): Result {
+    return withCallbackManager(callbackManager, fn);
+  }
+
+  set chunkSize(chunkSize: number | undefined) {
+    this.#chunkSize = chunkSize;
+  }
+
+  get chunkSize(): number | undefined {
+    return this.#chunkSizeAsyncLocalStorage.getStore() ?? this.#chunkSize;
+  }
+
+  withChunkSize<Result>(chunkSize: number, fn: () => Result): Result {
+    return this.#chunkSizeAsyncLocalStorage.run(chunkSize, fn);
+  }
+
+  get chunkOverlap(): number | undefined {
+    return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap;
+  }
+
+  set chunkOverlap(chunkOverlap: number | undefined) {
+    this.#chunkOverlap = chunkOverlap;
+  }
+
+  withChunkOverlap<Result>(chunkOverlap: number, fn: () => Result): Result {
+    return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
+  }
+
+  get prompt(): PromptConfig {
+    return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt;
+  }
+
+  set prompt(prompt: PromptConfig) {
+    this.#prompt = prompt;
+  }
+
+  withPrompt<Result>(prompt: PromptConfig, fn: () => Result): Result {
+    return this.#promptAsyncLocalStorage.run(prompt, fn);
+  }
+}
+
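+// The helpers below prefer a value from an explicitly passed ServiceContext and
+// otherwise fall back to the global Settings singleton.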
+export const llmFromSettingsOrContext = (serviceContext?: ServiceContext) => {
+  if (serviceContext?.llm) {
+    return serviceContext.llm;
+  }
+
+  return Settings.llm;
+};
+
+export const nodeParserFromSettingsOrContext = (
+  serviceContext?: ServiceContext,
+) => {
+  if (serviceContext?.nodeParser) {
+    return serviceContext.nodeParser;
+  }
+
+  return Settings.nodeParser;
+};
+
+export const embedModelFromSettingsOrContext = (
+  serviceContext?: ServiceContext,
+) => {
+  if (serviceContext?.embedModel) {
+    return serviceContext.embedModel;
+  }
+
+  return Settings.embedModel;
+};
+
+export const promptHelperFromSettingsOrContext = (
+  serviceContext?: ServiceContext,
+) => {
+  if (serviceContext?.promptHelper) {
+    return serviceContext.promptHelper;
+  }
+
+  return Settings.promptHelper;
+};
+
+export const Settings = new GlobalSettings();
diff --git a/packages/core/src/callbacks/CallbackManager.ts b/packages/core/src/callbacks/CallbackManager.ts
index f9436006e..cf38f49e9 100644
--- a/packages/core/src/callbacks/CallbackManager.ts
+++ b/packages/core/src/callbacks/CallbackManager.ts
@@ -1,5 +1,5 @@
 import type { Anthropic } from "@anthropic-ai/sdk";
-import { AsyncLocalStorage, CustomEvent } from "@llamaindex/env";
+import { CustomEvent } from "@llamaindex/env";
 import type { NodeWithScore } from "../Node.js";
 
 /**
@@ -135,30 +135,26 @@ export class CallbackManager implements CallbackManagerMethods {
    * @deprecated will be removed in the next major version
    */
   set onLLMStream(_: never) {
-    throw new Error(
-      "onLLMStream is deprecated. Use addHandlers('stream') instead",
-    );
+    throw new Error("onLLMStream is deprecated. Use on('stream') instead");
   }
 
   /**
    * @deprecated will be removed in the next major version
    */
   set onRetrieve(_: never) {
-    throw new Error(
-      "onRetrieve is deprecated. Use `addHandlers('retrieve')` instead",
-    );
+    throw new Error("onRetrieve is deprecated. Use `on('retrieve')` instead");
   }
 
   #handlers = new Map<keyof LlamaIndexEventMaps, EventHandler<CustomEvent>[]>();
 
   constructor(handlers?: Partial<CallbackManagerMethods>) {
     const onLLMStream = handlers?.onLLMStream ?? noop;
-    this.addHandlers("stream", (event) => onLLMStream(event.detail));
+    this.on("stream", (event) => onLLMStream(event.detail));
     const onRetrieve = handlers?.onRetrieve ?? noop;
-    this.addHandlers("retrieve", (event) => onRetrieve(event.detail));
+    this.on("retrieve", (event) => onRetrieve(event.detail));
   }
 
-  addHandlers<
+  on<
     K extends keyof LlamaIndexEventMaps,
     H extends EventHandler<LlamaIndexEventMaps[K]>,
   >(event: K, handler: H) {
@@ -169,7 +165,7 @@ export class CallbackManager implements CallbackManagerMethods {
     return this;
   }
 
-  removeHandlers<
+  off<
     K extends keyof LlamaIndexEventMaps,
     H extends EventHandler<LlamaIndexEventMaps[K]>,
   >(event: K, handler: H) {
@@ -195,21 +191,3 @@ export class CallbackManager implements CallbackManagerMethods {
     handlers.forEach((handler) => handler(new CustomEvent(event, { detail })));
   }
 }
-
-const defaultCallbackManager = new CallbackManager();
-const callbackAsyncLocalStorage = new AsyncLocalStorage<CallbackManager>();
-
-/**
- * Get the current callback manager
- * @default defaultCallbackManager if no callback manager is set
- */
-export function getCurrentCallbackManager() {
-  return callbackAsyncLocalStorage.getStore() ?? defaultCallbackManager;
-}
-
-export function runWithCallbackManager<Result>(
-  callbackManager: CallbackManager,
-  fn: () => Result,
-): Result {
-  return callbackAsyncLocalStorage.run(callbackManager, fn);
-}
diff --git a/packages/core/src/cloud/LlamaCloudRetriever.ts b/packages/core/src/cloud/LlamaCloudRetriever.ts
index 563ab4e15..0b4a53dc1 100644
--- a/packages/core/src/cloud/LlamaCloudRetriever.ts
+++ b/packages/core/src/cloud/LlamaCloudRetriever.ts
@@ -3,9 +3,7 @@ import { globalsHelper } from "../GlobalsHelper.js";
 import type { NodeWithScore } from "../Node.js";
 import { ObjectType, jsonToNode } from "../Node.js";
 import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
-import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
-import { getCurrentCallbackManager } from "../callbacks/CallbackManager.js";
+import { Settings } from "../Settings.js";
 import type { ClientParams, CloudConstructorParams } from "./types.js";
 import { DEFAULT_PROJECT_NAME } from "./types.js";
 import { getClient } from "./utils.js";
@@ -21,7 +19,6 @@ export class LlamaCloudRetriever implements BaseRetriever {
   retrieveParams: CloudRetrieveParams;
   projectName: string = DEFAULT_PROJECT_NAME;
   pipelineName: string;
-  serviceContext: ServiceContext;
 
   private resultNodesToNodeWithScore(
     nodes: PlatformApi.TextNodeWithScore[],
@@ -45,7 +42,6 @@ export class LlamaCloudRetriever implements BaseRetriever {
     if (params.projectName) {
       this.projectName = params.projectName;
     }
-    this.serviceContext = params.serviceContext ?? serviceContextFromDefaults();
   }
 
   private async getClient(): Promise<PlatformApiClient> {
@@ -81,7 +77,7 @@ export class LlamaCloudRetriever implements BaseRetriever {
 
     const nodes = this.resultNodesToNodeWithScore(results.retrievalNodes);
 
-    getCurrentCallbackManager().onRetrieve({
+    Settings.callbackManager.onRetrieve({
       query,
       nodes,
       event: globalsHelper.createEvent({
@@ -92,8 +88,4 @@ export class LlamaCloudRetriever implements BaseRetriever {
 
     return nodes;
   }
-
-  getServiceContext(): ServiceContext {
-    return this.serviceContext;
-  }
 }
diff --git a/packages/core/src/engines/chat/CondenseQuestionChatEngine.ts b/packages/core/src/engines/chat/CondenseQuestionChatEngine.ts
index 7baf58d73..cd66a44fc 100644
--- a/packages/core/src/engines/chat/CondenseQuestionChatEngine.ts
+++ b/packages/core/src/engines/chat/CondenseQuestionChatEngine.ts
@@ -7,7 +7,7 @@ import {
 } from "../../Prompt.js";
 import type { Response } from "../../Response.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import { serviceContextFromDefaults } from "../../ServiceContext.js";
+import { llmFromSettingsOrContext } from "../../Settings.js";
 import type { ChatMessage, LLM } from "../../llm/index.js";
 import { extractText, streamReducer } from "../../llm/utils.js";
 import { PromptMixin } from "../../prompts/index.js";
@@ -48,7 +48,7 @@ export class CondenseQuestionChatEngine
 
     this.queryEngine = init.queryEngine;
     this.chatHistory = getHistory(init?.chatHistory);
-    this.llm = init?.serviceContext?.llm ?? serviceContextFromDefaults().llm;
+    this.llm = llmFromSettingsOrContext(init?.serviceContext);
     this.condenseMessagePrompt =
       init?.condenseMessagePrompt ?? defaultCondenseQuestionPrompt;
   }
diff --git a/packages/core/src/engines/query/RetrieverQueryEngine.ts b/packages/core/src/engines/query/RetrieverQueryEngine.ts
index 540243dab..d7cc96a54 100644
--- a/packages/core/src/engines/query/RetrieverQueryEngine.ts
+++ b/packages/core/src/engines/query/RetrieverQueryEngine.ts
@@ -2,7 +2,6 @@ import { randomUUID } from "@llamaindex/env";
 import type { NodeWithScore } from "../../Node.js";
 import type { Response } from "../../Response.js";
 import type { BaseRetriever } from "../../Retriever.js";
-import type { ServiceContext } from "../../ServiceContext.js";
 import type { Event } from "../../callbacks/CallbackManager.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import { PromptMixin } from "../../prompts/Mixin.js";
@@ -35,10 +34,11 @@ export class RetrieverQueryEngine
     super();
 
     this.retriever = retriever;
-    const serviceContext: ServiceContext | undefined =
-      this.retriever.getServiceContext();
     this.responseSynthesizer =
-      responseSynthesizer || new ResponseSynthesizer({ serviceContext });
+      responseSynthesizer ||
+      new ResponseSynthesizer({
+        serviceContext: retriever.serviceContext,
+      });
     this.preFilters = preFilters;
     this.nodePostprocessors = nodePostprocessors || [];
   }
diff --git a/packages/core/src/engines/query/RouterQueryEngine.ts b/packages/core/src/engines/query/RouterQueryEngine.ts
index bfe24362b..6c26ba4f0 100644
--- a/packages/core/src/engines/query/RouterQueryEngine.ts
+++ b/packages/core/src/engines/query/RouterQueryEngine.ts
@@ -1,7 +1,7 @@
 import type { BaseNode } from "../../Node.js";
 import { Response } from "../../Response.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import { serviceContextFromDefaults } from "../../ServiceContext.js";
+import { llmFromSettingsOrContext } from "../../Settings.js";
 import { PromptMixin } from "../../prompts/index.js";
 import type { BaseSelector } from "../../selectors/index.js";
 import { LLMSingleSelector } from "../../selectors/index.js";
@@ -55,8 +55,6 @@ async function combineResponses(
  * A query engine that uses multiple query engines and selects the best one.
  */
 export class RouterQueryEngine extends PromptMixin implements BaseQueryEngine {
-  serviceContext: ServiceContext;
-
   private selector: BaseSelector;
   private queryEngines: BaseQueryEngine[];
   private metadatas: RouterQueryEngineMetadata[];
@@ -72,13 +70,12 @@ export class RouterQueryEngine extends PromptMixin implements BaseQueryEngine {
   }) {
     super();
 
-    this.serviceContext = init.serviceContext || serviceContextFromDefaults({});
     this.selector = init.selector;
     this.queryEngines = init.queryEngineTools.map((tool) => tool.queryEngine);
     this.metadatas = init.queryEngineTools.map((tool) => ({
       description: tool.description,
     }));
-    this.summarizer = init.summarizer || new TreeSummarize(this.serviceContext);
+    this.summarizer = init.summarizer || new TreeSummarize(init.serviceContext);
     this.verbose = init.verbose ?? false;
   }
 
@@ -96,12 +93,14 @@ export class RouterQueryEngine extends PromptMixin implements BaseQueryEngine {
     summarizer?: TreeSummarize;
     verbose?: boolean;
   }) {
-    const serviceContext =
-      init.serviceContext ?? serviceContextFromDefaults({});
+    const serviceContext = init.serviceContext;
 
     return new RouterQueryEngine({
       selector:
-        init.selector ?? new LLMSingleSelector({ llm: serviceContext.llm }),
+        init.selector ??
+        new LLMSingleSelector({
+          llm: llmFromSettingsOrContext(serviceContext),
+        }),
       queryEngineTools: init.queryEngineTools,
       serviceContext,
       summarizer: init.summarizer,
diff --git a/packages/core/src/engines/query/SubQuestionQueryEngine.ts b/packages/core/src/engines/query/SubQuestionQueryEngine.ts
index 5d0333fe2..00ff68b60 100644
--- a/packages/core/src/engines/query/SubQuestionQueryEngine.ts
+++ b/packages/core/src/engines/query/SubQuestionQueryEngine.ts
@@ -4,7 +4,6 @@ import { TextNode } from "../../Node.js";
 import { LLMQuestionGenerator } from "../../QuestionGenerator.js";
 import type { Response } from "../../Response.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import { serviceContextFromDefaults } from "../../ServiceContext.js";
 import type { Event } from "../../callbacks/CallbackManager.js";
 import { PromptMixin } from "../../prompts/Mixin.js";
 import type { BaseSynthesizer } from "../../synthesizers/index.js";
@@ -62,8 +61,7 @@ export class SubQuestionQueryEngine
     responseSynthesizer?: BaseSynthesizer;
     serviceContext?: ServiceContext;
   }) {
-    const serviceContext =
-      init.serviceContext ?? serviceContextFromDefaults({});
+    const serviceContext = init.serviceContext;
 
     const questionGen = init.questionGen ?? new LLMQuestionGenerator();
     const responseSynthesizer =
diff --git a/packages/core/src/evaluation/Correctness.ts b/packages/core/src/evaluation/Correctness.ts
index 8004e93a2..1354e83f9 100644
--- a/packages/core/src/evaluation/Correctness.ts
+++ b/packages/core/src/evaluation/Correctness.ts
@@ -1,7 +1,7 @@
 import { MetadataMode } from "../Node.js";
 import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
-import type { ChatMessage } from "../llm/types.js";
+import { llmFromSettingsOrContext } from "../Settings.js";
+import type { ChatMessage, LLM } from "../llm/types.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type { CorrectnessSystemPrompt } from "./prompts.js";
 import {
@@ -24,20 +24,20 @@ type CorrectnessParams = {
 
 /** Correctness Evaluator */
 export class CorrectnessEvaluator extends PromptMixin implements BaseEvaluator {
-  private serviceContext: ServiceContext;
   private scoreThreshold: number;
   private parserFunction: (str: string) => [number, string];
+  private llm: LLM;
 
   private correctnessPrompt: CorrectnessSystemPrompt =
     defaultCorrectnessSystemPrompt;
 
-  constructor(params: CorrectnessParams) {
+  constructor(params?: CorrectnessParams) {
     super();
 
-    this.serviceContext = params.serviceContext || serviceContextFromDefaults();
+    this.llm = llmFromSettingsOrContext(params?.serviceContext);
     this.correctnessPrompt = defaultCorrectnessSystemPrompt;
-    this.scoreThreshold = params.scoreThreshold || 4.0;
-    this.parserFunction = params.parserFunction || defaultEvaluationParser;
+    this.scoreThreshold = params?.scoreThreshold ?? 4.0;
+    this.parserFunction = params?.parserFunction ?? defaultEvaluationParser;
   }
 
   _updatePrompts(prompts: {
@@ -80,7 +80,7 @@ export class CorrectnessEvaluator extends PromptMixin implements BaseEvaluator {
       },
     ];
 
-    const evalResponse = await this.serviceContext.llm.chat({
+    const evalResponse = await this.llm.chat({
       messages,
     });
 
diff --git a/packages/core/src/evaluation/Faithfulness.ts b/packages/core/src/evaluation/Faithfulness.ts
index 3c7d69ef5..a4b512dd7 100644
--- a/packages/core/src/evaluation/Faithfulness.ts
+++ b/packages/core/src/evaluation/Faithfulness.ts
@@ -1,6 +1,5 @@
 import { Document, MetadataMode } from "../Node.js";
 import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
 import { SummaryIndex } from "../indices/summary/index.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type {
@@ -22,25 +21,25 @@ export class FaithfulnessEvaluator
   extends PromptMixin
   implements BaseEvaluator
 {
-  private serviceContext: ServiceContext;
+  private serviceContext?: ServiceContext;
   private raiseError: boolean;
   private evalTemplate: FaithfulnessTextQAPrompt;
   private refineTemplate: FaithfulnessRefinePrompt;
 
-  constructor(params: {
+  constructor(params?: {
     serviceContext?: ServiceContext;
     raiseError?: boolean;
     faithfulnessSystemPrompt?: FaithfulnessTextQAPrompt;
     faithFulnessRefinePrompt?: FaithfulnessRefinePrompt;
   }) {
     super();
-    this.serviceContext = params.serviceContext || serviceContextFromDefaults();
-    this.raiseError = params.raiseError || false;
+    this.serviceContext = params?.serviceContext;
+    this.raiseError = params?.raiseError ?? false;
 
     this.evalTemplate =
-      params.faithfulnessSystemPrompt || defaultFaithfulnessTextQaPrompt;
+      params?.faithfulnessSystemPrompt ?? defaultFaithfulnessTextQaPrompt;
     this.refineTemplate =
-      params.faithFulnessRefinePrompt || defaultFaithfulnessRefinePrompt;
+      params?.faithFulnessRefinePrompt ?? defaultFaithfulnessRefinePrompt;
   }
 
   protected _getPrompts(): { [x: string]: any } {
diff --git a/packages/core/src/evaluation/Relevancy.ts b/packages/core/src/evaluation/Relevancy.ts
index a7ed4992e..55a4506b2 100644
--- a/packages/core/src/evaluation/Relevancy.ts
+++ b/packages/core/src/evaluation/Relevancy.ts
@@ -1,6 +1,5 @@
 import { Document, MetadataMode } from "../Node.js";
 import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
 import { SummaryIndex } from "../indices/summary/index.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type { RelevancyEvalPrompt, RelevancyRefinePrompt } from "./prompts.js";
@@ -23,19 +22,20 @@ type RelevancyParams = {
 };
 
 export class RelevancyEvaluator extends PromptMixin implements BaseEvaluator {
-  private serviceContext: ServiceContext;
+  private serviceContext?: ServiceContext;
   private raiseError: boolean;
 
   private evalTemplate: RelevancyEvalPrompt;
   private refineTemplate: RelevancyRefinePrompt;
 
-  constructor(params: RelevancyParams) {
+  constructor(params?: RelevancyParams) {
     super();
 
-    this.serviceContext = params.serviceContext ?? serviceContextFromDefaults();
-    this.raiseError = params.raiseError ?? false;
-    this.evalTemplate = params.evalTemplate ?? defaultRelevancyEvalPrompt;
-    this.refineTemplate = params.refineTemplate ?? defaultRelevancyRefinePrompt;
+    this.serviceContext = params?.serviceContext;
+    this.raiseError = params?.raiseError ?? false;
+    this.evalTemplate = params?.evalTemplate ?? defaultRelevancyEvalPrompt;
+    this.refineTemplate =
+      params?.refineTemplate ?? defaultRelevancyRefinePrompt;
   }
 
   _getPrompts() {
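
With every constructor parameter now optional, the three evaluators can be created without a `ServiceContext` and fall back to the globally configured LLM. A minimal sketch of the new calling convention (assuming `Settings.llm` has been set; per-evaluator overrides via `serviceContext` still work):

```ts
import {
  CorrectnessEvaluator,
  FaithfulnessEvaluator,
  OpenAI,
  RelevancyEvaluator,
  Settings,
} from "llamaindex";

// Global LLM picked up by evaluators constructed without arguments.
Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });

const correctness = new CorrectnessEvaluator(); // scoreThreshold defaults to 4.0
const faithfulness = new FaithfulnessEvaluator();
const relevancy = new RelevancyEvaluator();

// A ServiceContext can still be passed to any of them to override the globals:
// new FaithfulnessEvaluator({ serviceContext });
```
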
diff --git a/packages/core/src/index.edge.ts b/packages/core/src/index.edge.ts
index bbfed3743..d547430da 100644
--- a/packages/core/src/index.edge.ts
+++ b/packages/core/src/index.edge.ts
@@ -8,6 +8,7 @@ export * from "./QuestionGenerator.js";
 export * from "./Response.js";
 export * from "./Retriever.js";
 export * from "./ServiceContext.js";
+export { Settings } from "./Settings.js";
 export * from "./TextSplitter.js";
 export * from "./agent/index.js";
 export * from "./callbacks/CallbackManager.js";
diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts
index 028b3bfaa..2cce62428 100644
--- a/packages/core/src/indices/BaseIndex.ts
+++ b/packages/core/src/indices/BaseIndex.ts
@@ -1,6 +1,7 @@
 import type { BaseNode, Document } from "../Node.js";
 import type { BaseRetriever } from "../Retriever.js";
 import type { ServiceContext } from "../ServiceContext.js";
+import { nodeParserFromSettingsOrContext } from "../Settings.js";
 import { runTransformations } from "../ingestion/IngestionPipeline.js";
 import type { StorageContext } from "../storage/StorageContext.js";
 import type { BaseDocumentStore } from "../storage/docStore/types.js";
@@ -15,6 +16,7 @@ import { IndexStructType } from "./json-to-index-struct.js";
 export class KeywordTable extends IndexStruct {
   table: Map<string, Set<string>> = new Map();
   type: IndexStructType = IndexStructType.KEYWORD_TABLE;
+
   addNode(keywords: string[], nodeId: string): void {
     keywords.forEach((keyword) => {
       if (!this.table.has(keyword)) {
@@ -42,7 +44,7 @@ export class KeywordTable extends IndexStruct {
 }
 
 export interface BaseIndexInit<T> {
-  serviceContext: ServiceContext;
+  serviceContext?: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
   vectorStore?: VectorStore;
@@ -55,7 +57,7 @@ export interface BaseIndexInit<T> {
  * they can be retrieved for our queries.
  */
 export abstract class BaseIndex<T> {
-  serviceContext: ServiceContext;
+  serviceContext?: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
   vectorStore?: VectorStore;
@@ -94,7 +96,7 @@ export abstract class BaseIndex<T> {
   async insert(document: Document) {
     const nodes = await runTransformations(
       [document],
-      [this.serviceContext.nodeParser],
+      [nodeParserFromSettingsOrContext(this.serviceContext)],
     );
     await this.insertNodes(nodes);
     this.docStore.setDocumentHash(document.id_, document.hash);
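
The `*FromSettingsOrContext` helpers that this patch threads through the indices are simple resolvers: use the field from an explicitly supplied `ServiceContext` if there is one, otherwise fall back to the global `Settings`. A sketch of the presumed shape (the real implementations live in `Settings.ts`; the `llm`, `embedModel`, and `promptHelper` variants follow the same pattern):

```ts
import type { ServiceContext } from "llamaindex";
import { Settings } from "llamaindex";

// Presumed resolution order: explicit ServiceContext first, global Settings second.
function nodeParserFromSettingsOrContext(serviceContext?: ServiceContext) {
  return serviceContext?.nodeParser ?? Settings.nodeParser;
}
```
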
diff --git a/packages/core/src/indices/keyword/index.ts b/packages/core/src/indices/keyword/index.ts
index 61067f55f..3fedcef49 100644
--- a/packages/core/src/indices/keyword/index.ts
+++ b/packages/core/src/indices/keyword/index.ts
@@ -27,11 +27,15 @@ import {
   simpleExtractKeywords,
 } from "./utils.js";
 
+import { llmFromSettingsOrContext } from "../../Settings.js";
+import type { LLM } from "../../llm/types.js";
+
 export interface KeywordIndexOptions {
   nodes?: BaseNode[];
   indexStruct?: KeywordTable;
   indexId?: string;
   serviceContext?: ServiceContext;
+  llm?: LLM;
   storageContext?: StorageContext;
 }
 export enum KeywordTableRetrieverMode {
@@ -45,7 +49,7 @@ abstract class BaseKeywordTableRetriever implements BaseRetriever {
   protected index: KeywordTableIndex;
   protected indexStruct: KeywordTable;
   protected docstore: BaseDocumentStore;
-  protected serviceContext: ServiceContext;
+  protected llm: LLM;
 
   protected maxKeywordsPerQuery: number; // Maximum number of keywords to extract from query.
   protected numChunksPerQuery: number; // Maximum number of text chunks to query.
@@ -68,7 +72,7 @@ abstract class BaseKeywordTableRetriever implements BaseRetriever {
     this.index = index;
     this.indexStruct = index.indexStruct;
     this.docstore = index.docStore;
-    this.serviceContext = index.serviceContext;
+    this.llm = llmFromSettingsOrContext(index.serviceContext);
 
     this.maxKeywordsPerQuery = maxKeywordsPerQuery;
     this.numChunksPerQuery = numChunksPerQuery;
@@ -101,16 +105,12 @@ abstract class BaseKeywordTableRetriever implements BaseRetriever {
 
     return sortedNodes.map((node) => ({ node }));
   }
-
-  getServiceContext(): ServiceContext {
-    return this.index.serviceContext;
-  }
 }
 
 // Extracts keywords using LLMs.
 export class KeywordTableLLMRetriever extends BaseKeywordTableRetriever {
   async getKeywords(query: string): Promise<string[]> {
-    const response = await this.serviceContext.llm.complete({
+    const response = await this.llm.complete({
       prompt: this.queryKeywordExtractTemplate({
         question: query,
         maxKeywords: this.maxKeywordsPerQuery,
@@ -156,8 +156,7 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
   static async init(options: KeywordIndexOptions): Promise<KeywordTableIndex> {
     const storageContext =
       options.storageContext ?? (await storageContextFromDefaults({}));
-    const serviceContext =
-      options.serviceContext ?? serviceContextFromDefaults({});
+    const serviceContext = options.serviceContext;
     const { docStore, indexStore } = storageContext;
 
     // Setup IndexStruct from storage
@@ -247,13 +246,16 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
 
   static async extractKeywords(
     text: string,
-    serviceContext: ServiceContext,
+    serviceContext?: ServiceContext,
   ): Promise<Set<string>> {
-    const response = await serviceContext.llm.complete({
+    const llm = llmFromSettingsOrContext(serviceContext);
+
+    const response = await llm.complete({
       prompt: defaultKeywordExtractPrompt({
         context: text,
       }),
     });
+
     return extractKeywordsGivenResponse(response.text, "KEYWORDS:");
   }
 
@@ -300,7 +302,7 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
   static async buildIndexFromNodes(
     nodes: BaseNode[],
     docStore: BaseDocumentStore,
-    serviceContext: ServiceContext,
+    serviceContext?: ServiceContext,
   ): Promise<KeywordTable> {
     const indexStruct = new KeywordTable();
     await docStore.addDocuments(nodes, true);
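
Since `extractKeywords` now resolves its LLM through `llmFromSettingsOrContext`, keyword extraction can run against the global configuration alone; the optional `serviceContext` argument remains an override. A hedged usage sketch:

```ts
import { KeywordTableIndex, OpenAI, Settings } from "llamaindex";

Settings.llm = new OpenAI({ model: "gpt-3.5-turbo" });

async function main() {
  // No ServiceContext argument: the keyword extraction prompt goes to Settings.llm.
  const keywords = await KeywordTableIndex.extractKeywords(
    "LlamaIndex is a data framework for LLM applications.",
  );
  console.log([...keywords]);
}

main().catch(console.error);
```
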
diff --git a/packages/core/src/indices/summary/index.ts b/packages/core/src/indices/summary/index.ts
index a948327b7..1eef0ebe5 100644
--- a/packages/core/src/indices/summary/index.ts
+++ b/packages/core/src/indices/summary/index.ts
@@ -5,8 +5,11 @@ import type { ChoiceSelectPrompt } from "../../Prompt.js";
 import { defaultChoiceSelectPrompt } from "../../Prompt.js";
 import type { BaseRetriever, RetrieveParams } from "../../Retriever.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import { serviceContextFromDefaults } from "../../ServiceContext.js";
-import { getCurrentCallbackManager } from "../../callbacks/CallbackManager.js";
+import {
+  Settings,
+  llmFromSettingsOrContext,
+  nodeParserFromSettingsOrContext,
+} from "../../Settings.js";
 import { RetrieverQueryEngine } from "../../engines/query/index.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import type { StorageContext } from "../../storage/StorageContext.js";
@@ -58,8 +61,7 @@ export class SummaryIndex extends BaseIndex<IndexList> {
   static async init(options: SummaryIndexOptions): Promise<SummaryIndex> {
     const storageContext =
       options.storageContext ?? (await storageContextFromDefaults({}));
-    const serviceContext =
-      options.serviceContext ?? serviceContextFromDefaults({});
+    const serviceContext = options.serviceContext;
     const { docStore, indexStore } = storageContext;
 
     // Setup IndexStruct from storage
@@ -130,7 +132,7 @@ export class SummaryIndex extends BaseIndex<IndexList> {
   ): Promise<SummaryIndex> {
     let { storageContext, serviceContext } = args;
     storageContext = storageContext ?? (await storageContextFromDefaults({}));
-    serviceContext = serviceContext ?? serviceContextFromDefaults({});
+    serviceContext = serviceContext;
     const docStore = storageContext.docStore;
 
     docStore.addDocuments(documents, true);
@@ -138,7 +140,11 @@ export class SummaryIndex extends BaseIndex<IndexList> {
       docStore.setDocumentHash(doc.id_, doc.hash);
     }
 
-    const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    const nodes =
+      nodeParserFromSettingsOrContext(serviceContext).getNodesFromDocuments(
+        documents,
+      );
+
     const index = await SummaryIndex.init({
       nodes,
       storageContext,
@@ -292,7 +298,7 @@ export class SummaryIndexRetriever implements BaseRetriever {
       score: 1,
     }));
 
-    getCurrentCallbackManager().onRetrieve({
+    Settings.callbackManager.onRetrieve({
       query,
       nodes: result,
       event: globalsHelper.createEvent({
@@ -303,10 +309,6 @@ export class SummaryIndexRetriever implements BaseRetriever {
 
     return result;
   }
-
-  getServiceContext(): ServiceContext {
-    return this.index.serviceContext;
-  }
 }
 
 /**
@@ -318,7 +320,7 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
   choiceBatchSize: number;
   formatNodeBatchFn: NodeFormatterFunction;
   parseChoiceSelectAnswerFn: ChoiceSelectParserFunction;
-  serviceContext: ServiceContext;
+  serviceContext?: ServiceContext;
 
   // eslint-disable-next-line max-params
   constructor(
@@ -351,8 +353,11 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
 
       const fmtBatchStr = this.formatNodeBatchFn(nodesBatch);
       const input = { context: fmtBatchStr, query: query };
+
+      const llm = llmFromSettingsOrContext(this.serviceContext);
+
       const rawResponse = (
-        await this.serviceContext.llm.complete({
+        await llm.complete({
           prompt: this.choiceSelectPrompt(input),
         })
       ).text;
@@ -375,7 +380,7 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
       results.push(...nodeWithScores);
     }
 
-    getCurrentCallbackManager().onRetrieve({
+    Settings.callbackManager.onRetrieve({
       query,
       nodes: results,
       event: globalsHelper.createEvent({
@@ -386,10 +391,6 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
 
     return results;
   }
-
-  getServiceContext(): ServiceContext {
-    return this.serviceContext;
-  }
 }
 
 // Legacy
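
Both summary retrievers now report retrieval results through `Settings.callbackManager` instead of the removed `getCurrentCallbackManager()`. A sketch of installing a global `onRetrieve` handler (the handler shape mirrors the existing CallbackManager tests):

```ts
import { CallbackManager, Settings } from "llamaindex";

// Global callback manager; SummaryIndexRetriever and SummaryIndexLLMRetriever
// invoke Settings.callbackManager.onRetrieve(...) after each retrieval.
Settings.callbackManager = new CallbackManager({
  onRetrieve: ({ query, nodes }) => {
    console.log(`retrieved ${nodes.length} nodes for "${query}"`);
  },
});
```
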
diff --git a/packages/core/src/indices/vectorStore/index.ts b/packages/core/src/indices/vectorStore/index.ts
index 7aebb45c2..06937a094 100644
--- a/packages/core/src/indices/vectorStore/index.ts
+++ b/packages/core/src/indices/vectorStore/index.ts
@@ -13,11 +13,12 @@ import {
 } from "../../Node.js";
 import type { BaseRetriever, RetrieveParams } from "../../Retriever.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import { serviceContextFromDefaults } from "../../ServiceContext.js";
 import {
-  getCurrentCallbackManager,
-  type Event,
-} from "../../callbacks/CallbackManager.js";
+  Settings,
+  embedModelFromSettingsOrContext,
+  nodeParserFromSettingsOrContext,
+} from "../../Settings.js";
+import { type Event } from "../../callbacks/CallbackManager.js";
 import { DEFAULT_SIMILARITY_TOP_K } from "../../constants.js";
 import type {
   BaseEmbedding,
@@ -79,7 +80,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     super(init);
     this.indexStore = init.indexStore;
     this.vectorStore = init.vectorStore ?? init.storageContext.vectorStore;
-    this.embedModel = init.serviceContext.embedModel;
+    this.embedModel = embedModelFromSettingsOrContext(init.serviceContext);
     this.imageVectorStore =
       init.imageVectorStore ?? init.storageContext.imageVectorStore;
     if (this.imageVectorStore) {
@@ -97,8 +98,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
   ): Promise<VectorStoreIndex> {
     const storageContext =
       options.storageContext ?? (await storageContextFromDefaults({}));
-    const serviceContext =
-      options.serviceContext ?? serviceContextFromDefaults({});
+    const serviceContext = options.serviceContext;
     const indexStore = storageContext.indexStore;
     const docStore = storageContext.docStore;
 
@@ -222,7 +222,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
         : DocStoreStrategy.DUPLICATES_ONLY);
     args.storageContext =
       args.storageContext ?? (await storageContextFromDefaults({}));
-    args.serviceContext = args.serviceContext ?? serviceContextFromDefaults({});
+    args.serviceContext = args.serviceContext;
     const docStore = args.storageContext.docStore;
 
     if (args.logProgress) {
@@ -237,7 +237,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     );
     args.nodes = await runTransformations(
       documents,
-      [args.serviceContext.nodeParser],
+      [nodeParserFromSettingsOrContext(args.serviceContext)],
       {},
       { docStoreStrategy },
     );
@@ -249,7 +249,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
 
   static async fromVectorStore(
     vectorStore: VectorStore,
-    serviceContext: ServiceContext,
+    serviceContext?: ServiceContext,
     imageVectorStore?: VectorStore,
   ) {
     if (!vectorStore.storesText) {
@@ -424,7 +424,8 @@ export class VectorIndexRetriever implements BaseRetriever {
   index: VectorStoreIndex;
   similarityTopK: number;
   imageSimilarityTopK: number;
-  private serviceContext: ServiceContext;
+
+  serviceContext?: ServiceContext;
 
   constructor({
     index,
@@ -491,7 +492,7 @@ export class VectorIndexRetriever implements BaseRetriever {
     nodesWithScores: NodeWithScore<Metadata>[],
     parentEvent: Event | undefined,
   ) {
-    getCurrentCallbackManager().onRetrieve({
+    Settings.callbackManager.onRetrieve({
       query,
       nodes: nodesWithScores,
       event: globalsHelper.createEvent({
@@ -540,8 +541,4 @@ export class VectorIndexRetriever implements BaseRetriever {
 
     return nodesWithScores;
   }
-
-  getServiceContext(): ServiceContext {
-    return this.serviceContext;
-  }
 }
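
With `serviceContext` optional along the whole vector-index path, the canonical flow needs only the globals: `Settings.embedModel` for embeddings (via `embedModelFromSettingsOrContext`) and `Settings.llm` for synthesis. A minimal end-to-end sketch:

```ts
import {
  Document,
  OpenAI,
  OpenAIEmbedding,
  Settings,
  VectorStoreIndex,
} from "llamaindex";

Settings.llm = new OpenAI({ model: "gpt-3.5-turbo" });
Settings.embedModel = new OpenAIEmbedding();

async function main() {
  const documents = [new Document({ text: "Alice maintains the Settings API." })];

  // No serviceContext argument: node parsing, embedding, and synthesis
  // all resolve through the global Settings.
  const index = await VectorStoreIndex.fromDocuments(documents);
  const queryEngine = index.asQueryEngine();

  const response = await queryEngine.query({
    query: "Who maintains the Settings API?",
  });
  console.log(response.toString());
}

main().catch(console.error);
```
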
diff --git a/packages/core/src/internal/settings/CallbackManager.ts b/packages/core/src/internal/settings/CallbackManager.ts
new file mode 100644
index 000000000..55660a14b
--- /dev/null
+++ b/packages/core/src/internal/settings/CallbackManager.ts
@@ -0,0 +1,25 @@
+import { AsyncLocalStorage } from "@llamaindex/env";
+import { CallbackManager } from "../../callbacks/CallbackManager.js";
+
+const callbackManagerAsyncLocalStorage =
+  new AsyncLocalStorage<CallbackManager>();
+let globalCallbackManager: CallbackManager | null = null;
+
+export function getCallbackManager(): CallbackManager {
+  if (globalCallbackManager === null) {
+    globalCallbackManager = new CallbackManager();
+  }
+
+  return callbackManagerAsyncLocalStorage.getStore() ?? globalCallbackManager;
+}
+
+export function setCallbackManager(callbackManager: CallbackManager) {
+  globalCallbackManager = callbackManager;
+}
+
+export function withCallbackManager<Result>(
+  callbackManager: CallbackManager,
+  fn: () => Result,
+): Result {
+  return callbackManagerAsyncLocalStorage.run(callbackManager, fn);
+}
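
The new internal module keeps a single lazily created global `CallbackManager` plus an `AsyncLocalStorage` scope: `setCallbackManager` swaps the global default, while `withCallbackManager` overrides it only for code running inside the wrapped call. Application code reaches both through the `Settings` facade rather than importing the internal path. A sketch of the two scopes:

```ts
import { CallbackManager, Settings } from "llamaindex";

const globalManager = new CallbackManager();
const scopedManager = new CallbackManager();

// Global default, used whenever no scoped manager is active.
Settings.callbackManager = globalManager;

async function main() {
  await Settings.withCallbackManager(scopedManager, async () => {
    // Inside this callback, getCallbackManager() resolves to scopedManager
    // through the AsyncLocalStorage store.
  });
  // Out here, resolution falls back to globalManager.
}

main().catch(console.error);
```
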
diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts
index 6970cab33..8d20169c3 100644
--- a/packages/core/src/llm/LLM.ts
+++ b/packages/core/src/llm/LLM.ts
@@ -1,7 +1,6 @@
 import type OpenAILLM from "openai";
 import type { ClientOptions as OpenAIClientOptions } from "openai";
 import {
-  getCurrentCallbackManager,
   type Event,
   type EventType,
   type OpenAIStreamToken,
@@ -11,6 +10,7 @@ import {
 import type { ChatCompletionMessageParam } from "openai/resources/index.js";
 import type { LLMOptions } from "portkey-ai";
 import { Tokenizers, globalsHelper } from "../GlobalsHelper.js";
+import { getCallbackManager } from "../internal/settings/CallbackManager.js";
 import type { AnthropicSession } from "./anthropic.js";
 import { getAnthropicSession } from "./anthropic.js";
 import type { AzureOpenAIConfig } from "./azure.js";
@@ -290,7 +290,7 @@ export class OpenAI extends BaseLLM {
     };
 
     //Now let's wrap our stream in a callback
-    const onLLMStream = getCurrentCallbackManager().onLLMStream;
+    const onLLMStream = getCallbackManager().onLLMStream;
 
     const chunk_stream: AsyncIterable<OpenAIStreamToken> =
       await this.session.openai.chat.completions.create({
@@ -835,7 +835,7 @@ export class Portkey extends BaseLLM {
     params?: Record<string, any>,
   ): AsyncIterable<ChatResponseChunk> {
     // Wrapping the stream in a callback.
-    const onLLMStream = getCurrentCallbackManager().onLLMStream;
+    const onLLMStream = getCallbackManager().onLLMStream;
 
     const chunkStream = await this.session.portkey.chatCompletions.create({
       messages,
diff --git a/packages/core/src/llm/mistral.ts b/packages/core/src/llm/mistral.ts
index 526543c2b..7ee798030 100644
--- a/packages/core/src/llm/mistral.ts
+++ b/packages/core/src/llm/mistral.ts
@@ -1,6 +1,6 @@
 import { getEnv } from "@llamaindex/env";
+import { Settings } from "../Settings.js";
 import {
-  getCurrentCallbackManager,
   type Event,
   type EventType,
   type StreamCallbackResponse,
@@ -123,7 +123,7 @@ export class MistralAI extends BaseLLM {
     parentEvent,
   }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
     //Now let's wrap our stream in a callback
-    const onLLMStream = getCurrentCallbackManager().onLLMStream;
+    const onLLMStream = Settings.callbackManager.onLLMStream;
 
     const client = await this.session.getClient();
     const chunkStream = await client.chatStream(this.buildParams(messages));
diff --git a/packages/core/src/llm/utils.ts b/packages/core/src/llm/utils.ts
index 3438c8bd4..ffc40e0cb 100644
--- a/packages/core/src/llm/utils.ts
+++ b/packages/core/src/llm/utils.ts
@@ -1,4 +1,4 @@
-import { getCurrentCallbackManager } from "../callbacks/CallbackManager.js";
+import { getCallbackManager } from "../internal/settings/CallbackManager.js";
 import type { ChatResponse, LLM, LLMChat, MessageContent } from "./types.js";
 
 export async function* streamConverter<S, D>(
@@ -55,7 +55,7 @@ export function llmEvent(
     this: LLM,
     ...params: Parameters<LLMChat["chat"]>
   ): ReturnType<LLMChat["chat"]> {
-    getCurrentCallbackManager().dispatchEvent("llm-start", {
+    getCallbackManager().dispatchEvent("llm-start", {
       payload: {
         messages: params[0].messages,
       },
@@ -82,14 +82,14 @@ export function llmEvent(
           }
           yield chunk;
         }
-        getCurrentCallbackManager().dispatchEvent("llm-end", {
+        getCallbackManager().dispatchEvent("llm-end", {
           payload: {
             response: finalResponse,
           },
         });
       };
     } else {
-      getCurrentCallbackManager().dispatchEvent("llm-end", {
+      getCallbackManager().dispatchEvent("llm-end", {
         payload: {
           response,
         },
diff --git a/packages/core/src/not-allow.js b/packages/core/src/not-allow.js
new file mode 100644
index 000000000..21f6251ac
--- /dev/null
+++ b/packages/core/src/not-allow.js
@@ -0,0 +1 @@
+throw new Error("Not allowed to import internal modules directly.");
diff --git a/packages/core/src/selectors/utils.ts b/packages/core/src/selectors/utils.ts
index 084ec0267..a4db900d8 100644
--- a/packages/core/src/selectors/utils.ts
+++ b/packages/core/src/selectors/utils.ts
@@ -1,4 +1,5 @@
 import type { ServiceContext } from "../ServiceContext.js";
+import { llmFromSettingsOrContext } from "../Settings.js";
 import type { BaseSelector } from "./base.js";
 import { LLMMultiSelector, LLMSingleSelector } from "./llmSelectors.js";
 
@@ -8,7 +9,7 @@ export const getSelectorFromContext = (
 ): BaseSelector => {
   let selector: BaseSelector | null = null;
 
-  const llm = serviceContext.llm;
+  const llm = llmFromSettingsOrContext(serviceContext);
 
   if (isMulti) {
     selector = new LLMMultiSelector({ llm });
diff --git a/packages/core/src/synthesizers/MultiModalResponseSynthesizer.ts b/packages/core/src/synthesizers/MultiModalResponseSynthesizer.ts
index a6454c134..857cbd9a7 100644
--- a/packages/core/src/synthesizers/MultiModalResponseSynthesizer.ts
+++ b/packages/core/src/synthesizers/MultiModalResponseSynthesizer.ts
@@ -2,7 +2,7 @@ import type { ImageNode } from "../Node.js";
 import { MetadataMode, splitNodesByType } from "../Node.js";
 import { Response } from "../Response.js";
 import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
+import { llmFromSettingsOrContext } from "../Settings.js";
 import { imageToDataUrl } from "../embeddings/index.js";
 import type { MessageContentDetail } from "../llm/types.js";
 import { PromptMixin } from "../prompts/Mixin.js";
@@ -18,7 +18,7 @@ export class MultiModalResponseSynthesizer
   extends PromptMixin
   implements BaseSynthesizer
 {
-  serviceContext: ServiceContext;
+  serviceContext?: ServiceContext;
   metadataMode: MetadataMode;
   textQATemplate: TextQaPrompt;
 
@@ -29,7 +29,7 @@ export class MultiModalResponseSynthesizer
   }: Partial<MultiModalResponseSynthesizer> = {}) {
     super();
 
-    this.serviceContext = serviceContext ?? serviceContextFromDefaults();
+    this.serviceContext = serviceContext;
     this.metadataMode = metadataMode ?? MetadataMode.NONE;
     this.textQATemplate = textQATemplate ?? defaultTextQaPrompt;
   }
@@ -85,10 +85,14 @@ export class MultiModalResponseSynthesizer
       { type: "text", text: textPrompt },
       ...images,
     ];
-    const response = await this.serviceContext.llm.complete({
+
+    const llm = llmFromSettingsOrContext(this.serviceContext);
+
+    const response = await llm.complete({
       prompt,
       parentEvent,
     });
+
     return new Response(response.text, nodes);
   }
 }
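
The synthesizer no longer builds a default `ServiceContext`; when none is supplied, `llmFromSettingsOrContext` routes the multimodal completion to the global `Settings.llm`. A construction sketch (the vision-capable model name is illustrative):

```ts
import { MultiModalResponseSynthesizer, OpenAI, Settings } from "llamaindex";

// A multimodal-capable LLM set globally; the synthesizer below uses it
// because no serviceContext is passed to the constructor.
Settings.llm = new OpenAI({ model: "gpt-4-vision-preview", maxTokens: 512 });

const synthesizer = new MultiModalResponseSynthesizer();
```
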
diff --git a/packages/core/src/synthesizers/ResponseSynthesizer.ts b/packages/core/src/synthesizers/ResponseSynthesizer.ts
index 6f5663295..b837a8867 100644
--- a/packages/core/src/synthesizers/ResponseSynthesizer.ts
+++ b/packages/core/src/synthesizers/ResponseSynthesizer.ts
@@ -1,7 +1,6 @@
 import { MetadataMode } from "../Node.js";
 import { Response } from "../Response.js";
 import type { ServiceContext } from "../ServiceContext.js";
-import { serviceContextFromDefaults } from "../ServiceContext.js";
 import { streamConverter } from "../llm/utils.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type { ResponseBuilderPrompts } from "./builders.js";
@@ -21,7 +20,6 @@ export class ResponseSynthesizer
   implements BaseSynthesizer
 {
   responseBuilder: ResponseBuilder;
-  serviceContext: ServiceContext;
   metadataMode: MetadataMode;
 
   constructor({
@@ -35,9 +33,8 @@ export class ResponseSynthesizer
   } = {}) {
     super();
 
-    this.serviceContext = serviceContext ?? serviceContextFromDefaults();
     this.responseBuilder =
-      responseBuilder ?? getResponseBuilder(this.serviceContext);
+      responseBuilder ?? getResponseBuilder(serviceContext);
     this.metadataMode = metadataMode;
   }
 
diff --git a/packages/core/src/synthesizers/builders.ts b/packages/core/src/synthesizers/builders.ts
index 49e1105ad..25aa70ddc 100644
--- a/packages/core/src/synthesizers/builders.ts
+++ b/packages/core/src/synthesizers/builders.ts
@@ -16,6 +16,10 @@ import type { PromptHelper } from "../PromptHelper.js";
 import { getBiggestPrompt } from "../PromptHelper.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type { ServiceContext } from "../ServiceContext.js";
+import {
+  llmFromSettingsOrContext,
+  promptHelperFromSettingsOrContext,
+} from "../Settings.js";
 import type {
   ResponseBuilder,
   ResponseBuilderParamsNonStreaming,
@@ -39,8 +43,8 @@ export class SimpleResponseBuilder implements ResponseBuilder {
   llm: LLM;
   textQATemplate: TextQaPrompt;
 
-  constructor(serviceContext: ServiceContext, textQATemplate?: TextQaPrompt) {
-    this.llm = serviceContext.llm;
+  constructor(serviceContext?: ServiceContext, textQATemplate?: TextQaPrompt) {
+    this.llm = llmFromSettingsOrContext(serviceContext);
     this.textQATemplate = textQATemplate ?? defaultTextQaPrompt;
   }
 
@@ -84,14 +88,14 @@ export class Refine extends PromptMixin implements ResponseBuilder {
   refineTemplate: RefinePrompt;
 
   constructor(
-    serviceContext: ServiceContext,
+    serviceContext?: ServiceContext,
     textQATemplate?: TextQaPrompt,
     refineTemplate?: RefinePrompt,
   ) {
     super();
 
-    this.llm = serviceContext.llm;
-    this.promptHelper = serviceContext.promptHelper;
+    this.llm = llmFromSettingsOrContext(serviceContext);
+    this.promptHelper = promptHelperFromSettingsOrContext(serviceContext);
     this.textQATemplate = textQATemplate ?? defaultTextQaPrompt;
     this.refineTemplate = refineTemplate ?? defaultRefinePrompt;
   }
@@ -293,13 +297,13 @@ export class TreeSummarize extends PromptMixin implements ResponseBuilder {
   summaryTemplate: TreeSummarizePrompt;
 
   constructor(
-    serviceContext: ServiceContext,
+    serviceContext?: ServiceContext,
     summaryTemplate?: TreeSummarizePrompt,
   ) {
     super();
 
-    this.llm = serviceContext.llm;
-    this.promptHelper = serviceContext.promptHelper;
+    this.llm = llmFromSettingsOrContext(serviceContext);
+    this.promptHelper = promptHelperFromSettingsOrContext(serviceContext);
     this.summaryTemplate = summaryTemplate ?? defaultTreeSummarizePrompt;
   }
 
@@ -383,7 +387,7 @@ export class TreeSummarize extends PromptMixin implements ResponseBuilder {
 }
 
 export function getResponseBuilder(
-  serviceContext: ServiceContext,
+  serviceContext?: ServiceContext,
   responseMode?: ResponseMode,
 ): ResponseBuilder {
   switch (responseMode) {
diff --git a/packages/core/tests/CallbackManager.test.ts b/packages/core/tests/CallbackManager.test.ts
index a03ec543b..461a18bc0 100644
--- a/packages/core/tests/CallbackManager.test.ts
+++ b/packages/core/tests/CallbackManager.test.ts
@@ -11,14 +11,12 @@ import {
 import { Document } from "llamaindex/Node";
 import type { ServiceContext } from "llamaindex/ServiceContext";
 import { serviceContextFromDefaults } from "llamaindex/ServiceContext";
+import { Settings } from "llamaindex/Settings";
 import type {
   RetrievalCallbackResponse,
   StreamCallbackResponse,
 } from "llamaindex/callbacks/CallbackManager";
-import {
-  CallbackManager,
-  runWithCallbackManager,
-} from "llamaindex/callbacks/CallbackManager";
+import { CallbackManager } from "llamaindex/callbacks/CallbackManager";
 import { OpenAIEmbedding } from "llamaindex/embeddings/index";
 import { SummaryIndex } from "llamaindex/indices/summary/index";
 import { VectorStoreIndex } from "llamaindex/indices/vectorStore/index";
@@ -83,7 +81,7 @@ describe("CallbackManager: onLLMStream and onRetrieve", () => {
     });
     const queryEngine = vectorStoreIndex.asQueryEngine();
     const query = "What is the author's name?";
-    const response = await runWithCallbackManager(callbackManager, () => {
+    const response = await Settings.withCallbackManager(callbackManager, () => {
       return queryEngine.query({ query });
     });
 
@@ -164,8 +162,9 @@ describe("CallbackManager: onLLMStream and onRetrieve", () => {
       responseSynthesizer,
     });
     const query = "What is the author's name?";
-    const response = await runWithCallbackManager(callbackManager, async () =>
-      queryEngine.query({ query }),
+    const response = await Settings.withCallbackManager(
+      callbackManager,
+      async () => queryEngine.query({ query }),
     );
     expect(response.toString()).toBe("MOCK_TOKEN_1-MOCK_TOKEN_2");
     expect(streamCallbackData).toEqual([
diff --git a/packages/experimental/examples/jsonQueryEngine.ts b/packages/experimental/examples/jsonQueryEngine.ts
index 539093258..2757e60f6 100644
--- a/packages/experimental/examples/jsonQueryEngine.ts
+++ b/packages/experimental/examples/jsonQueryEngine.ts
@@ -1,6 +1,9 @@
 import { JSONQueryEngine } from "@llamaindex/experimental";
 
-import { OpenAI, serviceContextFromDefaults } from "llamaindex";
+import { OpenAI, Settings } from "llamaindex";
+
+// Update LLM
+Settings.llm = new OpenAI({ model: "gpt-4" });
 
 const jsonValue = {
   blogPosts: [
@@ -84,22 +87,14 @@ const jsonSchema = {
 };
 
 async function main() {
-  const llm = new OpenAI({ model: "gpt-4" });
-
-  const serviceContext = serviceContextFromDefaults({
-    llm,
-  });
-
   const jsonQueryEngine = new JSONQueryEngine({
     jsonValue,
     jsonSchema,
-    serviceContext,
   });
 
   const rawQueryEngine = new JSONQueryEngine({
     jsonValue,
     jsonSchema,
-    serviceContext,
     synthesizeResponse: false,
   });
 
diff --git a/tsconfig.json b/tsconfig.json
index 7b3adfd10..84821fc77 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -8,7 +8,6 @@
     "forceConsistentCasingInFileNames": true,
     "strict": true,
     "skipLibCheck": true,
-    "stripInternal": true,
     "outDir": "./lib",
     "tsBuildInfoFile": "./lib/.tsbuildinfo",
     "incremental": true,
-- 
GitLab