diff --git a/benchmarks/embeddings/bench_embeddings.py b/benchmarks/embeddings/bench_embeddings.py
index 1d0320ecb5778a5b9ef44129b769c91c6ed4dd1d..1ea5ea45bbab31ec7ea9a399619c654d7aca264c 100644
--- a/benchmarks/embeddings/bench_embeddings.py
+++ b/benchmarks/embeddings/bench_embeddings.py
@@ -5,8 +5,8 @@ from typing import Callable, List, Optional, Tuple
 import pandas as pd
 
 from llama_index import SimpleDirectoryReader
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 from llama_index.embeddings import OpenAIEmbedding, resolve_embed_model
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 
 def generate_strings(num_strings: int = 100, string_length: int = 10) -> List[str]:
diff --git a/benchmarks/struct_indices/spider/evaluate.py b/benchmarks/struct_indices/spider/evaluate.py
index a914d1a02619779b24457a594ea1870b17f76120..3d65777b651b4f827cc7b5fd9e0b91a55153c802 100644
--- a/benchmarks/struct_indices/spider/evaluate.py
+++ b/benchmarks/struct_indices/spider/evaluate.py
@@ -9,10 +9,10 @@ from typing import Dict, List, Optional
 from spider_utils import create_indexes, load_examples
 from tqdm import tqdm
 
+from llama_index.core.llms.types import ChatMessage, MessageRole
+from llama_index.core.response.schema import Response
 from llama_index.indices.struct_store.sql import SQLQueryMode, SQLStructStoreIndex
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage, MessageRole
-from llama_index.response.schema import Response
 
 logging.getLogger("root").setLevel(logging.WARNING)
 
diff --git a/docs/api_reference/response.rst b/docs/api_reference/response.rst
index c5e98aca71473281ca4dad073700edb4be52e33b..78b33f0414ab83dd8ee9b5cf49726271bdf5e63e 100644
--- a/docs/api_reference/response.rst
+++ b/docs/api_reference/response.rst
@@ -3,6 +3,6 @@
 Response
 =================
 
-.. automodule:: llama_index.response.schema
+.. automodule:: llama_index.core.response.schema
    :members:
    :inherited-members:
diff --git a/docs/examples/query_transformations/query_transform_cookbook.ipynb b/docs/examples/query_transformations/query_transform_cookbook.ipynb
index 5155306bc781a0f692fb3779d1094ac4b27fb9c1..7fc0cf0c44c8e1dd2237d3b99684458c7e9a8bff 100644
--- a/docs/examples/query_transformations/query_transform_cookbook.ipynb
+++ b/docs/examples/query_transformations/query_transform_cookbook.ipynb
@@ -600,7 +600,7 @@
     "from llama_index.agent.react.formatter import ReActChatFormatter\n",
     "from llama_index.agent.react.output_parser import ReActOutputParser\n",
     "from llama_index.tools import FunctionTool\n",
-    "from llama_index.llms.types import ChatMessage"
+    "from llama_index.core.llms.types import ChatMessage"
    ]
   },
   {
diff --git a/llama_index/__init__.py b/llama_index/__init__.py
index 77340926418baf0fcf9e339849642b575d2448e6..0fb73b1917eda6a3761244049015586d3a3e407d 100644
--- a/llama_index/__init__.py
+++ b/llama_index/__init__.py
@@ -11,6 +11,9 @@ from typing import Callable, Optional
 
 # import global eval handler
 from llama_index.callbacks.global_handlers import set_global_handler
+
+# response
+from llama_index.core.response.schema import Response
 from llama_index.data_structs.struct_type import IndexStructType
 
 # embeddings
@@ -63,9 +66,6 @@ from llama_index.prompts import (
 )
 from llama_index.readers import SimpleDirectoryReader, download_loader
 
-# response
-from llama_index.response.schema import Response
-
 # Response Synthesizer
 from llama_index.response_synthesizers.factory import get_response_synthesizer
 from llama_index.schema import Document, QueryBundle
diff --git a/llama_index/agent/legacy/context_retriever_agent.py b/llama_index/agent/legacy/context_retriever_agent.py
index 1636e31a63da177005f678c84abdf1996a7c91b3..2a8c2c0315aaca3dbe703ba973aad2a42f2c722a 100644
--- a/llama_index/agent/legacy/context_retriever_agent.py
+++ b/llama_index/agent/legacy/context_retriever_agent.py
@@ -11,11 +11,11 @@ from llama_index.callbacks import CallbackManager
 from llama_index.chat_engine.types import (
     AgentChatResponse,
 )
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.openai_utils import is_function_calling_model
-from llama_index.llms.types import ChatMessage
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.prompts import PromptTemplate
 from llama_index.schema import NodeWithScore
diff --git a/llama_index/agent/legacy/openai_agent.py b/llama_index/agent/legacy/openai_agent.py
index 28564c5842f1d8e2744bd583c8ad7fc6be58bed5..04de84966a5f76a68ab3379ec1c8c7f325096fb1 100644
--- a/llama_index/agent/legacy/openai_agent.py
+++ b/llama_index/agent/legacy/openai_agent.py
@@ -19,10 +19,10 @@ from llama_index.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
+from llama_index.core.llms.types import ChatMessage, ChatResponse, MessageRole
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.openai_utils import OpenAIToolCall
-from llama_index.llms.types import ChatMessage, ChatResponse, MessageRole
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.objects.base import ObjectRetriever
 from llama_index.tools import BaseTool, ToolOutput, adapt_to_async_tool
diff --git a/llama_index/agent/legacy/react/base.py b/llama_index/agent/legacy/react/base.py
index a86a1c4437c36cb89b422e4d7d23e5e9da1bd41d..e3a59727977f53fab985ff0c7fb5737881bd86bf 100644
--- a/llama_index/agent/legacy/react/base.py
+++ b/llama_index/agent/legacy/react/base.py
@@ -30,10 +30,10 @@ from llama_index.callbacks import (
     trace_method,
 )
 from llama_index.chat_engine.types import AgentChatResponse, StreamingAgentChatResponse
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import ChatMessage, ChatResponse
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import MessageRole
 from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.memory.types import BaseMemory
 from llama_index.objects.base import ObjectRetriever
diff --git a/llama_index/agent/openai/step.py b/llama_index/agent/openai/step.py
index 397559a5b16525516907d2627a478aa3ce93bf88..7e1b712c57502424508013399d400debc8e47668 100644
--- a/llama_index/agent/openai/step.py
+++ b/llama_index/agent/openai/step.py
@@ -27,11 +27,11 @@ from llama_index.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import ChatMessage, ChatResponse
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.openai_utils import OpenAIToolCall
-from llama_index.llms.types import MessageRole
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.memory.types import BaseMemory
 from llama_index.objects.base import ObjectRetriever
diff --git a/llama_index/agent/openai_assistant_agent.py b/llama_index/agent/openai_assistant_agent.py
index c3a932cf5ca5025b0fb7025dc13d661d871aa5ac..15213c74a94f2482471c38d932dbb51966c5fcdd 100644
--- a/llama_index/agent/openai_assistant_agent.py
+++ b/llama_index/agent/openai_assistant_agent.py
@@ -19,7 +19,7 @@ from llama_index.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.tools import BaseTool, ToolOutput, adapt_to_async_tool
 
 logger = logging.getLogger(__name__)
diff --git a/llama_index/agent/react/base.py b/llama_index/agent/react/base.py
index 89dd1e60b34c339f440b4ca4752e5a8aaa7e76ef..731b1e2f23b44632e7916a6b247519ee5b13b272 100644
--- a/llama_index/agent/react/base.py
+++ b/llama_index/agent/react/base.py
@@ -23,9 +23,9 @@ from llama_index.agent.runner.base import AgentRunner
 from llama_index.callbacks import (
     CallbackManager,
 )
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage
 from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.memory.types import BaseMemory
 from llama_index.objects.base import ObjectRetriever
diff --git a/llama_index/agent/react/formatter.py b/llama_index/agent/react/formatter.py
index ab39d29fe5f40b53280dedcfd6f621443cce4af8..f00c21426ddfc7806dffbffd5191160574ce2381 100644
--- a/llama_index/agent/react/formatter.py
+++ b/llama_index/agent/react/formatter.py
@@ -6,7 +6,7 @@ from typing import List, Optional, Sequence
 from llama_index.agent.react.prompts import REACT_CHAT_SYSTEM_HEADER
 from llama_index.agent.react.types import BaseReasoningStep, ObservationReasoningStep
 from llama_index.bridge.pydantic import BaseModel
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.tools import BaseTool
 
 
diff --git a/llama_index/agent/react/step.py b/llama_index/agent/react/step.py
index 4d33ed475081e33f24194c42035fe37f6d5c854a..4b855b783efca280fc462cb6a95bc3e34c6b913a 100644
--- a/llama_index/agent/react/step.py
+++ b/llama_index/agent/react/step.py
@@ -41,10 +41,10 @@ from llama_index.chat_engine.types import (
     AgentChatResponse,
     StreamingAgentChatResponse,
 )
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import ChatMessage, ChatResponse
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import MessageRole
 from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.memory.types import BaseMemory
 from llama_index.objects.base import ObjectRetriever
diff --git a/llama_index/agent/react_multimodal/step.py b/llama_index/agent/react_multimodal/step.py
index b23066410276c687cd4995000e353a48c11785bd..c961540ffae535f2930f777e5eda0393a34d7b01 100644
--- a/llama_index/agent/react_multimodal/step.py
+++ b/llama_index/agent/react_multimodal/step.py
@@ -36,8 +36,8 @@ from llama_index.chat_engine.types import (
     AGENT_CHAT_RESPONSE_TYPE,
     AgentChatResponse,
 )
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import ChatMessage, ChatResponse
-from llama_index.llms.types import MessageRole
 from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.memory.types import BaseMemory
 from llama_index.multi_modal_llms.base import MultiModalLLM
diff --git a/llama_index/agent/types.py b/llama_index/agent/types.py
index f8b5b2a276bd45fd1b2c3216beb6dad537cba2fa..08630b95f76b220349b3870141fef856b0c5594b 100644
--- a/llama_index/agent/types.py
+++ b/llama_index/agent/types.py
@@ -6,11 +6,11 @@ from typing import Any, Dict, List, Optional
 from llama_index.bridge.pydantic import BaseModel, Field
 from llama_index.callbacks import trace_method
 from llama_index.chat_engine.types import BaseChatEngine, StreamingAgentChatResponse
-from llama_index.core import BaseQueryEngine
-from llama_index.llms.types import ChatMessage
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.llms.types import ChatMessage
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.memory.types import BaseMemory
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import QueryBundle
 
 
diff --git a/llama_index/agent/utils.py b/llama_index/agent/utils.py
index d41dc1cf0d369b918121690455f8caada9ce6804..b95e86f22ac61fd6664ab0ddd2f72dd5b967b6e6 100644
--- a/llama_index/agent/utils.py
+++ b/llama_index/agent/utils.py
@@ -2,8 +2,8 @@
 
 
 from llama_index.agent.types import TaskStep
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import ChatMessage
-from llama_index.llms.types import MessageRole
 from llama_index.memory import BaseMemory
 
 
diff --git a/llama_index/callbacks/finetuning_handler.py b/llama_index/callbacks/finetuning_handler.py
index 577e1fe104b54b4677b33675d76e4950d3999cb0..288a1235bd747dcad42821a4d0f93d373321349a 100644
--- a/llama_index/callbacks/finetuning_handler.py
+++ b/llama_index/callbacks/finetuning_handler.py
@@ -35,7 +35,7 @@ class BaseFinetuningHandler(BaseCallbackHandler):
         **kwargs: Any,
     ) -> str:
         """Run when an event starts and return id of event."""
-        from llama_index.llms.types import ChatMessage, MessageRole
+        from llama_index.core.llms.types import ChatMessage, MessageRole
 
         if event_type == CBEventType.LLM:
             cur_messages = []
@@ -68,7 +68,7 @@ class BaseFinetuningHandler(BaseCallbackHandler):
         **kwargs: Any,
     ) -> None:
         """Run when an event ends."""
-        from llama_index.llms.types import ChatMessage, MessageRole
+        from llama_index.core.llms.types import ChatMessage, MessageRole
 
         if (
             event_type == CBEventType.LLM
diff --git a/llama_index/chat_engine/condense_plus_context.py b/llama_index/chat_engine/condense_plus_context.py
index bc3b14ab4ec7975c7ee42213326d823eef2de227..620a15ef19cd681a8d5feab45f0a06c6e29792d2 100644
--- a/llama_index/chat_engine/condense_plus_context.py
+++ b/llama_index/chat_engine/condense_plus_context.py
@@ -10,12 +10,12 @@ from llama_index.chat_engine.types import (
     StreamingAgentChatResponse,
     ToolOutput,
 )
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.indices.base_retriever import BaseRetriever
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.indices.service_context import ServiceContext
 from llama_index.llms.generic_utils import messages_to_history_str
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.prompts.base import PromptTemplate
diff --git a/llama_index/chat_engine/condense_question.py b/llama_index/chat_engine/condense_question.py
index ef8f2f19a13a7f164cf9b45b3f4dd2c08f23bcac..27430eaf889c46d62aa81568e0ba03929a43c4be 100644
--- a/llama_index/chat_engine/condense_question.py
+++ b/llama_index/chat_engine/condense_question.py
@@ -9,13 +9,13 @@ from llama_index.chat_engine.types import (
     StreamingAgentChatResponse,
 )
 from llama_index.chat_engine.utils import response_gen_from_query_engine
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.llms.types import ChatMessage, MessageRole
+from llama_index.core.response.schema import RESPONSE_TYPE, StreamingResponse
 from llama_index.llm_predictor.base import LLMPredictorType
 from llama_index.llms.generic_utils import messages_to_history_str
-from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate
-from llama_index.response.schema import RESPONSE_TYPE, StreamingResponse
 from llama_index.service_context import ServiceContext
 from llama_index.tools import ToolOutput
 
diff --git a/llama_index/chat_engine/context.py b/llama_index/chat_engine/context.py
index 04b76f136372f1894261a79c7c5757d1827a3238..694b5bac487d4e5aa6445f34620c0d2f7a08865b 100644
--- a/llama_index/chat_engine/context.py
+++ b/llama_index/chat_engine/context.py
@@ -9,9 +9,9 @@ from llama_index.chat_engine.types import (
     StreamingAgentChatResponse,
     ToolOutput,
 )
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.schema import MetadataMode, NodeWithScore, QueryBundle
diff --git a/llama_index/chat_engine/simple.py b/llama_index/chat_engine/simple.py
index 4e95aeb5dafd1e438f85a97dc77a9bcd0fabaf8e..a9ea59cb1707d528f721990b73ac90ab53db0e76 100644
--- a/llama_index/chat_engine/simple.py
+++ b/llama_index/chat_engine/simple.py
@@ -8,8 +8,8 @@ from llama_index.chat_engine.types import (
     BaseChatEngine,
     StreamingAgentChatResponse,
 )
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage
 from llama_index.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.service_context import ServiceContext
 
diff --git a/llama_index/chat_engine/types.py b/llama_index/chat_engine/types.py
index 64ccad79983598a4c2b9816f5979b9b3d1c295fc..84d6452643e3d0e864841a09a0fec0eda45c75d9 100644
--- a/llama_index/chat_engine/types.py
+++ b/llama_index/chat_engine/types.py
@@ -7,9 +7,13 @@ from enum import Enum
 from threading import Event
 from typing import AsyncGenerator, Generator, List, Optional, Union
 
-from llama_index.llms.types import ChatMessage, ChatResponseAsyncGen, ChatResponseGen
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+)
+from llama_index.core.response.schema import Response, StreamingResponse
 from llama_index.memory import BaseMemory
-from llama_index.response.schema import Response, StreamingResponse
 from llama_index.schema import NodeWithScore
 from llama_index.tools import ToolOutput
 
diff --git a/llama_index/chat_engine/utils.py b/llama_index/chat_engine/utils.py
index b33e8ff6be61342482b30aae5bccc6dda0235bbc..a85336e2e029332ba5868167f16efa3a35d53820 100644
--- a/llama_index/chat_engine/utils.py
+++ b/llama_index/chat_engine/utils.py
@@ -1,4 +1,4 @@
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
diff --git a/llama_index/core/__init__.py b/llama_index/core/__init__.py
index bd2300c87746a1088eea2694eb5c18290d71a4d0..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/llama_index/core/__init__.py
+++ b/llama_index/core/__init__.py
@@ -1,13 +0,0 @@
-from llama_index.core.base_auto_retriever import BaseAutoRetriever
-from llama_index.core.base_multi_modal_retriever import MultiModalRetriever
-from llama_index.core.base_query_engine import BaseQueryEngine
-from llama_index.core.base_retriever import BaseRetriever
-from llama_index.core.image_retriever import BaseImageRetriever
-
-__all__ = [
-    "BaseRetriever",
-    "BaseAutoRetriever",
-    "BaseQueryEngine",
-    "MultiModalRetriever",
-    "BaseImageRetriever",
-]
diff --git a/llama_index/core/base_query_engine.py b/llama_index/core/base_query_engine.py
index c7546b79f5ea9cfe851efd684baff967d615387f..934b37314e2caf119ce45028f7004b1d85433552 100644
--- a/llama_index/core/base_query_engine.py
+++ b/llama_index/core/base_query_engine.py
@@ -5,14 +5,16 @@ from abc import abstractmethod
 from typing import Any, Dict, List, Optional, Sequence
 
 from llama_index.callbacks.base import CallbackManager
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.prompts.mixin import PromptDictType, PromptMixin
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.schema import NodeWithScore, QueryBundle, QueryType
 
 logger = logging.getLogger(__name__)
 
 
 class BaseQueryEngine(PromptMixin):
+    """Base query engine."""
+
     def __init__(self, callback_manager: Optional[CallbackManager]) -> None:
         self.callback_manager = callback_manager or CallbackManager([])
 
diff --git a/llama_index/core/base_retriever.py b/llama_index/core/base_retriever.py
index 9e6871c54b3d55d7c6819ef8a1c989642cd3aff5..e36607c4e3caec50e80874b533b6a0ad2ad4c6ab 100644
--- a/llama_index/core/base_retriever.py
+++ b/llama_index/core/base_retriever.py
@@ -4,17 +4,16 @@ from typing import List, Optional
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.indices.query.schema import QueryBundle, QueryType
-from llama_index.indices.service_context import ServiceContext
 from llama_index.prompts.mixin import PromptDictType, PromptMixin, PromptMixinType
-from llama_index.schema import NodeWithScore
+from llama_index.schema import NodeWithScore, QueryBundle, QueryType
+from llama_index.service_context import ServiceContext
 
 
 class BaseRetriever(PromptMixin):
     """Base retriever."""
 
     def __init__(self, callback_manager: Optional[CallbackManager] = None) -> None:
-        self.callback_manager = callback_manager or CallbackManager()
+        self.callback_manager = callback_manager or CallbackManager([])
 
     def _check_callback_manager(self) -> None:
         """Check callback manager."""
diff --git a/llama_index/core/embeddings/__init__.py b/llama_index/core/embeddings/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llama_index/core/embeddings/base.py b/llama_index/core/embeddings/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..474422dfde9133ac90425979161bb5d2c3555974
--- /dev/null
+++ b/llama_index/core/embeddings/base.py
@@ -0,0 +1,354 @@
+"""Base embeddings file."""
+
+import asyncio
+from abc import abstractmethod
+from enum import Enum
+from typing import Any, Callable, Coroutine, List, Optional, Tuple
+
+import numpy as np
+
+from llama_index.bridge.pydantic import Field, validator
+from llama_index.callbacks.base import CallbackManager
+from llama_index.callbacks.schema import CBEventType, EventPayload
+from llama_index.constants import (
+    DEFAULT_EMBED_BATCH_SIZE,
+)
+from llama_index.schema import BaseNode, MetadataMode, TransformComponent
+from llama_index.utils import get_tqdm_iterable
+
+# TODO: change to numpy array
+Embedding = List[float]
+
+
+class SimilarityMode(str, Enum):
+    """Modes for similarity/distance."""
+
+    DEFAULT = "cosine"
+    DOT_PRODUCT = "dot_product"
+    EUCLIDEAN = "euclidean"
+
+
+def mean_agg(embeddings: List[Embedding]) -> Embedding:
+    """Mean aggregation for embeddings."""
+    return list(np.array(embeddings).mean(axis=0))
+
+
+def similarity(
+    embedding1: Embedding,
+    embedding2: Embedding,
+    mode: SimilarityMode = SimilarityMode.DEFAULT,
+) -> float:
+    """Get embedding similarity."""
+    if mode == SimilarityMode.EUCLIDEAN:
+        # Using -euclidean distance as similarity to achieve same ranking order
+        return -float(np.linalg.norm(np.array(embedding1) - np.array(embedding2)))
+    elif mode == SimilarityMode.DOT_PRODUCT:
+        return np.dot(embedding1, embedding2)
+    else:
+        product = np.dot(embedding1, embedding2)
+        norm = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
+        return product / norm
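+
+
+# Quick illustration (values follow directly from the definitions above): under
+# the default cosine mode, similarity([1.0, 0.0], [0.0, 1.0]) == 0.0, while
+# mean_agg([[1.0, 2.0], [3.0, 4.0]]) == [2.0, 3.0].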
+
+
+class BaseEmbedding(TransformComponent):
+    """Base class for embeddings."""
+
+    model_name: str = Field(
+        default="unknown", description="The name of the embedding model."
+    )
+    embed_batch_size: int = Field(
+        default=DEFAULT_EMBED_BATCH_SIZE,
+        description="The batch size for embedding calls.",
+        gt=0,
+        le=2048,
+    )
+    callback_manager: CallbackManager = Field(
+        default_factory=lambda: CallbackManager([]), exclude=True
+    )
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    @validator("callback_manager", pre=True)
+    def _validate_callback_manager(
+        cls, v: Optional[CallbackManager]
+    ) -> CallbackManager:
+        if v is None:
+            return CallbackManager([])
+        return v
+
+    @abstractmethod
+    def _get_query_embedding(self, query: str) -> Embedding:
+        """
+        Embed the input query synchronously.
+
+        Subclasses should implement this method. Reference get_query_embedding's
+        docstring for more information.
+        """
+
+    @abstractmethod
+    async def _aget_query_embedding(self, query: str) -> Embedding:
+        """
+        Embed the input query asynchronously.
+
+        Subclasses should implement this method. Reference get_query_embedding's
+        docstring for more information.
+        """
+
+    def get_query_embedding(self, query: str) -> Embedding:
+        """
+        Embed the input query.
+
+        When embedding a query, depending on the model, a special instruction
+        can be prepended to the raw query string. For example, "Represent the
+        question for retrieving supporting documents: ". If you're curious,
+        other examples of predefined instructions can be found in
+        embeddings/huggingface_utils.py.
+        """
+        with self.callback_manager.event(
+            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
+        ) as event:
+            query_embedding = self._get_query_embedding(query)
+
+            event.on_end(
+                payload={
+                    EventPayload.CHUNKS: [query],
+                    EventPayload.EMBEDDINGS: [query_embedding],
+                },
+            )
+        return query_embedding
+
+    async def aget_query_embedding(self, query: str) -> Embedding:
+        """Get query embedding."""
+        with self.callback_manager.event(
+            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
+        ) as event:
+            query_embedding = await self._aget_query_embedding(query)
+
+            event.on_end(
+                payload={
+                    EventPayload.CHUNKS: [query],
+                    EventPayload.EMBEDDINGS: [query_embedding],
+                },
+            )
+        return query_embedding
+
+    def get_agg_embedding_from_queries(
+        self,
+        queries: List[str],
+        agg_fn: Optional[Callable[..., Embedding]] = None,
+    ) -> Embedding:
+        """Get aggregated embedding from multiple queries."""
+        query_embeddings = [self.get_query_embedding(query) for query in queries]
+        agg_fn = agg_fn or mean_agg
+        return agg_fn(query_embeddings)
+
+    async def aget_agg_embedding_from_queries(
+        self,
+        queries: List[str],
+        agg_fn: Optional[Callable[..., Embedding]] = None,
+    ) -> Embedding:
+        """Async get aggregated embedding from multiple queries."""
+        query_embeddings = [await self.aget_query_embedding(query) for query in queries]
+        agg_fn = agg_fn or mean_agg
+        return agg_fn(query_embeddings)
+
+    @abstractmethod
+    def _get_text_embedding(self, text: str) -> Embedding:
+        """
+        Embed the input text synchronously.
+
+        Subclasses should implement this method. Reference get_text_embedding's
+        docstring for more information.
+        """
+
+    async def _aget_text_embedding(self, text: str) -> Embedding:
+        """
+        Embed the input text asynchronously.
+
+        Subclasses can implement this method if there is a true async
+        implementation. Reference get_text_embedding's docstring for more
+        information.
+        """
+        # Default implementation just falls back on _get_text_embedding
+        return self._get_text_embedding(text)
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
+        """
+        Embed the input sequence of text synchronously.
+
+        Subclasses can implement this method if batch queries are supported.
+        """
+        # Default implementation just loops over _get_text_embedding
+        return [self._get_text_embedding(text) for text in texts]
+
+    async def _aget_text_embeddings(self, texts: List[str]) -> List[Embedding]:
+        """
+        Embed the input sequence of text asynchronously.
+
+        Subclasses can implement this method if batch queries are supported.
+        """
+        return await asyncio.gather(
+            *[self._aget_text_embedding(text) for text in texts]
+        )
+
+    def get_text_embedding(self, text: str) -> Embedding:
+        """
+        Embed the input text.
+
+        When embedding text, depending on the model, a special instruction
+        can be prepended to the raw text string. For example, "Represent the
+        document for retrieval: ". If you're curious, other examples of
+        predefined instructions can be found in embeddings/huggingface_utils.py.
+        """
+        with self.callback_manager.event(
+            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
+        ) as event:
+            text_embedding = self._get_text_embedding(text)
+
+            event.on_end(
+                payload={
+                    EventPayload.CHUNKS: [text],
+                    EventPayload.EMBEDDINGS: [text_embedding],
+                }
+            )
+
+        return text_embedding
+
+    async def aget_text_embedding(self, text: str) -> Embedding:
+        """Async get text embedding."""
+        with self.callback_manager.event(
+            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
+        ) as event:
+            text_embedding = await self._aget_text_embedding(text)
+
+            event.on_end(
+                payload={
+                    EventPayload.CHUNKS: [text],
+                    EventPayload.EMBEDDINGS: [text_embedding],
+                }
+            )
+
+        return text_embedding
+
+    def get_text_embedding_batch(
+        self,
+        texts: List[str],
+        show_progress: bool = False,
+        **kwargs: Any,
+    ) -> List[Embedding]:
+        """Get a list of text embeddings, with batching."""
+        cur_batch: List[str] = []
+        result_embeddings: List[Embedding] = []
+
+        queue_with_progress = enumerate(
+            get_tqdm_iterable(texts, show_progress, "Generating embeddings")
+        )
+
+        for idx, text in queue_with_progress:
+            cur_batch.append(text)
+            if idx == len(texts) - 1 or len(cur_batch) == self.embed_batch_size:
+                # flush
+                with self.callback_manager.event(
+                    CBEventType.EMBEDDING,
+                    payload={EventPayload.SERIALIZED: self.to_dict()},
+                ) as event:
+                    embeddings = self._get_text_embeddings(cur_batch)
+                    result_embeddings.extend(embeddings)
+                    event.on_end(
+                        payload={
+                            EventPayload.CHUNKS: cur_batch,
+                            EventPayload.EMBEDDINGS: embeddings,
+                        },
+                    )
+                cur_batch = []
+
+        return result_embeddings
+
+    async def aget_text_embedding_batch(
+        self, texts: List[str], show_progress: bool = False
+    ) -> List[Embedding]:
+        """Asynchronously get a list of text embeddings, with batching."""
+        cur_batch: List[str] = []
+        callback_payloads: List[Tuple[str, List[str]]] = []
+        result_embeddings: List[Embedding] = []
+        embeddings_coroutines: List[Coroutine] = []
+        for idx, text in enumerate(texts):
+            cur_batch.append(text)
+            if idx == len(texts) - 1 or len(cur_batch) == self.embed_batch_size:
+                # flush
+                event_id = self.callback_manager.on_event_start(
+                    CBEventType.EMBEDDING,
+                    payload={EventPayload.SERIALIZED: self.to_dict()},
+                )
+                callback_payloads.append((event_id, cur_batch))
+                embeddings_coroutines.append(self._aget_text_embeddings(cur_batch))
+                cur_batch = []
+
+        # flatten the results of asyncio.gather, which is a list of embeddings lists
+        nested_embeddings = []
+        if show_progress:
+            try:
+                from tqdm.asyncio import tqdm_asyncio
+
+                # tqdm_asyncio.gather preserves submission order (unlike bare
+                # asyncio.as_completed), keeping each batch aligned with its
+                # callback payload below.
+                nested_embeddings = await tqdm_asyncio.gather(
+                    *embeddings_coroutines,
+                    total=len(embeddings_coroutines),
+                    desc="Generating embeddings",
+                )
+            except ImportError:
+                nested_embeddings = await asyncio.gather(*embeddings_coroutines)
+        else:
+            nested_embeddings = await asyncio.gather(*embeddings_coroutines)
+
+        result_embeddings = [
+            embedding for embeddings in nested_embeddings for embedding in embeddings
+        ]
+
+        for (event_id, text_batch), embeddings in zip(
+            callback_payloads, nested_embeddings
+        ):
+            self.callback_manager.on_event_end(
+                CBEventType.EMBEDDING,
+                payload={
+                    EventPayload.CHUNKS: text_batch,
+                    EventPayload.EMBEDDINGS: embeddings,
+                },
+                event_id=event_id,
+            )
+
+        return result_embeddings
+
+    def similarity(
+        self,
+        embedding1: Embedding,
+        embedding2: Embedding,
+        mode: SimilarityMode = SimilarityMode.DEFAULT,
+    ) -> float:
+        """Get embedding similarity."""
+        return similarity(embedding1=embedding1, embedding2=embedding2, mode=mode)
+
+    def __call__(self, nodes: List[BaseNode], **kwargs: Any) -> List[BaseNode]:
+        embeddings = self.get_text_embedding_batch(
+            [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes],
+            **kwargs,
+        )
+
+        for node, embedding in zip(nodes, embeddings):
+            node.embedding = embedding
+
+        return nodes
+
+    async def acall(self, nodes: List[BaseNode], **kwargs: Any) -> List[BaseNode]:
+        embeddings = await self.aget_text_embedding_batch(
+            [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes],
+            **kwargs,
+        )
+
+        for node, embedding in zip(nodes, embeddings):
+            node.embedding = embedding
+
+        return nodes
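+
+
+# A minimal, hypothetical subclass only needs to fill in the abstract hooks;
+# batching, callbacks, and query aggregation are inherited from BaseEmbedding:
+#
+#     class ConstantEmbedding(BaseEmbedding):
+#         def _get_query_embedding(self, query: str) -> Embedding:
+#             return [1.0, 0.0]
+#
+#         async def _aget_query_embedding(self, query: str) -> Embedding:
+#             return [1.0, 0.0]
+#
+#         def _get_text_embedding(self, text: str) -> Embedding:
+#             return [1.0, 0.0]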
diff --git a/llama_index/core/llms/__init__.py b/llama_index/core/llms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llama_index/core/llms/types.py b/llama_index/core/llms/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..9db785861d0a20d2b702d9aabe24c6bb07202d7b
--- /dev/null
+++ b/llama_index/core/llms/types.py
@@ -0,0 +1,110 @@
+from enum import Enum
+from typing import Any, AsyncGenerator, Generator, Optional
+
+from llama_index.bridge.pydantic import BaseModel, Field
+from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
+
+
+class MessageRole(str, Enum):
+    """Message role."""
+
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+    FUNCTION = "function"
+    TOOL = "tool"
+
+
+# ===== Generic Model Input - Chat =====
+class ChatMessage(BaseModel):
+    """Chat message."""
+
+    role: MessageRole = MessageRole.USER
+    content: Optional[Any] = ""
+    additional_kwargs: dict = Field(default_factory=dict)
+
+    def __str__(self) -> str:
+        return f"{self.role.value}: {self.content}"
+
+
+# ===== Generic Model Output - Chat =====
+class ChatResponse(BaseModel):
+    """Chat response."""
+
+    message: ChatMessage
+    raw: Optional[dict] = None
+    delta: Optional[str] = None
+    additional_kwargs: dict = Field(default_factory=dict)
+
+    def __str__(self) -> str:
+        return str(self.message)
+
+
+ChatResponseGen = Generator[ChatResponse, None, None]
+ChatResponseAsyncGen = AsyncGenerator[ChatResponse, None]
+
+
+# ===== Generic Model Output - Completion =====
+class CompletionResponse(BaseModel):
+    """
+    Completion response.
+
+    Fields:
+        text: Text content of the response if not streaming, or if streaming,
+            the current extent of streamed text.
+        additional_kwargs: Additional information on the response (e.g. token
+            counts, function calling information).
+        raw: Optional raw JSON that was parsed to populate text, if relevant.
+        delta: New text that just streamed in (only relevant when streaming).
+    """
+
+    text: str
+    additional_kwargs: dict = Field(default_factory=dict)
+    raw: Optional[dict] = None
+    delta: Optional[str] = None
+
+    def __str__(self) -> str:
+        return self.text
+
+
+CompletionResponseGen = Generator[CompletionResponse, None, None]
+CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]
+
+
+class LLMMetadata(BaseModel):
+    context_window: int = Field(
+        default=DEFAULT_CONTEXT_WINDOW,
+        description=(
+            "Total number of tokens the model can be input and output for one response."
+        ),
+    )
+    num_output: int = Field(
+        default=DEFAULT_NUM_OUTPUTS,
+        description="Number of tokens the model can output when generating a response.",
+    )
+    is_chat_model: bool = Field(
+        default=False,
+        description=(
+            "Set True if the model exposes a chat interface (i.e. can be passed a"
+            " sequence of messages, rather than text), like OpenAI's"
+            " /v1/chat/completions endpoint."
+        ),
+    )
+    is_function_calling_model: bool = Field(
+        default=False,
+        # SEE: https://openai.com/blog/function-calling-and-other-api-updates
+        description=(
+            "Set True if the model supports function calling messages, similar to"
+            " OpenAI's function calling API. For example, converting 'Email Anya to"
+            " see if she wants to get coffee next Friday' to a function call like"
+            " `send_email(to: string, body: string)`."
+        ),
+    )
+    model_name: str = Field(
+        default="unknown",
+        description=(
+            "The model's name used for logging, testing, and sanity checking. For some"
+            " models this can be automatically discerned. For other models, like"
+            " locally loaded models, this must be manually specified."
+        ),
+    )
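+
+
+# Illustrative only: a hypothetical locally loaded model might report
+# LLMMetadata(context_window=4096, num_output=256, is_chat_model=True,
+# model_name="local-model").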
diff --git a/llama_index/core/response/__init__.py b/llama_index/core/response/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llama_index/core/response/schema.py b/llama_index/core/response/schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..1834b6ccf1f5c820fb076993937349605170cfeb
--- /dev/null
+++ b/llama_index/core/response/schema.py
@@ -0,0 +1,142 @@
+"""Response schema."""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+
+from llama_index.bridge.pydantic import BaseModel
+from llama_index.schema import NodeWithScore
+from llama_index.types import TokenGen
+from llama_index.utils import truncate_text
+
+
+@dataclass
+class Response:
+    """Response object.
+
+    Returned if streaming=False.
+
+    Attributes:
+        response: The response text.
+
+    """
+
+    response: Optional[str]
+    source_nodes: List[NodeWithScore] = field(default_factory=list)
+    metadata: Optional[Dict[str, Any]] = None
+
+    def __str__(self) -> str:
+        """Convert to string representation."""
+        return self.response or "None"
+
+    def get_formatted_sources(self, length: int = 100) -> str:
+        """Get formatted sources text."""
+        texts = []
+        for source_node in self.source_nodes:
+            fmt_text_chunk = truncate_text(source_node.node.get_content(), length)
+            doc_id = source_node.node.node_id or "None"
+            source_text = f"> Source (Doc id: {doc_id}): {fmt_text_chunk}"
+            texts.append(source_text)
+        return "\n\n".join(texts)
+
+
+@dataclass
+class PydanticResponse:
+    """PydanticResponse object.
+
+    Returned if streaming=False.
+
+    Attributes:
+        response: The response text.
+
+    """
+
+    response: Optional[BaseModel]
+    source_nodes: List[NodeWithScore] = field(default_factory=list)
+    metadata: Optional[Dict[str, Any]] = None
+
+    def __str__(self) -> str:
+        """Convert to string representation."""
+        return self.response.json() if self.response else "None"
+
+    def __getattr__(self, name: str) -> Any:
+        """Get attribute, but prioritize the pydantic  response object."""
+        if self.response is not None and name in self.response.dict():
+            return getattr(self.response, name)
+        else:
+            return None
+
+    def get_formatted_sources(self, length: int = 100) -> str:
+        """Get formatted sources text."""
+        texts = []
+        for source_node in self.source_nodes:
+            fmt_text_chunk = truncate_text(source_node.node.get_content(), length)
+            doc_id = source_node.node.node_id or "None"
+            source_text = f"> Source (Doc id: {doc_id}): {fmt_text_chunk}"
+            texts.append(source_text)
+        return "\n\n".join(texts)
+
+    def get_response(self) -> Response:
+        """Get a standard response object."""
+        response_txt = self.response.json() if self.response else "None"
+        return Response(response_txt, self.source_nodes, self.metadata)
+
+
+@dataclass
+class StreamingResponse:
+    """StreamingResponse object.
+
+    Returned if streaming=True.
+
+    Attributes:
+        response_gen: The response generator.
+
+    """
+
+    response_gen: TokenGen
+    source_nodes: List[NodeWithScore] = field(default_factory=list)
+    metadata: Optional[Dict[str, Any]] = None
+    response_txt: Optional[str] = None
+
+    def __str__(self) -> str:
+        """Convert to string representation."""
+        if self.response_txt is None and self.response_gen is not None:
+            response_txt = ""
+            for text in self.response_gen:
+                response_txt += text
+            self.response_txt = response_txt
+        return self.response_txt or "None"
+
+    def get_response(self) -> Response:
+        """Get a standard response object."""
+        if self.response_txt is None and self.response_gen is not None:
+            response_txt = ""
+            for text in self.response_gen:
+                response_txt += text
+            self.response_txt = response_txt
+        return Response(self.response_txt, self.source_nodes, self.metadata)
+
+    def print_response_stream(self) -> None:
+        """Print the response stream."""
+        if self.response_txt is None and self.response_gen is not None:
+            response_txt = ""
+            for text in self.response_gen:
+                print(text, end="", flush=True)
+                response_txt += text
+            self.response_txt = response_txt
+        else:
+            print(self.response_txt)
+
+    def get_formatted_sources(self, length: int = 100, trim_text: bool = True) -> str:
+        """Get formatted sources text."""
+        texts = []
+        for source_node in self.source_nodes:
+            fmt_text_chunk = source_node.node.get_content()
+            if trim_text:
+                fmt_text_chunk = truncate_text(fmt_text_chunk, length)
+            node_id = source_node.node.node_id or "None"
+            source_text = f"> Source (Node id: {node_id}): {fmt_text_chunk}"
+            texts.append(source_text)
+        return "\n\n".join(texts)
+
+
+RESPONSE_TYPE = Union[Response, StreamingResponse, PydanticResponse]
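+
+# Callers can branch on the concrete member of this union, e.g. checking
+# isinstance(result, StreamingResponse) before calling print_response_stream().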
diff --git a/llama_index/embeddings/adapter.py b/llama_index/embeddings/adapter.py
index c21fe4be2a256ceab535592669ed85608a2e6dbc..5e7b9bfae75e1e018f667ddeed3cc239adad05a9 100644
--- a/llama_index/embeddings/adapter.py
+++ b/llama_index/embeddings/adapter.py
@@ -5,7 +5,8 @@ from typing import Any, List, Optional, Type, cast
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
+from llama_index.core.embeddings.base import BaseEmbedding
 from llama_index.utils import infer_torch_device
 
 logger = logging.getLogger(__name__)
diff --git a/llama_index/embeddings/azure_openai.py b/llama_index/embeddings/azure_openai.py
index 8c7910df673d1dc348edf7dec94ef2b2a1d76b99..efb96a4e646eb01fc058440511d7fedbcfadb7ce 100644
--- a/llama_index/embeddings/azure_openai.py
+++ b/llama_index/embeddings/azure_openai.py
@@ -5,7 +5,7 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
 
 from llama_index.bridge.pydantic import Field, PrivateAttr, root_validator
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE
+from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
 from llama_index.embeddings.openai import (
     OpenAIEmbedding,
     OpenAIEmbeddingMode,
diff --git a/llama_index/embeddings/base.py b/llama_index/embeddings/base.py
index 474422dfde9133ac90425979161bb5d2c3555974..97028437cd4b0eb6bf3b761ff0e344e4ea7cb5ef 100644
--- a/llama_index/embeddings/base.py
+++ b/llama_index/embeddings/base.py
@@ -1,354 +1,23 @@
-"""Base embeddings file."""
+"""Base embeddings file.
 
-import asyncio
-from abc import abstractmethod
-from enum import Enum
-from typing import Any, Callable, Coroutine, List, Optional, Tuple
+Maintained for backwards compatibility.
 
-import numpy as np
+"""
 
-from llama_index.bridge.pydantic import Field, validator
-from llama_index.callbacks.base import CallbackManager
-from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.constants import (
+from llama_index.core.embeddings.base import (
     DEFAULT_EMBED_BATCH_SIZE,
+    BaseEmbedding,
+    Embedding,
+    SimilarityMode,
+    mean_agg,
+    similarity,
 )
-from llama_index.schema import BaseNode, MetadataMode, TransformComponent
-from llama_index.utils import get_tqdm_iterable
 
-# TODO: change to numpy array
-Embedding = List[float]
-
-
-class SimilarityMode(str, Enum):
-    """Modes for similarity/distance."""
-
-    DEFAULT = "cosine"
-    DOT_PRODUCT = "dot_product"
-    EUCLIDEAN = "euclidean"
-
-
-def mean_agg(embeddings: List[Embedding]) -> Embedding:
-    """Mean aggregation for embeddings."""
-    return list(np.array(embeddings).mean(axis=0))
-
-
-def similarity(
-    embedding1: Embedding,
-    embedding2: Embedding,
-    mode: SimilarityMode = SimilarityMode.DEFAULT,
-) -> float:
-    """Get embedding similarity."""
-    if mode == SimilarityMode.EUCLIDEAN:
-        # Using -euclidean distance as similarity to achieve same ranking order
-        return -float(np.linalg.norm(np.array(embedding1) - np.array(embedding2)))
-    elif mode == SimilarityMode.DOT_PRODUCT:
-        return np.dot(embedding1, embedding2)
-    else:
-        product = np.dot(embedding1, embedding2)
-        norm = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
-        return product / norm
-
-
-class BaseEmbedding(TransformComponent):
-    """Base class for embeddings."""
-
-    model_name: str = Field(
-        default="unknown", description="The name of the embedding model."
-    )
-    embed_batch_size: int = Field(
-        default=DEFAULT_EMBED_BATCH_SIZE,
-        description="The batch size for embedding calls.",
-        gt=0,
-        lte=2048,
-    )
-    callback_manager: CallbackManager = Field(
-        default_factory=lambda: CallbackManager([]), exclude=True
-    )
-
-    class Config:
-        arbitrary_types_allowed = True
-
-    @validator("callback_manager", pre=True)
-    def _validate_callback_manager(
-        cls, v: Optional[CallbackManager]
-    ) -> CallbackManager:
-        if v is None:
-            return CallbackManager([])
-        return v
-
-    @abstractmethod
-    def _get_query_embedding(self, query: str) -> Embedding:
-        """
-        Embed the input query synchronously.
-
-        Subclasses should implement this method. Reference get_query_embedding's
-        docstring for more information.
-        """
-
-    @abstractmethod
-    async def _aget_query_embedding(self, query: str) -> Embedding:
-        """
-        Embed the input query asynchronously.
-
-        Subclasses should implement this method. Reference get_query_embedding's
-        docstring for more information.
-        """
-
-    def get_query_embedding(self, query: str) -> Embedding:
-        """
-        Embed the input query.
-
-        When embedding a query, depending on the model, a special instruction
-        can be prepended to the raw query string. For example, "Represent the
-        question for retrieving supporting documents: ". If you're curious,
-        other examples of predefined instructions can be found in
-        embeddings/huggingface_utils.py.
-        """
-        with self.callback_manager.event(
-            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
-        ) as event:
-            query_embedding = self._get_query_embedding(query)
-
-            event.on_end(
-                payload={
-                    EventPayload.CHUNKS: [query],
-                    EventPayload.EMBEDDINGS: [query_embedding],
-                },
-            )
-        return query_embedding
-
-    async def aget_query_embedding(self, query: str) -> Embedding:
-        """Get query embedding."""
-        with self.callback_manager.event(
-            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
-        ) as event:
-            query_embedding = await self._aget_query_embedding(query)
-
-            event.on_end(
-                payload={
-                    EventPayload.CHUNKS: [query],
-                    EventPayload.EMBEDDINGS: [query_embedding],
-                },
-            )
-        return query_embedding
-
-    def get_agg_embedding_from_queries(
-        self,
-        queries: List[str],
-        agg_fn: Optional[Callable[..., Embedding]] = None,
-    ) -> Embedding:
-        """Get aggregated embedding from multiple queries."""
-        query_embeddings = [self.get_query_embedding(query) for query in queries]
-        agg_fn = agg_fn or mean_agg
-        return agg_fn(query_embeddings)
-
-    async def aget_agg_embedding_from_queries(
-        self,
-        queries: List[str],
-        agg_fn: Optional[Callable[..., Embedding]] = None,
-    ) -> Embedding:
-        """Async get aggregated embedding from multiple queries."""
-        query_embeddings = [await self.aget_query_embedding(query) for query in queries]
-        agg_fn = agg_fn or mean_agg
-        return agg_fn(query_embeddings)
-
-    @abstractmethod
-    def _get_text_embedding(self, text: str) -> Embedding:
-        """
-        Embed the input text synchronously.
-
-        Subclasses should implement this method. Reference get_text_embedding's
-        docstring for more information.
-        """
-
-    async def _aget_text_embedding(self, text: str) -> Embedding:
-        """
-        Embed the input text asynchronously.
-
-        Subclasses can implement this method if there is a true async
-        implementation. Reference get_text_embedding's docstring for more
-        information.
-        """
-        # Default implementation just falls back on _get_text_embedding
-        return self._get_text_embedding(text)
-
-    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
-        """
-        Embed the input sequence of text synchronously.
-
-        Subclasses can implement this method if batch queries are supported.
-        """
-        # Default implementation just loops over _get_text_embedding
-        return [self._get_text_embedding(text) for text in texts]
-
-    async def _aget_text_embeddings(self, texts: List[str]) -> List[Embedding]:
-        """
-        Embed the input sequence of text asynchronously.
-
-        Subclasses can implement this method if batch queries are supported.
-        """
-        return await asyncio.gather(
-            *[self._aget_text_embedding(text) for text in texts]
-        )
-
-    def get_text_embedding(self, text: str) -> Embedding:
-        """
-        Embed the input text.
-
-        When embedding text, depending on the model, a special instruction
-        can be prepended to the raw text string. For example, "Represent the
-        document for retrieval: ". If you're curious, other examples of
-        predefined instructions can be found in embeddings/huggingface_utils.py.
-        """
-        with self.callback_manager.event(
-            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
-        ) as event:
-            text_embedding = self._get_text_embedding(text)
-
-            event.on_end(
-                payload={
-                    EventPayload.CHUNKS: [text],
-                    EventPayload.EMBEDDINGS: [text_embedding],
-                }
-            )
-
-        return text_embedding
-
-    async def aget_text_embedding(self, text: str) -> Embedding:
-        """Async get text embedding."""
-        with self.callback_manager.event(
-            CBEventType.EMBEDDING, payload={EventPayload.SERIALIZED: self.to_dict()}
-        ) as event:
-            text_embedding = await self._aget_text_embedding(text)
-
-            event.on_end(
-                payload={
-                    EventPayload.CHUNKS: [text],
-                    EventPayload.EMBEDDINGS: [text_embedding],
-                }
-            )
-
-        return text_embedding
-
-    def get_text_embedding_batch(
-        self,
-        texts: List[str],
-        show_progress: bool = False,
-        **kwargs: Any,
-    ) -> List[Embedding]:
-        """Get a list of text embeddings, with batching."""
-        cur_batch: List[str] = []
-        result_embeddings: List[Embedding] = []
-
-        queue_with_progress = enumerate(
-            get_tqdm_iterable(texts, show_progress, "Generating embeddings")
-        )
-
-        for idx, text in queue_with_progress:
-            cur_batch.append(text)
-            if idx == len(texts) - 1 or len(cur_batch) == self.embed_batch_size:
-                # flush
-                with self.callback_manager.event(
-                    CBEventType.EMBEDDING,
-                    payload={EventPayload.SERIALIZED: self.to_dict()},
-                ) as event:
-                    embeddings = self._get_text_embeddings(cur_batch)
-                    result_embeddings.extend(embeddings)
-                    event.on_end(
-                        payload={
-                            EventPayload.CHUNKS: cur_batch,
-                            EventPayload.EMBEDDINGS: embeddings,
-                        },
-                    )
-                cur_batch = []
-
-        return result_embeddings
-
-    async def aget_text_embedding_batch(
-        self, texts: List[str], show_progress: bool = False
-    ) -> List[Embedding]:
-        """Asynchronously get a list of text embeddings, with batching."""
-        cur_batch: List[str] = []
-        callback_payloads: List[Tuple[str, List[str]]] = []
-        result_embeddings: List[Embedding] = []
-        embeddings_coroutines: List[Coroutine] = []
-        for idx, text in enumerate(texts):
-            cur_batch.append(text)
-            if idx == len(texts) - 1 or len(cur_batch) == self.embed_batch_size:
-                # flush
-                event_id = self.callback_manager.on_event_start(
-                    CBEventType.EMBEDDING,
-                    payload={EventPayload.SERIALIZED: self.to_dict()},
-                )
-                callback_payloads.append((event_id, cur_batch))
-                embeddings_coroutines.append(self._aget_text_embeddings(cur_batch))
-                cur_batch = []
-
-        # flatten the results of asyncio.gather, which is a list of embeddings lists
-        nested_embeddings = []
-        if show_progress:
-            try:
-                from tqdm.auto import tqdm
-
-                nested_embeddings = [
-                    await f
-                    for f in tqdm(
-                        asyncio.as_completed(embeddings_coroutines),
-                        total=len(embeddings_coroutines),
-                        desc="Generating embeddings",
-                    )
-                ]
-            except ImportError:
-                nested_embeddings = await asyncio.gather(*embeddings_coroutines)
-        else:
-            nested_embeddings = await asyncio.gather(*embeddings_coroutines)
-
-        result_embeddings = [
-            embedding for embeddings in nested_embeddings for embedding in embeddings
-        ]
-
-        for (event_id, text_batch), embeddings in zip(
-            callback_payloads, nested_embeddings
-        ):
-            self.callback_manager.on_event_end(
-                CBEventType.EMBEDDING,
-                payload={
-                    EventPayload.CHUNKS: text_batch,
-                    EventPayload.EMBEDDINGS: embeddings,
-                },
-                event_id=event_id,
-            )
-
-        return result_embeddings
-
-    def similarity(
-        self,
-        embedding1: Embedding,
-        embedding2: Embedding,
-        mode: SimilarityMode = SimilarityMode.DEFAULT,
-    ) -> float:
-        """Get embedding similarity."""
-        return similarity(embedding1=embedding1, embedding2=embedding2, mode=mode)
-
-    def __call__(self, nodes: List[BaseNode], **kwargs: Any) -> List[BaseNode]:
-        embeddings = self.get_text_embedding_batch(
-            [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes],
-            **kwargs,
-        )
-
-        for node, embedding in zip(nodes, embeddings):
-            node.embedding = embedding
-
-        return nodes
-
-    async def acall(self, nodes: List[BaseNode], **kwargs: Any) -> List[BaseNode]:
-        embeddings = await self.aget_text_embedding_batch(
-            [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes],
-            **kwargs,
-        )
-
-        for node, embedding in zip(nodes, embeddings):
-            node.embedding = embedding
-
-        return nodes
+__all__ = [
+    "BaseEmbedding",
+    "similarity",
+    "SimilarityMode",
+    "DEFAULT_EMBED_BATCH_SIZE",
+    "mean_agg",
+    "Embedding",
+]
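
The hunk above reduces llama_index/embeddings/base.py to a compatibility shim: the implementation moves to llama_index.core.embeddings.base, and only __all__ remains. Assuming the legacy module re-exports those names from core (the import itself sits above this hunk), both import paths resolve to the same objects. A minimal sketch under that assumption:

    from llama_index.core.embeddings.base import (
        BaseEmbedding,
        SimilarityMode,
        similarity,
    )
    from llama_index.embeddings.base import BaseEmbedding as LegacyBaseEmbedding

    # Assumes the legacy module re-exports from core; if so, both paths
    # yield the very same class object.
    assert BaseEmbedding is LegacyBaseEmbedding

    # similarity() keeps its signature: two vectors in, a float out
    # (cosine similarity under SimilarityMode.DEFAULT).
    print(similarity([1.0, 0.0], [0.0, 1.0], mode=SimilarityMode.DEFAULT))  # 0.0
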
diff --git a/llama_index/embeddings/bedrock.py b/llama_index/embeddings/bedrock.py
index a90352aaacf331b2527805320701b62d33428580..d9f3c7c0026d976ef131b03ee4ef4e6431033984 100644
--- a/llama_index/embeddings/bedrock.py
+++ b/llama_index/embeddings/bedrock.py
@@ -6,11 +6,8 @@ from typing import Any, Dict, List, Literal, Optional
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import (
-    DEFAULT_EMBED_BATCH_SIZE,
-    BaseEmbedding,
-    Embedding,
-)
+from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
+from llama_index.core.embeddings.base import BaseEmbedding, Embedding
 
 
 class PROVIDERS(str, Enum):
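
Unlike most hunks in this diff, bedrock (and clarifai and clip below) split the imports: DEFAULT_EMBED_BATCH_SIZE now comes from llama_index.constants, while the embedding types come from llama_index.core.embeddings.base. A minimal sketch of the resulting split (the printed value is illustrative, not guaranteed by this diff):

    # The constant and the base types now live in two different modules.
    from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
    from llama_index.core.embeddings.base import BaseEmbedding, Embedding

    print(DEFAULT_EMBED_BATCH_SIZE)  # 10 in recent releases; treat as illustrative
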
diff --git a/llama_index/embeddings/clarifai.py b/llama_index/embeddings/clarifai.py
index e77bfd2df07676357e7771085151823acce1af43..3f2c459c24c9f1014f98820fae3995cfe45d1a85 100644
--- a/llama_index/embeddings/clarifai.py
+++ b/llama_index/embeddings/clarifai.py
@@ -3,7 +3,8 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
+from llama_index.core.embeddings.base import BaseEmbedding
 
 logger = logging.getLogger(__name__)
 
diff --git a/llama_index/embeddings/clip.py b/llama_index/embeddings/clip.py
index 9f905cd6eaf4ba1bbf4adfd90d144aa9a5dc936c..1c20bb86a9eb2e073ebb95d6bff8a29e46ea6c22 100644
--- a/llama_index/embeddings/clip.py
+++ b/llama_index/embeddings/clip.py
@@ -2,10 +2,8 @@ import logging
 from typing import Any, List
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
-from llama_index.embeddings.base import (
-    DEFAULT_EMBED_BATCH_SIZE,
-    Embedding,
-)
+from llama_index.constants import DEFAULT_EMBED_BATCH_SIZE
+from llama_index.core.embeddings.base import Embedding
 from llama_index.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.schema import ImageType
 
diff --git a/llama_index/embeddings/cohereai.py b/llama_index/embeddings/cohereai.py
index 94883fdb389afcbb7ddde3fb78ae64c5696205be..1fd4f19edd7ffdf10843b9439ad216aaf84d7261 100644
--- a/llama_index/embeddings/cohereai.py
+++ b/llama_index/embeddings/cohereai.py
@@ -3,7 +3,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 
 # Enums for validation and type safety
diff --git a/llama_index/embeddings/gemini.py b/llama_index/embeddings/gemini.py
index b335528a4e46e5dd39da220f1a8917c858d1c265..553a2ea68778b8cabeff58f4010464ad2da65e34 100644
--- a/llama_index/embeddings/gemini.py
+++ b/llama_index/embeddings/gemini.py
@@ -4,7 +4,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 
 class GeminiEmbedding(BaseEmbedding):
diff --git a/llama_index/embeddings/google.py b/llama_index/embeddings/google.py
index 64770062ddebcc6879671545e00d64cc685031ac..ef9142a2ff69056200b3cd5dc26b74466d07b679 100644
--- a/llama_index/embeddings/google.py
+++ b/llama_index/embeddings/google.py
@@ -4,7 +4,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 # Google Universal Sentence Encoder v5
 DEFAULT_HANDLE = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
diff --git a/llama_index/embeddings/google_palm.py b/llama_index/embeddings/google_palm.py
index 8e01f8fba3f5d5ead51a34507c70d7932f8dfd77..7fc3df38b07d7868eb7ef3a521db62974af61789 100644
--- a/llama_index/embeddings/google_palm.py
+++ b/llama_index/embeddings/google_palm.py
@@ -4,7 +4,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 
 class GooglePaLMEmbedding(BaseEmbedding):
diff --git a/llama_index/embeddings/gradient.py b/llama_index/embeddings/gradient.py
index 21c607dc5912d1767b482c101c637a396f1f2ed4..bc620492ed16d910f8a8cb5d2fff07627447c80c 100644
--- a/llama_index/embeddings/gradient.py
+++ b/llama_index/embeddings/gradient.py
@@ -2,7 +2,7 @@ import logging
 from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
-from llama_index.embeddings.base import (
+from llama_index.core.embeddings.base import (
     DEFAULT_EMBED_BATCH_SIZE,
     BaseEmbedding,
     Embedding,
diff --git a/llama_index/embeddings/huggingface.py b/llama_index/embeddings/huggingface.py
index b9e4ccc03171fdc447a74b812bacf6a62399d91f..b4f348f9da43377449b98782a5c70bf266be4453 100644
--- a/llama_index/embeddings/huggingface.py
+++ b/llama_index/embeddings/huggingface.py
@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Union
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import (
+from llama_index.core.embeddings.base import (
     DEFAULT_EMBED_BATCH_SIZE,
     BaseEmbedding,
     Embedding,
diff --git a/llama_index/embeddings/huggingface_optimum.py b/llama_index/embeddings/huggingface_optimum.py
index 341f060f993b6340e7ac2277ba354adf4dff4450..73f0a48eb88aa3f9928d968cb6aaea0f09f18243 100644
--- a/llama_index/embeddings/huggingface_optimum.py
+++ b/llama_index/embeddings/huggingface_optimum.py
@@ -2,7 +2,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 from llama_index.embeddings.huggingface_utils import format_query, format_text
 from llama_index.utils import infer_torch_device
 
diff --git a/llama_index/embeddings/instructor.py b/llama_index/embeddings/instructor.py
index 513ee3fa470c681e34f5b6ea52167fc6f8ba26ed..7cf01c44533f50a045446c019c2c9d763ecbd280 100644
--- a/llama_index/embeddings/instructor.py
+++ b/llama_index/embeddings/instructor.py
@@ -2,7 +2,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 from llama_index.embeddings.huggingface_utils import (
     DEFAULT_INSTRUCT_MODEL,
     get_query_instruct_for_model_name,
diff --git a/llama_index/embeddings/jinaai.py b/llama_index/embeddings/jinaai.py
index ef07f5da010396998108e02ce1da03e0177476b2..8a4ed52531edb9d6f1fe680ff9269df881d129e5 100644
--- a/llama_index/embeddings/jinaai.py
+++ b/llama_index/embeddings/jinaai.py
@@ -6,7 +6,7 @@ import requests
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 from llama_index.llms.generic_utils import get_from_param_or_env
 
 MAX_BATCH_SIZE = 2048
diff --git a/llama_index/embeddings/langchain.py b/llama_index/embeddings/langchain.py
index 7fda89b84d8a813df5fcd1255d1ad108c4de3e81..2318abe8d710ed4a54e54563c41354ee070f9f02 100644
--- a/llama_index/embeddings/langchain.py
+++ b/llama_index/embeddings/langchain.py
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, List, Optional
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 
 if TYPE_CHECKING:
     from llama_index.bridge.langchain import Embeddings as LCEmbeddings
diff --git a/llama_index/embeddings/mistralai.py b/llama_index/embeddings/mistralai.py
index 2bd444859f280724ca6d118c1b9d085a73bff540..05943cf9fc2597de959b0ed3ddce73772149e22f 100644
--- a/llama_index/embeddings/mistralai.py
+++ b/llama_index/embeddings/mistralai.py
@@ -4,7 +4,7 @@ from typing import Any, List, Optional
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
+from llama_index.core.embeddings.base import DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding
 from llama_index.llms.generic_utils import get_from_param_or_env
 
 
diff --git a/llama_index/embeddings/multi_modal_base.py b/llama_index/embeddings/multi_modal_base.py
index 276063ca07a294f02012a29680b7a66b33de198e..c3adf485f123f85fd9253878081057173632e5f4 100644
--- a/llama_index/embeddings/multi_modal_base.py
+++ b/llama_index/embeddings/multi_modal_base.py
@@ -5,7 +5,7 @@ from abc import abstractmethod
 from typing import Coroutine, List, Tuple
 
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.embeddings.base import (
+from llama_index.core.embeddings.base import (
     BaseEmbedding,
     Embedding,
 )
diff --git a/llama_index/embeddings/text_embeddings_inference.py b/llama_index/embeddings/text_embeddings_inference.py
index 48ffd861a1027d2d41da6b6f1d17b57ac24e592d..ad1a48a2f63d5d61fbff23ceda9c324daada3dc5 100644
--- a/llama_index/embeddings/text_embeddings_inference.py
+++ b/llama_index/embeddings/text_embeddings_inference.py
@@ -2,7 +2,7 @@ from typing import Callable, List, Optional, Union
 
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
-from llama_index.embeddings.base import (
+from llama_index.core.embeddings.base import (
     DEFAULT_EMBED_BATCH_SIZE,
     BaseEmbedding,
     Embedding,
diff --git a/llama_index/evaluation/base.py b/llama_index/evaluation/base.py
index 3f2023a0fcda0f6d2ad4cfa6807acbd9a6b1a58e..9ddf0052122738c7f9e6a912cd671fbd34b82d59 100644
--- a/llama_index/evaluation/base.py
+++ b/llama_index/evaluation/base.py
@@ -4,8 +4,8 @@ from abc import abstractmethod
 from typing import Any, Optional, Sequence
 
 from llama_index.bridge.pydantic import BaseModel, Field
+from llama_index.core.response.schema import Response
 from llama_index.prompts.mixin import PromptMixin, PromptMixinType
-from llama_index.response.schema import Response
 
 
 class EvaluationResult(BaseModel):
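
evaluation/base.py is a representative consumer of the relocated Response class; only its import path changes, not its shape. A minimal usage sketch against the new location (the field names are the existing schema, nothing new is introduced):

    from llama_index.core.response.schema import Response

    # A Response still pairs the response text with its source nodes.
    resp = Response(response="42", source_nodes=[])
    print(str(resp))  # prints: 42
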
diff --git a/llama_index/evaluation/batch_runner.py b/llama_index/evaluation/batch_runner.py
index 306480b711ed4ddc9859d137f63fbec6b5a920dd..4b4cb2e6f917911f714154fd7bea9a1746fe406b 100644
--- a/llama_index/evaluation/batch_runner.py
+++ b/llama_index/evaluation/batch_runner.py
@@ -2,9 +2,9 @@ import asyncio
 from typing import Any, Dict, List, Optional, Sequence, Tuple, cast
 
 from llama_index.async_utils import asyncio_module
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.evaluation.base import BaseEvaluator, EvaluationResult
-from llama_index.response.schema import RESPONSE_TYPE, Response
 
 
 async def eval_response_worker(
diff --git a/llama_index/evaluation/benchmarks/beir.py b/llama_index/evaluation/benchmarks/beir.py
index 6bab13b3c35abc7ab65a6355f1db13689263be7b..5751f5123886bad3b48448d974945a3df9c54f99 100644
--- a/llama_index/evaluation/benchmarks/beir.py
+++ b/llama_index/evaluation/benchmarks/beir.py
@@ -4,7 +4,7 @@ from typing import Callable, Dict, List, Optional
 
 import tqdm
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.schema import Document, QueryBundle
 from llama_index.utils import get_cache_dir
diff --git a/llama_index/evaluation/benchmarks/hotpotqa.py b/llama_index/evaluation/benchmarks/hotpotqa.py
index 2d5ff6bb6e371c7eafdacf49f5c714a252113b2f..4e7e2cb019ff361266a68ed038763f000f04a770 100644
--- a/llama_index/evaluation/benchmarks/hotpotqa.py
+++ b/llama_index/evaluation/benchmarks/hotpotqa.py
@@ -9,7 +9,8 @@ from typing import Any, Dict, List, Optional, Tuple
 import requests
 import tqdm
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
 from llama_index.utils import get_cache_dir
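
From here on, most hunks are the same mechanical substitution applied file by file: imports from the llama_index.core package root are replaced with explicit submodule imports. (Keeping the core package's __init__ light and avoiding import cycles is a plausible motivation; the diff itself only shows the mechanics.) The pattern in isolation:

    # Before: from llama_index.core import BaseQueryEngine, BaseRetriever
    # After:
    from llama_index.core.base_query_engine import BaseQueryEngine
    from llama_index.core.base_retriever import BaseRetriever
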
diff --git a/llama_index/evaluation/eval_utils.py b/llama_index/evaluation/eval_utils.py
index 0d401741e549570fcd988547e08906884c8ff990..f9432d0d64896faf96e043562ea6b3fb4d3152a6 100644
--- a/llama_index/evaluation/eval_utils.py
+++ b/llama_index/evaluation/eval_utils.py
@@ -12,7 +12,7 @@ import numpy as np
 import pandas as pd
 
 from llama_index.async_utils import asyncio_module
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 from llama_index.evaluation.base import EvaluationResult
 
 
diff --git a/llama_index/evaluation/retrieval/evaluator.py b/llama_index/evaluation/retrieval/evaluator.py
index 7174d80e7c82f3d480c16cce3666454c4b4e4e2a..e8b24d3080ddcc2a1d3f0429567a6129315013bb 100644
--- a/llama_index/evaluation/retrieval/evaluator.py
+++ b/llama_index/evaluation/retrieval/evaluator.py
@@ -3,7 +3,7 @@
 from typing import Any, List, Sequence, Tuple
 
 from llama_index.bridge.pydantic import Field
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.evaluation.retrieval.base import (
     BaseRetrievalEvaluator,
     RetrievalEvalMode,
diff --git a/llama_index/evaluation/semantic_similarity.py b/llama_index/evaluation/semantic_similarity.py
index c77f2fa085bd6a5ac911f5fd56b2a3cc1b0ddac5..393b7866a062238e46c12c2fa0f28d017beb56de 100644
--- a/llama_index/evaluation/semantic_similarity.py
+++ b/llama_index/evaluation/semantic_similarity.py
@@ -1,6 +1,6 @@
 from typing import Any, Callable, Optional, Sequence
 
-from llama_index.embeddings.base import SimilarityMode, similarity
+from llama_index.core.embeddings.base import SimilarityMode, similarity
 from llama_index.evaluation.base import BaseEvaluator, EvaluationResult
 from llama_index.prompts.mixin import PromptDictType
 from llama_index.service_context import ServiceContext
diff --git a/llama_index/indices/base.py b/llama_index/indices/base.py
index be79007aca4f86ce4783b743c1b5e15871689909..c904d657b8d2ce5298998a8419b164f70ddd78d1 100644
--- a/llama_index/indices/base.py
+++ b/llama_index/indices/base.py
@@ -4,7 +4,8 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, Generic, List, Optional, Sequence, Type, TypeVar, cast
 
 from llama_index.chat_engine.types import BaseChatEngine, ChatMode
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexStruct
 from llama_index.ingestion import run_transformations
 from llama_index.llms.openai import OpenAI
diff --git a/llama_index/indices/base_retriever.py b/llama_index/indices/base_retriever.py
index 22087ac2625a0eb7e8511da52eff235b6a103912..0cad9e778bb1c4411b002bf5c3b05503725d19df 100644
--- a/llama_index/indices/base_retriever.py
+++ b/llama_index/indices/base_retriever.py
@@ -1,5 +1,5 @@
 # for backwards compatibility
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 
 __all__ = [
     "BaseRetriever",
diff --git a/llama_index/indices/composability/graph.py b/llama_index/indices/composability/graph.py
index d7e5e14c3db829c17e20f2c100f1fea955e26f9e..c3e522d6dfd60bd0fcc88a057c476ed0954b6854 100644
--- a/llama_index/indices/composability/graph.py
+++ b/llama_index/indices/composability/graph.py
@@ -2,7 +2,7 @@
 
 from typing import Any, Dict, List, Optional, Sequence, Type, cast
 
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 from llama_index.data_structs.data_structs import IndexStruct
 from llama_index.indices.base import BaseIndex
 from llama_index.schema import IndexNode, NodeRelationship, ObjectType, RelatedNodeInfo
diff --git a/llama_index/indices/document_summary/base.py b/llama_index/indices/document_summary/base.py
index 1bf97e20ac5ef0db910c0de478191a288b46bbb1..79e67b5063fbf0465ccd494768b584724fd52dc5 100644
--- a/llama_index/indices/document_summary/base.py
+++ b/llama_index/indices/document_summary/base.py
@@ -10,11 +10,11 @@ from collections import defaultdict
 from enum import Enum
 from typing import Any, Dict, Optional, Sequence, Union, cast
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import Response
 from llama_index.data_structs.document_summary import IndexDocumentSummary
 from llama_index.indices.base import BaseIndex
 from llama_index.indices.utils import embed_nodes
-from llama_index.response.schema import Response
 from llama_index.response_synthesizers import (
     BaseSynthesizer,
     ResponseMode,
diff --git a/llama_index/indices/document_summary/retrievers.py b/llama_index/indices/document_summary/retrievers.py
index fda5c8a845f730d88172d0dd4f0c6dd9e0c889a0..cbb46d74ab39609e4a9fb975a85e499b9f02f1db 100644
--- a/llama_index/indices/document_summary/retrievers.py
+++ b/llama_index/indices/document_summary/retrievers.py
@@ -8,7 +8,7 @@ import logging
 from typing import Any, Callable, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.document_summary.base import DocumentSummaryIndex
 from llama_index.indices.utils import (
     default_format_node_batch_fn,
diff --git a/llama_index/indices/empty/base.py b/llama_index/indices/empty/base.py
index 6f74184f486a7fb23d96d6046ba91f30a226b551..295a56bb73e5318fee4b06cb3a42a3bea4bd81c5 100644
--- a/llama_index/indices/empty/base.py
+++ b/llama_index/indices/empty/base.py
@@ -7,7 +7,8 @@ pure LLM calls.
 
 from typing import Any, Dict, Optional, Sequence
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import EmptyIndexStruct
 from llama_index.indices.base import BaseIndex
 from llama_index.schema import BaseNode
diff --git a/llama_index/indices/empty/retrievers.py b/llama_index/indices/empty/retrievers.py
index e79532bc574f07c20194b6aff7883ab5d8a470a8..19d0eb3db9ab44fc3dae897cec9e0e973aa1cd03 100644
--- a/llama_index/indices/empty/retrievers.py
+++ b/llama_index/indices/empty/retrievers.py
@@ -2,7 +2,7 @@
 from typing import Any, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.empty.base import EmptyIndex
 from llama_index.prompts import BasePromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_SIMPLE_INPUT_PROMPT
diff --git a/llama_index/indices/keyword_table/base.py b/llama_index/indices/keyword_table/base.py
index 02a031d674a8d5b6537532887da6ca258db4714d..885243c8c316f7d96556e457583c581f3a77dd4c 100644
--- a/llama_index/indices/keyword_table/base.py
+++ b/llama_index/indices/keyword_table/base.py
@@ -13,7 +13,7 @@ from enum import Enum
 from typing import Any, Dict, Optional, Sequence, Set, Union
 
 from llama_index.async_utils import run_async_tasks
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import KeywordTable
 from llama_index.indices.base import BaseIndex
 from llama_index.indices.keyword_table.utils import extract_keywords_given_response
diff --git a/llama_index/indices/keyword_table/rake_base.py b/llama_index/indices/keyword_table/rake_base.py
index b4188e731282216b08bf1a9d28cf43c24e7768a7..5b5a8c1f9667e6850eca23708ffba16ee36a6ef0 100644
--- a/llama_index/indices/keyword_table/rake_base.py
+++ b/llama_index/indices/keyword_table/rake_base.py
@@ -6,7 +6,7 @@ Similar to KeywordTableIndex, but uses RAKE instead of GPT.
 
 from typing import Any, Set, Union
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.keyword_table.base import (
     BaseKeywordTableIndex,
     KeywordTableRetrieverMode,
diff --git a/llama_index/indices/keyword_table/retrievers.py b/llama_index/indices/keyword_table/retrievers.py
index 0d687b2feaf6c792f81c76862857ee03e57c6f40..05480c05121f014e20acf28a03bfd8fc9bb5e463 100644
--- a/llama_index/indices/keyword_table/retrievers.py
+++ b/llama_index/indices/keyword_table/retrievers.py
@@ -5,7 +5,7 @@ from collections import defaultdict
 from typing import Any, Dict, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.keyword_table.base import BaseKeywordTableIndex
 from llama_index.indices.keyword_table.utils import (
     extract_keywords_given_response,
diff --git a/llama_index/indices/keyword_table/simple_base.py b/llama_index/indices/keyword_table/simple_base.py
index f54a57866c8bff67925e0f7a477b5387103f9e82..c296d96336d83836f7a2dca7debf9298dae27a86 100644
--- a/llama_index/indices/keyword_table/simple_base.py
+++ b/llama_index/indices/keyword_table/simple_base.py
@@ -7,7 +7,7 @@ technique that doesn't involve GPT - just uses regex.
 
 from typing import Any, Set, Union
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.keyword_table.base import (
     BaseKeywordTableIndex,
     KeywordTableRetrieverMode,
diff --git a/llama_index/indices/knowledge_graph/base.py b/llama_index/indices/knowledge_graph/base.py
index 00cc76d836d32d0a00d4ad72450237fa46a5499c..eb7dd65c9c94231e4057476b34e48087580d4c52 100644
--- a/llama_index/indices/knowledge_graph/base.py
+++ b/llama_index/indices/knowledge_graph/base.py
@@ -8,7 +8,7 @@ import logging
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
 
 from llama_index.constants import GRAPH_STORE_KEY
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import KG
 from llama_index.graph_stores.simple import SimpleGraphStore
 from llama_index.graph_stores.types import GraphStore
diff --git a/llama_index/indices/knowledge_graph/retrievers.py b/llama_index/indices/knowledge_graph/retrievers.py
index 9ac9cc064e8874c4f22b0c3bbb624b7e5633ce22..d7118083a2d3c47be44f3bd393847a72982802a2 100644
--- a/llama_index/indices/knowledge_graph/retrievers.py
+++ b/llama_index/indices/knowledge_graph/retrievers.py
@@ -5,7 +5,7 @@ from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.keyword_table.utils import extract_keywords_given_response
 from llama_index.indices.knowledge_graph.base import KnowledgeGraphIndex
 from llama_index.indices.query.embedding_utils import get_top_k_embeddings
diff --git a/llama_index/indices/list/base.py b/llama_index/indices/list/base.py
index c4e3321c2e848a12892e2ae5f394957002a8e97d..f09f63d6fed6632d58b1ac5c8f5578c3b997ad9f 100644
--- a/llama_index/indices/list/base.py
+++ b/llama_index/indices/list/base.py
@@ -8,7 +8,7 @@ in sequence in order to answer a given query.
 from enum import Enum
 from typing import Any, Dict, Optional, Sequence, Union
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexList
 from llama_index.indices.base import BaseIndex
 from llama_index.schema import BaseNode
diff --git a/llama_index/indices/list/retrievers.py b/llama_index/indices/list/retrievers.py
index 4f92ee0763fd4e895b1705356efc30eb183a5d18..5548e4fdd1f48bf0e59855ef259f7372f8df7ee2 100644
--- a/llama_index/indices/list/retrievers.py
+++ b/llama_index/indices/list/retrievers.py
@@ -3,7 +3,7 @@ import logging
 from typing import Any, Callable, List, Optional, Tuple
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.list.base import SummaryIndex
 from llama_index.indices.query.embedding_utils import get_top_k_embeddings
 from llama_index.indices.utils import (
diff --git a/llama_index/indices/managed/base.py b/llama_index/indices/managed/base.py
index d192f6d302d5e45127a0356d135c5782f794b539..92d2475ca351381bee80929b5cf8531c712b104e 100644
--- a/llama_index/indices/managed/base.py
+++ b/llama_index/indices/managed/base.py
@@ -6,7 +6,7 @@ An index that is built on top of a managed service.
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, Sequence, Type
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict
 from llama_index.indices.base import BaseIndex, IndexType
 from llama_index.schema import BaseNode, Document
diff --git a/llama_index/indices/managed/colbert_index/base.py b/llama_index/indices/managed/colbert_index/base.py
index e0ad967f1ce3a0f7ab63870aa13895aac83d2011..d808f297abd0c85e5168c5a92f79db734609cc95 100644
--- a/llama_index/indices/managed/colbert_index/base.py
+++ b/llama_index/indices/managed/colbert_index/base.py
@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Optional, Sequence
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict
 from llama_index.indices.base import BaseIndex
 from llama_index.schema import BaseNode, NodeWithScore
diff --git a/llama_index/indices/managed/colbert_index/retriever.py b/llama_index/indices/managed/colbert_index/retriever.py
index 199dfa7860959fb98456093fefb037880484e0eb..c3e0d043123b30fa62080df801e3c1f57202578d 100644
--- a/llama_index/indices/managed/colbert_index/retriever.py
+++ b/llama_index/indices/managed/colbert_index/retriever.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.schema import NodeWithScore, QueryBundle
 from llama_index.vector_stores.types import MetadataFilters
 
diff --git a/llama_index/indices/managed/vectara/base.py b/llama_index/indices/managed/vectara/base.py
index adc1467f677d830b6015d9098a892c6aa03f8888..7b51e813ec81dfb4f8f1ff4302720e18dc9d373f 100644
--- a/llama_index/indices/managed/vectara/base.py
+++ b/llama_index/indices/managed/vectara/base.py
@@ -13,7 +13,8 @@ from typing import Any, Dict, List, Optional, Sequence, Type
 
 import requests
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict, IndexStructType
 from llama_index.indices.managed.base import BaseManagedIndex, IndexType
 from llama_index.schema import BaseNode, Document, MetadataMode, TextNode
diff --git a/llama_index/indices/managed/vectara/query.py b/llama_index/indices/managed/vectara/query.py
index faf2e2a5508acffcb000c279c723eb9abcae048d..d958bae313b9b951c0442d241b7d0b0d25cc0daf 100644
--- a/llama_index/indices/managed/vectara/query.py
+++ b/llama_index/indices/managed/vectara/query.py
@@ -2,11 +2,12 @@ from typing import Any, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.indices.managed.vectara.retriever import VectaraRetriever
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import NodeWithScore, QueryBundle
 
 
diff --git a/llama_index/indices/managed/vectara/retriever.py b/llama_index/indices/managed/vectara/retriever.py
index 93b0927d3d906d0ae81c4e8d1ced0d6edb9db032..fc2fc7741263c2d69ad85380b6d0783173217ae0 100644
--- a/llama_index/indices/managed/vectara/retriever.py
+++ b/llama_index/indices/managed/vectara/retriever.py
@@ -8,7 +8,7 @@ from typing import Any, List, Optional, Tuple
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.managed.types import ManagedIndexQueryMode
 from llama_index.indices.managed.vectara.base import VectaraIndex
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
diff --git a/llama_index/indices/managed/zilliz/base.py b/llama_index/indices/managed/zilliz/base.py
index 484d6069921ea83a1f2cbd9cf381d387f5b9653c..7b31d7d87cc232b1dd883552ea3c357f09b37f45 100644
--- a/llama_index/indices/managed/zilliz/base.py
+++ b/llama_index/indices/managed/zilliz/base.py
@@ -10,7 +10,7 @@ from typing import Any, Dict, Optional, Sequence, Type
 
 import requests
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict, IndexStructType
 from llama_index.indices.managed.base import BaseManagedIndex, IndexType
 from llama_index.schema import BaseNode, Document
diff --git a/llama_index/indices/managed/zilliz/retriever.py b/llama_index/indices/managed/zilliz/retriever.py
index b38068e1e83247c68f37d6e4a735c0493735af4c..15cda246a8679a9bc5fa24055e372de94cdb0445 100644
--- a/llama_index/indices/managed/zilliz/retriever.py
+++ b/llama_index/indices/managed/zilliz/retriever.py
@@ -5,7 +5,7 @@ import requests
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.managed.zilliz.base import ZillizCloudPipelineIndex
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
diff --git a/llama_index/indices/multi_modal/base.py b/llama_index/indices/multi_modal/base.py
index 95048aaefeebd3ace39c50580d1e7d6fa5681d1f..d3ac2d19a1f1bb4564493b4c99d71522b406d599 100644
--- a/llama_index/indices/multi_modal/base.py
+++ b/llama_index/indices/multi_modal/base.py
@@ -6,7 +6,8 @@ An index that is built on top of multiple vector stores for different modal
 import logging
 from typing import Any, List, Optional, Sequence, cast
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict, MultiModelIndexDict
 from llama_index.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.embeddings.utils import EmbedType, resolve_embed_model
diff --git a/llama_index/indices/multi_modal/retriever.py b/llama_index/indices/multi_modal/retriever.py
index ef36e3795dfd368a09095f82e465177ec9089298..cf3ce560f97b4ccd61219aa42ec5ec6e39e209b2 100644
--- a/llama_index/indices/multi_modal/retriever.py
+++ b/llama_index/indices/multi_modal/retriever.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import (
+from llama_index.core.base_multi_modal_retriever import (
     MultiModalRetriever,
 )
 from llama_index.data_structs.data_structs import IndexDict
diff --git a/llama_index/indices/prompt_helper.py b/llama_index/indices/prompt_helper.py
index f2c5ca9efbfbfe2443b11fa38fc8c7a8469966c9..5e6a25bcd147f034461d4edeebccce6c1dbf2aa7 100644
--- a/llama_index/indices/prompt_helper.py
+++ b/llama_index/indices/prompt_helper.py
@@ -15,9 +15,9 @@ from typing import Callable, List, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llm_predictor.base import LLMMetadata
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage
 from llama_index.node_parser.text.token import TokenTextSplitter
 from llama_index.node_parser.text.utils import truncate_text
 from llama_index.prompts import (
diff --git a/llama_index/indices/query/base.py b/llama_index/indices/query/base.py
index 87d179f262bea1a7d156bbff7cd6627dcc22007d..f6fc5a3c1d619779bb402f8c825f7bea324d1f87 100644
--- a/llama_index/indices/query/base.py
+++ b/llama_index/indices/query/base.py
@@ -1,5 +1,5 @@
 # for backwards compatibility
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 
 __all__ = [
     "BaseQueryEngine",
diff --git a/llama_index/indices/query/embedding_utils.py b/llama_index/indices/query/embedding_utils.py
index 22e80dcad2da6eb57c12ffd0dea33187d1850113..40031f9cfcc6b6d4ce504db2fc281db54d17e41a 100644
--- a/llama_index/indices/query/embedding_utils.py
+++ b/llama_index/indices/query/embedding_utils.py
@@ -5,7 +5,7 @@ from typing import Any, Callable, List, Optional, Tuple
 
 import numpy as np
 
-from llama_index.embeddings.base import similarity as default_similarity_fn
+from llama_index.core.embeddings.base import similarity as default_similarity_fn
 from llama_index.vector_stores.types import VectorStoreQueryMode
 
 
diff --git a/llama_index/indices/query/query_transform/base.py b/llama_index/indices/query/query_transform/base.py
index bb4ee668feae253fabd94603fc3e51c7c1d310a1..c69d2cc3160aa211d8af5029e1eaa8ac6965714a 100644
--- a/llama_index/indices/query/query_transform/base.py
+++ b/llama_index/indices/query/query_transform/base.py
@@ -4,6 +4,7 @@ import dataclasses
 from abc import abstractmethod
 from typing import Dict, Optional, cast
 
+from llama_index.core.response.schema import Response
 from llama_index.indices.query.query_transform.prompts import (
     DEFAULT_DECOMPOSE_QUERY_TRANSFORM_PROMPT,
     DEFAULT_IMAGE_OUTPUT_PROMPT,
@@ -17,7 +18,6 @@ from llama_index.llms.utils import resolve_llm
 from llama_index.prompts import BasePromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_HYDE_PROMPT
 from llama_index.prompts.mixin import PromptDictType, PromptMixin, PromptMixinType
-from llama_index.response.schema import Response
 from llama_index.schema import QueryBundle, QueryType
 from llama_index.utils import print_text
 
diff --git a/llama_index/indices/struct_store/json_query.py b/llama_index/indices/struct_store/json_query.py
index 353aff77edbcc1f730bbb7bb7e500b8ebbf3d424..bb7e389dbde22433aea3e6d2b4b5c3ce44d577c3 100644
--- a/llama_index/indices/struct_store/json_query.py
+++ b/llama_index/indices/struct_store/json_query.py
@@ -2,12 +2,12 @@ import json
 import logging
 from typing import Any, Callable, Dict, List, Optional, Union
 
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import Response
 from llama_index.prompts import BasePromptTemplate, PromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_JSON_PATH_PROMPT
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
 from llama_index.prompts.prompt_type import PromptType
-from llama_index.response.schema import Response
 from llama_index.schema import QueryBundle
 from llama_index.service_context import ServiceContext
 from llama_index.utils import print_text
diff --git a/llama_index/indices/struct_store/pandas.py b/llama_index/indices/struct_store/pandas.py
index 129b6e927a5081320b8f251a875068e45cb0cfc1..85109e17db801359c161aaa0a02cb087a691f381 100644
--- a/llama_index/indices/struct_store/pandas.py
+++ b/llama_index/indices/struct_store/pandas.py
@@ -5,7 +5,8 @@ from typing import Any, Optional, Sequence
 
 import pandas as pd
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.table import PandasStructTable
 from llama_index.indices.struct_store.base import BaseStructStoreIndex
 from llama_index.schema import BaseNode
diff --git a/llama_index/indices/struct_store/sql.py b/llama_index/indices/struct_store/sql.py
index f59127669c94573dd69801521308f13c9a98b34b..592f19649634926adbd1a700843acbd7bcd25956 100644
--- a/llama_index/indices/struct_store/sql.py
+++ b/llama_index/indices/struct_store/sql.py
@@ -5,7 +5,8 @@ from typing import Any, Optional, Sequence, Union
 
 from sqlalchemy import Table
 
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.table import SQLStructTable
 from llama_index.indices.common.struct_store.schema import SQLContextContainer
 from llama_index.indices.common.struct_store.sql import SQLStructDatapointExtractor
diff --git a/llama_index/indices/struct_store/sql_query.py b/llama_index/indices/struct_store/sql_query.py
index 53cf0d933e4b1d0cf5cd513a3362d532f7494985..b9d9b4f73e2098360b07ae54e701742dc5ab5df7 100644
--- a/llama_index/indices/struct_store/sql_query.py
+++ b/llama_index/indices/struct_store/sql_query.py
@@ -5,7 +5,8 @@ from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 from sqlalchemy import Table
 
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import Response
 from llama_index.indices.struct_store.container_builder import (
     SQLContextContainerBuilder,
 )
@@ -20,7 +21,6 @@ from llama_index.prompts.default_prompts import (
 )
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
 from llama_index.prompts.prompt_type import PromptType
-from llama_index.response.schema import Response
 from llama_index.response_synthesizers import (
     get_response_synthesizer,
 )
diff --git a/llama_index/indices/struct_store/sql_retriever.py b/llama_index/indices/struct_store/sql_retriever.py
index a901603640f4cae25106722f3464721e0a38ff81..0971cb6c6166549dfe0e58a9e3005e1ca45b7b9b 100644
--- a/llama_index/indices/struct_store/sql_retriever.py
+++ b/llama_index/indices/struct_store/sql_retriever.py
@@ -8,7 +8,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 from sqlalchemy import Table
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.embeddings.base import BaseEmbedding
 from llama_index.objects.base import ObjectRetriever
 from llama_index.objects.table_node_mapping import SQLTableSchema
diff --git a/llama_index/indices/tree/all_leaf_retriever.py b/llama_index/indices/tree/all_leaf_retriever.py
index db831f073c1776b8e3bb8e0445d0ea0e366d08d3..8de60a921ea099e685007f8e36fb25015e5bf314 100644
--- a/llama_index/indices/tree/all_leaf_retriever.py
+++ b/llama_index/indices/tree/all_leaf_retriever.py
@@ -4,7 +4,7 @@ import logging
 from typing import Any, List, Optional, cast
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexGraph
 from llama_index.indices.tree.base import TreeIndex
 from llama_index.indices.utils import get_sorted_node_list
diff --git a/llama_index/indices/tree/base.py b/llama_index/indices/tree/base.py
index c1365f09db63bc13850cf66f4d9945b025ce414a..4fa5a020a0d43b33023e5649cbb117a122c6c5f4 100644
--- a/llama_index/indices/tree/base.py
+++ b/llama_index/indices/tree/base.py
@@ -3,7 +3,7 @@
 from enum import Enum
 from typing import Any, Dict, Optional, Sequence, Union
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 
 from llama_index.data_structs.data_structs import IndexGraph
diff --git a/llama_index/indices/tree/select_leaf_retriever.py b/llama_index/indices/tree/select_leaf_retriever.py
index a61a3e5ae99a158dcc345f914ac5c1025cc2b21e..d4ce25fb9700346e88dfe542440e0e7e2c1ceeec 100644
--- a/llama_index/indices/tree/select_leaf_retriever.py
+++ b/llama_index/indices/tree/select_leaf_retriever.py
@@ -4,7 +4,8 @@ import logging
 from typing import Any, Dict, List, Optional, cast
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import Response
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.indices.tree.base import TreeIndex
 from llama_index.indices.tree.utils import get_numbered_text_from_nodes
@@ -19,7 +20,6 @@ from llama_index.prompts.default_prompts import (
     DEFAULT_QUERY_PROMPT_MULTIPLE,
     DEFAULT_TEXT_QA_PROMPT,
 )
-from llama_index.response.schema import Response
 from llama_index.response_synthesizers import get_response_synthesizer
 from llama_index.schema import BaseNode, MetadataMode, NodeWithScore, QueryBundle
 from llama_index.utils import print_text, truncate_text
diff --git a/llama_index/indices/tree/tree_root_retriever.py b/llama_index/indices/tree/tree_root_retriever.py
index 58b2ef0cb97a4be845a49659a7dd4f0f1eb8b55f..fe581e3cc36db75434c7bd6b920a0dbfe35fdff1 100644
--- a/llama_index/indices/tree/tree_root_retriever.py
+++ b/llama_index/indices/tree/tree_root_retriever.py
@@ -3,7 +3,7 @@ import logging
 from typing import Any, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.indices.tree.base import TreeIndex
 from llama_index.indices.utils import get_sorted_node_list
diff --git a/llama_index/indices/vector_store/base.py b/llama_index/indices/vector_store/base.py
index 98a7e32a634ef15dcaaaef52f4211c7070f9a349..918cbc41bfaf1b0c45a6a8786c804943326af299 100644
--- a/llama_index/indices/vector_store/base.py
+++ b/llama_index/indices/vector_store/base.py
@@ -7,7 +7,7 @@ import logging
 from typing import Any, Dict, List, Optional, Sequence
 
 from llama_index.async_utils import run_async_tasks
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict
 from llama_index.indices.base import BaseIndex
 from llama_index.indices.utils import async_embed_nodes, embed_nodes
diff --git a/llama_index/indices/vector_store/retrievers/auto_retriever/auto_retriever.py b/llama_index/indices/vector_store/retrievers/auto_retriever/auto_retriever.py
index 9b8db1169ec3df4d8c0d216127f891a5704f2efd..0405db29f5075bd3ebf56bcd428dc7e2db22d0d2 100644
--- a/llama_index/indices/vector_store/retrievers/auto_retriever/auto_retriever.py
+++ b/llama_index/indices/vector_store/retrievers/auto_retriever/auto_retriever.py
@@ -4,7 +4,7 @@ from typing import Any, List, Optional, Tuple, cast
 from llama_index.bridge.pydantic import BaseModel
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseAutoRetriever
+from llama_index.core.base_auto_retriever import BaseAutoRetriever
 from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.vector_store.base import VectorStoreIndex
 from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
diff --git a/llama_index/indices/vector_store/retrievers/retriever.py b/llama_index/indices/vector_store/retrievers/retriever.py
index 2be7db0d51cb5ebb3c6f452c2265d5b6b25608c7..83b0974968c32caa255d8ad4ff9e87906dac4356 100644
--- a/llama_index/indices/vector_store/retrievers/retriever.py
+++ b/llama_index/indices/vector_store/retrievers/retriever.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.data_structs.data_structs import IndexDict
 from llama_index.indices.utils import log_vector_store_query_result
 from llama_index.indices.vector_store.base import VectorStoreIndex
diff --git a/llama_index/langchain_helpers/agents/tools.py b/llama_index/langchain_helpers/agents/tools.py
index 01801486dfe48cafced23ad1a035884535718d40..f85853d1dd24a2183781663461828bd5a94a0df7 100644
--- a/llama_index/langchain_helpers/agents/tools.py
+++ b/llama_index/langchain_helpers/agents/tools.py
@@ -4,8 +4,8 @@ from typing import Any, Dict, List
 
 from llama_index.bridge.langchain import BaseTool
 from llama_index.bridge.pydantic import BaseModel, Field
-from llama_index.core import BaseQueryEngine
-from llama_index.response.schema import RESPONSE_TYPE
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.schema import TextNode
 
 
diff --git a/llama_index/llama_dataset/base.py b/llama_index/llama_dataset/base.py
index 0d0c0472b9b97772cb2392518fc3ff49410fbad7..a7fc03b844d87482a9af6a07d49614ff84d1babe 100644
--- a/llama_index/llama_dataset/base.py
+++ b/llama_index/llama_dataset/base.py
@@ -11,7 +11,7 @@ from pandas import DataFrame as PandasDataFrame
 
 from llama_index.async_utils import asyncio_module
 from llama_index.bridge.pydantic import BaseModel, Field, PrivateAttr
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 from llama_index.evaluation import BaseEvaluator
 
 PredictorType = Union[BaseQueryEngine, BaseEvaluator]
diff --git a/llama_index/llama_dataset/generator.py b/llama_index/llama_dataset/generator.py
index 6adb38213d89062f3ebc250b2e48def91ae9699b..e0085a8853444b3cfe09902632b34466d9072057 100644
--- a/llama_index/llama_dataset/generator.py
+++ b/llama_index/llama_dataset/generator.py
@@ -7,6 +7,7 @@ from typing import List
 
 from llama_index import Document, ServiceContext, SummaryIndex
 from llama_index.async_utils import DEFAULT_NUM_WORKERS, run_jobs
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.ingestion import run_transformations
 from llama_index.llama_dataset import (
     CreatedBy,
@@ -18,7 +19,6 @@ from llama_index.postprocessor.node import KeywordNodePostprocessor
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT
 from llama_index.prompts.mixin import PromptDictType, PromptMixin, PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.schema import BaseNode, MetadataMode, NodeWithScore
 
 DEFAULT_QUESTION_GENERATION_PROMPT = """\
diff --git a/llama_index/llama_dataset/rag.py b/llama_index/llama_dataset/rag.py
index 5e9c897f48373e20cd4d84e31e56e3100a02bbf6..3a41827676a0286dc8e51e173ebf68e86fe2e3bc 100644
--- a/llama_index/llama_dataset/rag.py
+++ b/llama_index/llama_dataset/rag.py
@@ -7,7 +7,7 @@ from typing import List, Optional
 from pandas import DataFrame as PandasDataFrame
 
 from llama_index.bridge.pydantic import Field
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 from llama_index.llama_dataset.base import (
     BaseLlamaDataExample,
     BaseLlamaDataset,
diff --git a/llama_index/llm_predictor/base.py b/llama_index/llm_predictor/base.py
index d807d221f6df74806b2366ee991adb344c61f12c..0b1ab16938e858fb0aed7fc649fa05243099dbc2 100644
--- a/llama_index/llm_predictor/base.py
+++ b/llama_index/llm_predictor/base.py
@@ -10,6 +10,11 @@ from typing_extensions import Self
 from llama_index.bridge.pydantic import BaseModel, PrivateAttr
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
+from llama_index.core.llms.types import (
+    ChatMessage,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.llm import (
     LLM,
     astream_chat_response_to_tokens,
@@ -17,11 +22,6 @@ from llama_index.llms.llm import (
     stream_chat_response_to_tokens,
     stream_completion_response_to_tokens,
 )
-from llama_index.llms.types import (
-    ChatMessage,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.llms.utils import LLMType, resolve_llm
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate
 from llama_index.schema import BaseComponent
diff --git a/llama_index/llm_predictor/mock.py b/llama_index/llm_predictor/mock.py
index d3a971f18ec54175e8a6ff272d8134fe12bb7347..dc005201defafe69ec7d0bf673f531efd75888a7 100644
--- a/llama_index/llm_predictor/mock.py
+++ b/llama_index/llm_predictor/mock.py
@@ -6,9 +6,9 @@ from deprecated import deprecated
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS
+from llama_index.core.llms.types import LLMMetadata
 from llama_index.llm_predictor.base import BaseLLMPredictor
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import LLMMetadata
 from llama_index.prompts.base import BasePromptTemplate
 from llama_index.prompts.prompt_type import PromptType
 from llama_index.token_counter.utils import (
diff --git a/llama_index/llms/__init__.py b/llama_index/llms/__init__.py
index bf4d304e502fc10ace28c9aae22ebe9d6ad042da..6a1aa18caca592fe40c6412eb608985ab28ef801 100644
--- a/llama_index/llms/__init__.py
+++ b/llama_index/llms/__init__.py
@@ -1,3 +1,14 @@
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.ai21 import AI21
 from llama_index.llms.anthropic import Anthropic
 from llama_index.llms.anyscale import Anyscale
@@ -30,17 +41,6 @@ from llama_index.llms.perplexity import Perplexity
 from llama_index.llms.portkey import Portkey
 from llama_index.llms.predibase import PredibaseLLM
 from llama_index.llms.replicate import Replicate
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.llms.vertex import Vertex
 from llama_index.llms.vllm import Vllm, VllmServer
 from llama_index.llms.watsonx import WatsonX
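
Because llama_index/llms/__init__.py now imports the message and response types from llama_index.core.llms.types at the top of the module, "from llama_index.llms import ChatMessage" keeps working while the core path becomes the canonical one. A minimal sketch:

    from llama_index.core.llms.types import ChatMessage, MessageRole
    from llama_index.llms import ChatMessage as ReExportedChatMessage

    # One class, two import paths.
    assert ChatMessage is ReExportedChatMessage

    msg = ChatMessage(role=MessageRole.USER, content="hello")
    print(msg.role)  # MessageRole.USER
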
diff --git a/llama_index/llms/ai21.py b/llama_index/llms/ai21.py
index 0ed8216b691737a7a1c619621c8dd459391901f7..860e360345a6abfab77068c1713a34acb473ab44 100644
--- a/llama_index/llms/ai21.py
+++ b/llama_index/llms/ai21.py
@@ -2,14 +2,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.ai21_utils import ai21_model_to_context_size
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.generic_utils import (
-    completion_to_chat_decorator,
-    get_from_param_or_env,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -17,6 +10,13 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.ai21_utils import ai21_model_to_context_size
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
+from llama_index.llms.generic_utils import (
+    completion_to_chat_decorator,
+    get_from_param_or_env,
+)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/anthropic.py b/llama_index/llms/anthropic.py
index 86ceff3a58b42413fcc9a7ed6fc00fa732dbd3a5..5cbf2ca487b32b8fac02a6f067777bdd163ca36f 100644
--- a/llama_index/llms/anthropic.py
+++ b/llama_index/llms/anthropic.py
@@ -3,6 +3,17 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.anthropic_utils import (
     anthropic_modelname_to_contextsize,
     messages_to_anthropic_prompt,
@@ -18,17 +29,6 @@ from llama_index.llms.generic_utils import (
     stream_chat_to_completion_decorator,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_ANTHROPIC_MODEL = "claude-2"
diff --git a/llama_index/llms/anthropic_utils.py b/llama_index/llms/anthropic_utils.py
index f0904bd73aace13ec7bc758aefb0463062183fb6..eb2eb23fbc772368dd69af1cb60da89afcca7d4d 100644
--- a/llama_index/llms/anthropic_utils.py
+++ b/llama_index/llms/anthropic_utils.py
@@ -1,6 +1,6 @@
 from typing import Dict, Sequence
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 HUMAN_PREFIX = "\n\nHuman:"
 ASSISTANT_PREFIX = "\n\nAssistant:"
diff --git a/llama_index/llms/anyscale.py b/llama_index/llms/anyscale.py
index d9404326d11dd3fc58374bbc50544908c2152ad3..d17f86ec62f515d40bc7e0011c27b7e045227aa9 100644
--- a/llama_index/llms/anyscale.py
+++ b/llama_index/llms/anyscale.py
@@ -2,12 +2,12 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import ChatMessage, LLMMetadata
 from llama_index.llms.anyscale_utils import (
     anyscale_modelname_to_contextsize,
 )
 from llama_index.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage, LLMMetadata
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_API_BASE = "https://api.endpoints.anyscale.com/v1"
diff --git a/llama_index/llms/anyscale_utils.py b/llama_index/llms/anyscale_utils.py
index d86bbf3004fac8f84b29c66e7634bc2b5e7ad71d..b82a1c3bb3c5806b05d360a540623757cdf68339 100644
--- a/llama_index/llms/anyscale_utils.py
+++ b/llama_index/llms/anyscale_utils.py
@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Sequence
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 LLAMA_MODELS = {
     "meta-llama/Llama-2-7b-chat-hf": 4096,
diff --git a/llama_index/llms/azure_openai.py b/llama_index/llms/azure_openai.py
index 137f78f862791a3b4aff365dc867bd290695c186..8caa77f5640da9039e7a11b50613aeb958a896fe 100644
--- a/llama_index/llms/azure_openai.py
+++ b/llama_index/llms/azure_openai.py
@@ -6,13 +6,13 @@ from openai import AzureOpenAI as SyncAzureOpenAI
 
 from llama_index.bridge.pydantic import Field, PrivateAttr, root_validator
 from llama_index.callbacks import CallbackManager
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.openai_utils import (
     refresh_openai_azuread_token,
     resolve_from_aliases,
 )
-from llama_index.llms.types import ChatMessage
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/base.py b/llama_index/llms/base.py
index 734143046f16ca3d73b2ec6aab845773d72e6c7d..e3a0b1b3170f187ca7b688bfe6505ca1517ff5ec 100644
--- a/llama_index/llms/base.py
+++ b/llama_index/llms/base.py
@@ -12,7 +12,7 @@ from typing import (
 
 from llama_index.bridge.pydantic import Field, validator
 from llama_index.callbacks import CallbackManager, CBEventType, EventPayload
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
diff --git a/llama_index/llms/bedrock.py b/llama_index/llms/bedrock.py
index c6c7d4e6f1460f1db5e065517f68d203e061e5e0..b76d19b91f3e413bebc10da7d6d276c75a8cf1cd 100644
--- a/llama_index/llms/bedrock.py
+++ b/llama_index/llms/bedrock.py
@@ -3,6 +3,16 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -20,16 +30,6 @@ from llama_index.llms.generic_utils import (
     stream_completion_response_to_chat_response,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/bedrock_utils.py b/llama_index/llms/bedrock_utils.py
index a43a4a3c41ed09767d64670cf9e9121492f3077a..cf8e9a05c8f3ad455cfa3afc1775a88203f8cf78 100644
--- a/llama_index/llms/bedrock_utils.py
+++ b/llama_index/llms/bedrock_utils.py
@@ -10,6 +10,7 @@ from tenacity import (
     wait_exponential,
 )
 
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.anthropic_utils import messages_to_anthropic_prompt
 from llama_index.llms.generic_utils import (
     prompt_to_messages,
@@ -20,7 +21,6 @@ from llama_index.llms.llama_utils import (
 from llama_index.llms.llama_utils import (
     messages_to_prompt as messages_to_llama_prompt,
 )
-from llama_index.llms.types import ChatMessage
 
 HUMAN_PREFIX = "\n\nHuman:"
 ASSISTANT_PREFIX = "\n\nAssistant:"
diff --git a/llama_index/llms/clarifai.py b/llama_index/llms/clarifai.py
index 3d821023d56613e915f84b5b5f237d55e5bdf3ea..88950cc0fcd094dc808f2b8a5dcd1f5f18a203b7 100644
--- a/llama_index/llms/clarifai.py
+++ b/llama_index/llms/clarifai.py
@@ -2,12 +2,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import (
-    llm_chat_callback,
-    llm_completion_callback,
-)
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -17,6 +12,11 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import (
+    llm_chat_callback,
+    llm_completion_callback,
+)
+from llama_index.llms.llm import LLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 EXAMPLE_URL = "https://clarifai.com/anthropic/completion/models/claude-v2"
diff --git a/llama_index/llms/cohere.py b/llama_index/llms/cohere.py
index 2383a2eae0ed85a370058e8c72f6739c9484b02b..d83d6a39e23819eb16ec6ff73688f44cfd5f868a 100644
--- a/llama_index/llms/cohere.py
+++ b/llama_index/llms/cohere.py
@@ -3,6 +3,17 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -15,17 +26,6 @@ from llama_index.llms.cohere_utils import (
     messages_to_cohere_history,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/cohere_utils.py b/llama_index/llms/cohere_utils.py
index 292102f51016c9c92e48b3d437270d7f7c4f2cdb..421d9037ab0d5528fd02b6183237484d6a83f774 100644
--- a/llama_index/llms/cohere_utils.py
+++ b/llama_index/llms/cohere_utils.py
@@ -9,7 +9,7 @@ from tenacity import (
     wait_exponential,
 )
 
-from llama_index.llms.types import ChatMessage
+from llama_index.core.llms.types import ChatMessage
 
 COMMAND_MODELS = {
     "command": 4096,
diff --git a/llama_index/llms/custom.py b/llama_index/llms/custom.py
index 48eee2aee4368fbc1e8447660ed247f4fb4ea477..516a5e08c5c05df0249b8c49187075dbe8a1f690 100644
--- a/llama_index/llms/custom.py
+++ b/llama_index/llms/custom.py
@@ -1,5 +1,13 @@
 from typing import Any, Sequence
 
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -9,14 +17,6 @@ from llama_index.llms.generic_utils import (
     stream_completion_to_chat_decorator,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-)
 
 
 class CustomLLM(LLM):
diff --git a/llama_index/llms/everlyai.py b/llama_index/llms/everlyai.py
index 708b801db46579e53729b24c4a7ec084a2f4d196..211ff729c545ffdaa665d19d1976f3fe15294a84 100644
--- a/llama_index/llms/everlyai.py
+++ b/llama_index/llms/everlyai.py
@@ -2,10 +2,10 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import ChatMessage, LLMMetadata
 from llama_index.llms.everlyai_utils import everlyai_modelname_to_contextsize
 from llama_index.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage, LLMMetadata
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 EVERLYAI_API_BASE = "https://everlyai.xyz/hosted"
diff --git a/llama_index/llms/gemini.py b/llama_index/llms/gemini.py
index 54195e44173a1e777ac9a203aac5579d17ae5203..57eaa8d1db8f7602cebcd7b09638a5f925f8177a 100644
--- a/llama_index/llms/gemini.py
+++ b/llama_index/llms/gemini.py
@@ -6,6 +6,14 @@ from typing import Any, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -18,14 +26,6 @@ from llama_index.llms.gemini_utils import (
     completion_from_gemini_response,
     merge_neighboring_same_role_messages,
 )
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 
 if typing.TYPE_CHECKING:
     import google.generativeai as genai
diff --git a/llama_index/llms/gemini_utils.py b/llama_index/llms/gemini_utils.py
index f235a01345c7980cdb400c3304230f7c293b15a8..b19f9ee1947db7c2363e1cb006daf265fec880cb 100644
--- a/llama_index/llms/gemini_utils.py
+++ b/llama_index/llms/gemini_utils.py
@@ -1,12 +1,12 @@
 import typing
 from typing import Sequence, Union
 
+from llama_index.core.llms.types import MessageRole
 from llama_index.llms.base import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
 )
-from llama_index.llms.types import MessageRole
 
 if typing.TYPE_CHECKING:
     import google.ai.generativelanguage as glm
diff --git a/llama_index/llms/generic_utils.py b/llama_index/llms/generic_utils.py
index 3ad12c0c2bda0a17666808b8db949a1fea547df6..3be36a2677cdf7a3e1d25301d0b60ffe39f8aa04 100644
--- a/llama_index/llms/generic_utils.py
+++ b/llama_index/llms/generic_utils.py
@@ -1,7 +1,7 @@
 import os
 from typing import Any, Awaitable, Callable, List, Optional, Sequence
 
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
diff --git a/llama_index/llms/gradient.py b/llama_index/llms/gradient.py
index 6cc7548931e5320f5c259c06d16fff3c29f32ff2..9928590587bdc0a6295846b676fc81f49cb52a57 100644
--- a/llama_index/llms/gradient.py
+++ b/llama_index/llms/gradient.py
@@ -5,14 +5,14 @@ from typing_extensions import override
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/huggingface.py b/llama_index/llms/huggingface.py
index 4369315392c09605ec0ab5c8da8c6016fc27c5cd..bb338a1f25567efadd7b7eed76f34a36c7869df4 100644
--- a/llama_index/llms/huggingface.py
+++ b/llama_index/llms/huggingface.py
@@ -8,6 +8,17 @@ from llama_index.constants import (
     DEFAULT_CONTEXT_WINDOW,
     DEFAULT_NUM_OUTPUTS,
 )
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -20,17 +31,6 @@ from llama_index.llms.generic_utils import (
 from llama_index.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.prompts.base import PromptTemplate
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
diff --git a/llama_index/llms/konko.py b/llama_index/llms/konko.py
index ecb0562585fe80a02ee86ff5b71e51d05bd03123..3ab7cd2bdb180d156bc1239b667a86b7a0a21923 100644
--- a/llama_index/llms/konko.py
+++ b/llama_index/llms/konko.py
@@ -3,6 +3,16 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import llm_chat_callback, llm_completion_callback
 from llama_index.llms.generic_utils import (
     achat_to_completion_decorator,
@@ -24,16 +34,6 @@ from llama_index.llms.konko_utils import (
     to_openai_message_dicts,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_KONKO_MODEL = "meta-llama/Llama-2-13b-chat-hf"
diff --git a/llama_index/llms/konko_utils.py b/llama_index/llms/konko_utils.py
index a097aab4cb9f97d9a64a173174de225c34ab2d30..c285e30e9e0bf5f1ddf22a43ab1ef20f1ad2d383 100644
--- a/llama_index/llms/konko_utils.py
+++ b/llama_index/llms/konko_utils.py
@@ -11,8 +11,8 @@ from tenacity import (
 )
 
 from llama_index.bridge.pydantic import BaseModel
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.generic_utils import get_from_param_or_env
-from llama_index.llms.types import ChatMessage
 
 DEFAULT_KONKO_API_TYPE = "open_ai"
 DEFAULT_KONKO_API_BASE = "https://api.konko.ai/v1"
diff --git a/llama_index/llms/langchain.py b/llama_index/llms/langchain.py
index 56b093759997a458d309890aebee03b6140c61e7..873ee9ab29760ab223c81a8ebf51457f38b7da3a 100644
--- a/llama_index/llms/langchain.py
+++ b/llama_index/llms/langchain.py
@@ -6,13 +6,7 @@ if TYPE_CHECKING:
 
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -22,6 +16,12 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
+from llama_index.llms.llm import LLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/langchain_utils.py b/llama_index/llms/langchain_utils.py
index accc029b082b3b74e0ddac719d50acc1a553ab5c..90fefcd77455560f176784ccfba05741cc9a0ab6 100644
--- a/llama_index/llms/langchain_utils.py
+++ b/llama_index/llms/langchain_utils.py
@@ -15,9 +15,9 @@ from llama_index.bridge.langchain import (
 )
 from llama_index.bridge.langchain import BaseMessage as LCMessage
 from llama_index.constants import AI21_J2_CONTEXT_WINDOW, COHERE_CONTEXT_WINDOW
+from llama_index.core.llms.types import ChatMessage, LLMMetadata, MessageRole
 from llama_index.llms.anyscale_utils import anyscale_modelname_to_contextsize
 from llama_index.llms.openai_utils import openai_modelname_to_contextsize
-from llama_index.llms.types import ChatMessage, LLMMetadata, MessageRole
 
 
 def is_chat_model(llm: BaseLanguageModel) -> bool:
diff --git a/llama_index/llms/litellm.py b/llama_index/llms/litellm.py
index e1524c630676120776337f5edd1e3dd59615a724..4ddfea7386d030c8bf2bdd6005119f5e96786a50 100644
--- a/llama_index/llms/litellm.py
+++ b/llama_index/llms/litellm.py
@@ -3,6 +3,16 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_TEMPERATURE
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import llm_chat_callback, llm_completion_callback
 from llama_index.llms.generic_utils import (
     achat_to_completion_decorator,
@@ -24,16 +34,6 @@ from llama_index.llms.litellm_utils import (
     validate_litellm_api_key,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_LITELLM_MODEL = "gpt-3.5-turbo"
diff --git a/llama_index/llms/litellm_utils.py b/llama_index/llms/litellm_utils.py
index 2af40dab67e42e501a5f6aacb42df85f14fc0f79..ab4cefe49038fee6cc2a340e0900e183475dd25b 100644
--- a/llama_index/llms/litellm_utils.py
+++ b/llama_index/llms/litellm_utils.py
@@ -11,7 +11,7 @@ from tenacity import (
 )
 
 from llama_index.bridge.pydantic import BaseModel
-from llama_index.llms.types import ChatMessage
+from llama_index.core.llms.types import ChatMessage
 
 MISSING_API_KEY_ERROR_MESSAGE = """No API key found for LLM.
 E.g. to use openai Please set the OPENAI_API_KEY environment variable or \
diff --git a/llama_index/llms/llama_api.py b/llama_index/llms/llama_api.py
index 9f7e07e13731553ca1a97d4a0c2b39a2cfc36d04..1364a4a79713ab53bc87547db75aca21caf51a0a 100644
--- a/llama_index/llms/llama_api.py
+++ b/llama_index/llms/llama_api.py
@@ -3,14 +3,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.generic_utils import chat_to_completion_decorator
-from llama_index.llms.openai_utils import (
-    from_openai_message_dict,
-    to_openai_message_dicts,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -18,6 +11,13 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
+from llama_index.llms.generic_utils import chat_to_completion_decorator
+from llama_index.llms.openai_utils import (
+    from_openai_message_dict,
+    to_openai_message_dicts,
+)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/llama_cpp.py b/llama_index/llms/llama_cpp.py
index 7ab0bd1cda5fec8574ed8f6f105ed02f4463b67f..124554c92a4701b1401781577cf32fb0e02e4230 100644
--- a/llama_index/llms/llama_cpp.py
+++ b/llama_index/llms/llama_cpp.py
@@ -11,13 +11,7 @@ from llama_index.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -25,6 +19,12 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
+from llama_index.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 from llama_index.utils import get_cache_dir
 
diff --git a/llama_index/llms/llama_utils.py b/llama_index/llms/llama_utils.py
index 2ee0e950b7510c509ccaccdd1b734a5c8c0c228f..642bd5b7c0e4a137ff909b7b43b1e559b815c4f4 100644
--- a/llama_index/llms/llama_utils.py
+++ b/llama_index/llms/llama_utils.py
@@ -1,6 +1,6 @@
 from typing import List, Optional, Sequence
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 BOS, EOS = "<s>", "</s>"
 B_INST, E_INST = "[INST]", "[/INST]"
diff --git a/llama_index/llms/llm.py b/llama_index/llms/llm.py
index afe5442a5a03bfe31aec7123630d2ec45d5d6f43..850d3340c3c4ef8cdc991c81f694a08b4db49c88 100644
--- a/llama_index/llms/llm.py
+++ b/llama_index/llms/llm.py
@@ -3,11 +3,7 @@ from typing import Any, List, Optional, Protocol, Sequence, runtime_checkable
 
 from llama_index.bridge.pydantic import BaseModel, Field, validator
 from llama_index.callbacks import CBEventType, EventPayload
-from llama_index.llms.base import BaseLLM
-from llama_index.llms.generic_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponseAsyncGen,
     ChatResponseGen,
@@ -15,6 +11,10 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     MessageRole,
 )
+from llama_index.llms.base import BaseLLM
+from llama_index.llms.generic_utils import (
+    messages_to_prompt as generic_messages_to_prompt,
+)
 from llama_index.prompts import BasePromptTemplate, PromptTemplate
 from llama_index.types import (
     BaseOutputParser,
diff --git a/llama_index/llms/localai.py b/llama_index/llms/localai.py
index 4da2dc03b8dd1e6cc94a3dc23c1e2a036251d237..15ca2a463e5a2e401f658ca4d68793ead329a58d 100644
--- a/llama_index/llms/localai.py
+++ b/llama_index/llms/localai.py
@@ -11,10 +11,10 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW
+from llama_index.core.llms.types import ChatMessage, LLMMetadata
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.openai_like import OpenAILike
 from llama_index.llms.openai_utils import is_function_calling_model
-from llama_index.llms.types import ChatMessage, LLMMetadata
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 # Use these as kwargs for OpenAILike to connect to LocalAIs
diff --git a/llama_index/llms/mistral.py b/llama_index/llms/mistral.py
index e72f22fc488a0252f989d32486eef32468cac2fb..e4615253763d4fb4e4c7db1b44c9b2e549d9b940 100644
--- a/llama_index/llms/mistral.py
+++ b/llama_index/llms/mistral.py
@@ -3,6 +3,19 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_TEMPERATURE
+
+# from mistralai.models.chat_completion import ChatMessage
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -18,19 +31,6 @@ from llama_index.llms.llm import LLM
 from llama_index.llms.mistralai_utils import (
     mistralai_modelname_to_contextsize,
 )
-
-# from mistralai.models.chat_completion import ChatMessage
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_MISTRALAI_MODEL = "mistral-tiny"
diff --git a/llama_index/llms/mock.py b/llama_index/llms/mock.py
index 9e3cf32e2d2536bde51024c44f6148879ab8fbb3..0cce089a8a6614496a08e9ab548cfe56f4b81ad5 100644
--- a/llama_index/llms/mock.py
+++ b/llama_index/llms/mock.py
@@ -1,14 +1,14 @@
 from typing import Any, Callable, Optional, Sequence
 
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 from llama_index.types import PydanticProgramMode
 
 
diff --git a/llama_index/llms/monsterapi.py b/llama_index/llms/monsterapi.py
index 0e21207cb05d2ccf672526979d7ad7d95fe30f6e..aaa1090e57bc1cca2ec685331f8c423b275d2220 100644
--- a/llama_index/llms/monsterapi.py
+++ b/llama_index/llms/monsterapi.py
@@ -3,15 +3,15 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_MONSTER_TEMP = 0.75
diff --git a/llama_index/llms/ollama.py b/llama_index/llms/ollama.py
index a2b3f9773ce092ce068d3472c0a43b3288009334..1801c49b46aae75bcc5a2539ab41b7d81af67be1 100644
--- a/llama_index/llms/ollama.py
+++ b/llama_index/llms/ollama.py
@@ -6,9 +6,7 @@ from httpx import Timeout
 
 from llama_index.bridge.pydantic import Field
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -17,6 +15,8 @@ from llama_index.llms.types import (
     LLMMetadata,
     MessageRole,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 
 DEFAULT_REQUEST_TIMEOUT = 30.0
 
diff --git a/llama_index/llms/openai.py b/llama_index/llms/openai.py
index 5a7ff6946b841ead9b3a9064a6fd1ef2ed0aa776..d30af239b82edc014c4ff074b0570c30e8273141 100644
--- a/llama_index/llms/openai.py
+++ b/llama_index/llms/openai.py
@@ -26,6 +26,17 @@ from llama_index.callbacks import CallbackManager
 from llama_index.constants import (
     DEFAULT_TEMPERATURE,
 )
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -49,17 +60,6 @@ from llama_index.llms.openai_utils import (
     resolve_openai_credentials,
     to_openai_message_dicts,
 )
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-    MessageRole,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"
diff --git a/llama_index/llms/openai_like.py b/llama_index/llms/openai_like.py
index ced6bda325cf1c12b0ef78ae8bb36e8bd088f621..09ef4241e6075c16725aa288e54317a929a64347 100644
--- a/llama_index/llms/openai_like.py
+++ b/llama_index/llms/openai_like.py
@@ -2,8 +2,8 @@ from typing import Optional, Union
 
 from llama_index.bridge.pydantic import Field
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW
+from llama_index.core.llms.types import LLMMetadata
 from llama_index.llms.openai import OpenAI, Tokenizer
-from llama_index.llms.types import LLMMetadata
 
 
 class OpenAILike(OpenAI):
diff --git a/llama_index/llms/openai_utils.py b/llama_index/llms/openai_utils.py
index 099e048cbd1c5c9d6a7032263cbdd68845b22af4..830425c6af3b482e1406700a760e2c216ce94693 100644
--- a/llama_index/llms/openai_utils.py
+++ b/llama_index/llms/openai_utils.py
@@ -20,8 +20,8 @@ from tenacity import (
 from tenacity.stop import stop_base
 
 from llama_index.bridge.pydantic import BaseModel
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.generic_utils import get_from_param_or_env
-from llama_index.llms.types import ChatMessage
 
 DEFAULT_OPENAI_API_TYPE = "open_ai"
 DEFAULT_OPENAI_API_BASE = "https://api.openai.com/v1"
diff --git a/llama_index/llms/openllm.py b/llama_index/llms/openllm.py
index fde68121bfdff59925857532da8790355dedd8b5..dea6d54d059bdf2dc50a1eb21f40daa4a8561f89 100644
--- a/llama_index/llms/openllm.py
+++ b/llama_index/llms/openllm.py
@@ -13,6 +13,16 @@ from typing import (
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import (
     llm_chat_callback,
     llm_completion_callback,
@@ -24,16 +34,6 @@ from llama_index.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseAsyncGen,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseAsyncGen,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 from llama_index.types import PydanticProgramMode
 
 logger = logging.getLogger(__name__)
diff --git a/llama_index/llms/openrouter.py b/llama_index/llms/openrouter.py
index b8ff7024a2d0ff5b1809f65e7ed114997beed6ca..77ac299a1472ee03170d9bb6d6b07ed952a34a28 100644
--- a/llama_index/llms/openrouter.py
+++ b/llama_index/llms/openrouter.py
@@ -6,9 +6,9 @@ from llama_index.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
+from llama_index.core.llms.types import LLMMetadata
 from llama_index.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai_like import OpenAILike
-from llama_index.llms.types import LLMMetadata
 
 DEFAULT_API_BASE = "https://openrouter.ai/api/v1"
 DEFAULT_MODEL = "gryphe/mythomax-l2-13b"
diff --git a/llama_index/llms/palm.py b/llama_index/llms/palm.py
index 30b49b6e3248acb53142da4e37642c15f1a164d0..1e0200001eca23d0c21df58dde2160c55f83fb11 100644
--- a/llama_index/llms/palm.py
+++ b/llama_index/llms/palm.py
@@ -5,14 +5,14 @@ from typing import Any, Callable, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_PALM_MODEL = "models/text-bison-001"
diff --git a/llama_index/llms/perplexity.py b/llama_index/llms/perplexity.py
index dd36e6bb22bb5ea8e49f4fdf7f01f85cb5b956ff..005e010ba39e6ae57960f6e0a9a14e8686647762 100644
--- a/llama_index/llms/perplexity.py
+++ b/llama_index/llms/perplexity.py
@@ -6,9 +6,7 @@ import requests
 
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -18,6 +16,8 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.llm import LLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/portkey.py b/llama_index/llms/portkey.py
index 48c92ca63860e1964d942d9882a1796ed6066779..1c1f1ba234864278e0cbf875b83b5223cd54edea 100644
--- a/llama_index/llms/portkey.py
+++ b/llama_index/llms/portkey.py
@@ -4,6 +4,14 @@ Portkey integration with Llama_index for enhanced monitoring.
 from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union, cast
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseGen,
+    LLMMetadata,
+)
 from llama_index.llms.base import llm_chat_callback, llm_completion_callback
 from llama_index.llms.custom import CustomLLM
 from llama_index.llms.generic_utils import (
@@ -18,14 +26,6 @@ from llama_index.llms.portkey_utils import (
     get_llm,
     is_chat_model,
 )
-from llama_index.llms.types import (
-    ChatMessage,
-    ChatResponse,
-    ChatResponseGen,
-    CompletionResponse,
-    CompletionResponseGen,
-    LLMMetadata,
-)
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 if TYPE_CHECKING:
diff --git a/llama_index/llms/portkey_utils.py b/llama_index/llms/portkey_utils.py
index e23e6b5ee5455c58cb8d092c7faa57a74a0e143f..e2da09c1025dd8ef2e2bf56c726b2619d71430d4 100644
--- a/llama_index/llms/portkey_utils.py
+++ b/llama_index/llms/portkey_utils.py
@@ -6,6 +6,7 @@ the functionality and usability of the Portkey class
 """
 from typing import TYPE_CHECKING, List
 
+from llama_index.core.llms.types import LLMMetadata
 from llama_index.llms.anthropic import Anthropic
 from llama_index.llms.anthropic_utils import CLAUDE_MODELS
 from llama_index.llms.openai import OpenAI
@@ -16,7 +17,6 @@ from llama_index.llms.openai_utils import (
     GPT4_MODELS,
     TURBO_MODELS,
 )
-from llama_index.llms.types import LLMMetadata
 
 if TYPE_CHECKING:
     from portkey import (
diff --git a/llama_index/llms/predibase.py b/llama_index/llms/predibase.py
index 38b86216f1016d8683c34f793da3896967810542..cca2997a31fb537a41d55e90d9dca8a48d3ac1cb 100644
--- a/llama_index/llms/predibase.py
+++ b/llama_index/llms/predibase.py
@@ -8,14 +8,14 @@ from llama_index.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.llms.base import llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_completion_callback
+from llama_index.llms.custom import CustomLLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 
diff --git a/llama_index/llms/replicate.py b/llama_index/llms/replicate.py
index 16c8adae63b45f6001ff1cef01c29c3652d8f861..bfbd95eaba1fecd29e520c4a241033ac1020f4f6 100644
--- a/llama_index/llms/replicate.py
+++ b/llama_index/llms/replicate.py
@@ -2,13 +2,7 @@ from typing import Any, Dict, Sequence
 
 from llama_index.bridge.pydantic import Field
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -16,6 +10,12 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.custom import CustomLLM
+from llama_index.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
 
 DEFAULT_REPLICATE_TEMP = 0.75
 
diff --git a/llama_index/llms/rungpt.py b/llama_index/llms/rungpt.py
index 8351635325802089482f6b4744d628b9146df4f0..e0296ac1f825e8875f17fcc6c8d49a3d5118968a 100644
--- a/llama_index/llms/rungpt.py
+++ b/llama_index/llms/rungpt.py
@@ -4,9 +4,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from llama_index.bridge.pydantic import Field
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -17,6 +15,8 @@ from llama_index.llms.types import (
     LLMMetadata,
     MessageRole,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.llm import LLM
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
 DEFAULT_RUNGPT_MODEL = "rungpt"
diff --git a/llama_index/llms/types.py b/llama_index/llms/types.py
index 9db785861d0a20d2b702d9aabe24c6bb07202d7b..ebc949983eb299a0c1e99e51a26a4720814a46a3 100644
--- a/llama_index/llms/types.py
+++ b/llama_index/llms/types.py
@@ -1,110 +1,29 @@
-from enum import Enum
-from typing import Any, AsyncGenerator, Generator, Optional
-
-from llama_index.bridge.pydantic import BaseModel, Field
-from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-
-
-class MessageRole(str, Enum):
-    """Message role."""
-
-    SYSTEM = "system"
-    USER = "user"
-    ASSISTANT = "assistant"
-    FUNCTION = "function"
-    TOOL = "tool"
-
-
-# ===== Generic Model Input - Chat =====
-class ChatMessage(BaseModel):
-    """Chat message."""
-
-    role: MessageRole = MessageRole.USER
-    content: Optional[Any] = ""
-    additional_kwargs: dict = Field(default_factory=dict)
-
-    def __str__(self) -> str:
-        return f"{self.role.value}: {self.content}"
-
-
-# ===== Generic Model Output - Chat =====
-class ChatResponse(BaseModel):
-    """Chat response."""
-
-    message: ChatMessage
-    raw: Optional[dict] = None
-    delta: Optional[str] = None
-    additional_kwargs: dict = Field(default_factory=dict)
-
-    def __str__(self) -> str:
-        return str(self.message)
-
-
-ChatResponseGen = Generator[ChatResponse, None, None]
-ChatResponseAsyncGen = AsyncGenerator[ChatResponse, None]
-
-
-# ===== Generic Model Output - Completion =====
-class CompletionResponse(BaseModel):
-    """
-    Completion response.
-
-    Fields:
-        text: Text content of the response if not streaming, or if streaming,
-            the current extent of streamed text.
-        additional_kwargs: Additional information on the response(i.e. token
-            counts, function calling information).
-        raw: Optional raw JSON that was parsed to populate text, if relevant.
-        delta: New text that just streamed in (only relevant when streaming).
-    """
-
-    text: str
-    additional_kwargs: dict = Field(default_factory=dict)
-    raw: Optional[dict] = None
-    delta: Optional[str] = None
-
-    def __str__(self) -> str:
-        return self.text
-
-
-CompletionResponseGen = Generator[CompletionResponse, None, None]
-CompletionResponseAsyncGen = AsyncGenerator[CompletionResponse, None]
-
-
-class LLMMetadata(BaseModel):
-    context_window: int = Field(
-        default=DEFAULT_CONTEXT_WINDOW,
-        description=(
-            "Total number of tokens the model can be input and output for one response."
-        ),
-    )
-    num_output: int = Field(
-        default=DEFAULT_NUM_OUTPUTS,
-        description="Number of tokens the model can output when generating a response.",
-    )
-    is_chat_model: bool = Field(
-        default=False,
-        description=(
-            "Set True if the model exposes a chat interface (i.e. can be passed a"
-            " sequence of messages, rather than text), like OpenAI's"
-            " /v1/chat/completions endpoint."
-        ),
-    )
-    is_function_calling_model: bool = Field(
-        default=False,
-        # SEE: https://openai.com/blog/function-calling-and-other-api-updates
-        description=(
-            "Set True if the model supports function calling messages, similar to"
-            " OpenAI's function calling API. For example, converting 'Email Anya to"
-            " see if she wants to get coffee next Friday' to a function call like"
-            " `send_email(to: string, body: string)`."
-        ),
-    )
-    model_name: str = Field(
-        default="unknown",
-        description=(
-            "The model's name used for logging, testing, and sanity checking. For some"
-            " models this can be automatically discerned. For other models, like"
-            " locally loaded models, this must be manually specified."
-        ),
-    )
+"""LLM Types.
+
+Maintain this file for backwards compat.
+
+"""
+
+from llama_index.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
+
+__all__ = [
+    "ChatMessage",
+    "ChatResponse",
+    "ChatResponseAsyncGen",
+    "ChatResponseGen",
+    "CompletionResponse",
+    "CompletionResponseAsyncGen",
+    "CompletionResponseGen",
+    "LLMMetadata",
+    "MessageRole",
+]
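
With llama_index/llms/types.py reduced to the re-export shim above, the legacy and the new import paths resolve to the same objects. A quick sanity check, as a sketch (assuming both modules import cleanly after the change):

    # The legacy module should hand back the exact classes now defined in core.
    from llama_index.core.llms.types import ChatMessage
    from llama_index.llms.types import ChatMessage as LegacyChatMessage

    assert LegacyChatMessage is ChatMessage  # same class object, not a copy
    msg = LegacyChatMessage(role="user", content="hi")  # pydantic coerces "user" to MessageRole.USER
    print(msg)  # "user: hi", per ChatMessage.__str__
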
diff --git a/llama_index/llms/vertex.py b/llama_index/llms/vertex.py
index f878381bbe76b62010ef7c2716608548a6cfc77f..9abae84e7b52aed34e40ec607e562c163c2f46c2 100644
--- a/llama_index/llms/vertex.py
+++ b/llama_index/llms/vertex.py
@@ -2,12 +2,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import (
-    llm_chat_callback,
-    llm_completion_callback,
-)
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -18,6 +13,11 @@ from llama_index.llms.types import (
     LLMMetadata,
     MessageRole,
 )
+from llama_index.llms.base import (
+    llm_chat_callback,
+    llm_completion_callback,
+)
+from llama_index.llms.llm import LLM
 from llama_index.llms.vertex_gemini_utils import is_gemini_model
 from llama_index.llms.vertex_utils import (
     CHAT_MODELS,
diff --git a/llama_index/llms/vertex_utils.py b/llama_index/llms/vertex_utils.py
index e25d1a7cd0d21823f911d17d7700b148a1c6374d..0ec14f1c33137a1af1ae55283759ff22c4ca1082 100644
--- a/llama_index/llms/vertex_utils.py
+++ b/llama_index/llms/vertex_utils.py
@@ -12,7 +12,7 @@ from tenacity import (
     wait_exponential,
 )
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 CHAT_MODELS = ["chat-bison", "chat-bison-32k", "chat-bison@001"]
 TEXT_MODELS = ["text-bison", "text-bison-32k", "text-bison@001"]
diff --git a/llama_index/llms/vllm.py b/llama_index/llms/vllm.py
index e52870e93a95a77e125c7c59f70ba5928f966c40..25b38c1970490aac37431449f523884f7eeed437 100644
--- a/llama_index/llms/vllm.py
+++ b/llama_index/llms/vllm.py
@@ -3,16 +3,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.generic_utils import (
-    completion_response_to_chat_response,
-    stream_completion_response_to_chat_response,
-)
-from llama_index.llms.generic_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -22,6 +13,15 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.generic_utils import (
+    completion_response_to_chat_response,
+    stream_completion_response_to_chat_response,
+)
+from llama_index.llms.generic_utils import (
+    messages_to_prompt as generic_messages_to_prompt,
+)
+from llama_index.llms.llm import LLM
 from llama_index.llms.vllm_utils import get_response, post_http_request
 from llama_index.types import BaseOutputParser, PydanticProgramMode
 
diff --git a/llama_index/llms/watsonx.py b/llama_index/llms/watsonx.py
index 15c69392c06af1205133c274276b1e25956ca56f..765cf0f5a85a973931e616c1b1e126b0fd2f688a 100644
--- a/llama_index/llms/watsonx.py
+++ b/llama_index/llms/watsonx.py
@@ -2,13 +2,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import llm_chat_callback, llm_completion_callback
-from llama_index.llms.generic_utils import (
-    completion_to_chat_decorator,
-    stream_completion_to_chat_decorator,
-)
-from llama_index.llms.llm import LLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -18,6 +12,12 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.llms.generic_utils import (
+    completion_to_chat_decorator,
+    stream_completion_to_chat_decorator,
+)
+from llama_index.llms.llm import LLM
 from llama_index.llms.watsonx_utils import (
     WATSONX_MODELS,
     get_from_param_or_env_without_error,
diff --git a/llama_index/llms/xinference.py b/llama_index/llms/xinference.py
index 62c02e90f4e3564a4825d2ba9e0a9a2cc01f7171..f4b970bcff60ad7d18c1e10b744c7b90350ca378 100644
--- a/llama_index/llms/xinference.py
+++ b/llama_index/llms/xinference.py
@@ -3,12 +3,7 @@ from typing import Any, Callable, Dict, Optional, Sequence, Tuple
 
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
-from llama_index.llms.base import (
-    llm_chat_callback,
-    llm_completion_callback,
-)
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
@@ -17,6 +12,11 @@ from llama_index.llms.types import (
     LLMMetadata,
     MessageRole,
 )
+from llama_index.llms.base import (
+    llm_chat_callback,
+    llm_completion_callback,
+)
+from llama_index.llms.custom import CustomLLM
 from llama_index.llms.xinference_utils import (
     xinference_message_to_history,
     xinference_modelname_to_contextsize,
diff --git a/llama_index/llms/xinference_utils.py b/llama_index/llms/xinference_utils.py
index bc1be05157a563848cbf6c34924685a18e27a4b5..224df573fd640a97c13a3f3bd3d77a7774c02c1e 100644
--- a/llama_index/llms/xinference_utils.py
+++ b/llama_index/llms/xinference_utils.py
@@ -2,7 +2,7 @@ from typing import Optional
 
 from typing_extensions import NotRequired, TypedDict
 
-from llama_index.llms.types import ChatMessage
+from llama_index.core.llms.types import ChatMessage
 
 XINFERENCE_MODEL_SIZES = {
     "baichuan": 2048,
diff --git a/llama_index/memory/chat_memory_buffer.py b/llama_index/memory/chat_memory_buffer.py
index a8fcb64cdb9d3c0710129f6add489a9754f6f272..5aa96189cc6a76277553be178ad55642e2d897e6 100644
--- a/llama_index/memory/chat_memory_buffer.py
+++ b/llama_index/memory/chat_memory_buffer.py
@@ -2,6 +2,6 @@ import json
 from typing import Any, Callable, Dict, List, Optional
 
 from llama_index.bridge.pydantic import Field, root_validator
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.memory.types import DEFAULT_CHAT_STORE_KEY, BaseMemory
diff --git a/llama_index/memory/types.py b/llama_index/memory/types.py
index 42ea49cb673ebf8c6d72dcaac51d8fee46eb7d61..a84a18858e45214291bed5e21b7770154612c2cd 100644
--- a/llama_index/memory/types.py
+++ b/llama_index/memory/types.py
@@ -1,8 +1,8 @@
 from abc import abstractmethod
 from typing import Any, List, Optional
 
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.llm import LLM
-from llama_index.llms.types import ChatMessage
 from llama_index.schema import BaseComponent
 
 DEFAULT_CHAT_STORE_KEY = "chat_history"
diff --git a/llama_index/multi_modal_llms/base.py b/llama_index/multi_modal_llms/base.py
index fd6f16878c438ed60761ad7564fa1bae0b7eb2d9..068ae2e07221b22da23018975bfdfebf0b3771de 100644
--- a/llama_index/multi_modal_llms/base.py
+++ b/llama_index/multi_modal_llms/base.py
@@ -7,7 +7,7 @@ from llama_index.constants import (
     DEFAULT_NUM_INPUT_FILES,
     DEFAULT_NUM_OUTPUTS,
 )
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
diff --git a/llama_index/multi_modal_llms/gemini.py b/llama_index/multi_modal_llms/gemini.py
index 13935cad2036ef600c25a049b2087a5f8b535668..aa6920a3b602cfb77817282e1f6b72bb24f7a1c6 100644
--- a/llama_index/multi_modal_llms/gemini.py
+++ b/llama_index/multi_modal_llms/gemini.py
@@ -6,13 +6,7 @@ from typing import Any, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
-from llama_index.llms.gemini_utils import (
-    ROLES_FROM_GEMINI,
-    chat_from_gemini_response,
-    chat_message_to_gemini,
-    completion_from_gemini_response,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -21,6 +15,12 @@ from llama_index.llms.types import (
     CompletionResponseAsyncGen,
     CompletionResponseGen,
 )
+from llama_index.llms.gemini_utils import (
+    ROLES_FROM_GEMINI,
+    chat_from_gemini_response,
+    chat_message_to_gemini,
+    completion_from_gemini_response,
+)
 from llama_index.multi_modal_llms import (
     MultiModalLLM,
     MultiModalLLMMetadata,
diff --git a/llama_index/multi_modal_llms/openai.py b/llama_index/multi_modal_llms/openai.py
index e92c8a625a72f7f2345e2e1be8ceb5d3c4000a62..2d37fc1194a84cec9c287fdba695c26f948ce893 100644
--- a/llama_index/multi_modal_llms/openai.py
+++ b/llama_index/multi_modal_llms/openai.py
@@ -16,15 +16,7 @@ from llama_index.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.llms.generic_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
-from llama_index.llms.openai_utils import (
-    from_openai_message,
-    resolve_openai_credentials,
-    to_openai_message_dicts,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -34,6 +26,14 @@ from llama_index.llms.types import (
     CompletionResponseGen,
     MessageRole,
 )
+from llama_index.llms.generic_utils import (
+    messages_to_prompt as generic_messages_to_prompt,
+)
+from llama_index.llms.openai_utils import (
+    from_openai_message,
+    resolve_openai_credentials,
+    to_openai_message_dicts,
+)
 from llama_index.multi_modal_llms import (
     MultiModalLLM,
     MultiModalLLMMetadata,
diff --git a/llama_index/multi_modal_llms/replicate_multi_modal.py b/llama_index/multi_modal_llms/replicate_multi_modal.py
index b0ae63ca6aa2e399e984ee9ee263f709d5baca16..3cf2a33163f6c97488decfd61e5ab493bc3dd735 100644
--- a/llama_index/multi_modal_llms/replicate_multi_modal.py
+++ b/llama_index/multi_modal_llms/replicate_multi_modal.py
@@ -4,10 +4,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.bridge.pydantic import Field, PrivateAttr
 from llama_index.callbacks import CallbackManager
 from llama_index.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.llms.generic_utils import (
-    messages_to_prompt as generic_messages_to_prompt,
-)
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -16,6 +13,9 @@ from llama_index.llms.types import (
     CompletionResponseAsyncGen,
     CompletionResponseGen,
 )
+from llama_index.llms.generic_utils import (
+    messages_to_prompt as generic_messages_to_prompt,
+)
 from llama_index.multi_modal_llms import (
     MultiModalLLM,
     MultiModalLLMMetadata,
diff --git a/llama_index/node_parser/relational/base_element.py b/llama_index/node_parser/relational/base_element.py
index 85ac34c6725b26a907a9b3715b0314a46781fc98..9758f8eda6f24786d041aab246574bcda4b1771e 100644
--- a/llama_index/node_parser/relational/base_element.py
+++ b/llama_index/node_parser/relational/base_element.py
@@ -6,10 +6,10 @@ from tqdm import tqdm
 
 from llama_index.bridge.pydantic import BaseModel, Field, ValidationError
 from llama_index.callbacks.base import CallbackManager
+from llama_index.core.response.schema import PydanticResponse
 from llama_index.llms.llm import LLM
 from llama_index.llms.openai import OpenAI
 from llama_index.node_parser.interface import NodeParser
-from llama_index.response.schema import PydanticResponse
 from llama_index.schema import BaseNode, Document, IndexNode, TextNode
 from llama_index.utils import get_tqdm_iterable
 
diff --git a/llama_index/objects/base.py b/llama_index/objects/base.py
index 210859b766373a85de89968cc3a60851b6708dc9..e6aeb7678d84fc8315a2a3602f720e693fa81dbb 100644
--- a/llama_index/objects/base.py
+++ b/llama_index/objects/base.py
@@ -4,7 +4,7 @@ import pickle
 import warnings
 from typing import Any, Generic, List, Optional, Sequence, Type, TypeVar
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.base import BaseIndex
 from llama_index.indices.vector_store.base import VectorStoreIndex
 from llama_index.objects.base_node_mapping import (
diff --git a/llama_index/prompts/__init__.py b/llama_index/prompts/__init__.py
index 40955a80679572f8d7b2c841e3477251e4028482..9f9ec2b3b281ca650b122e486a7615f9399139c3 100644
--- a/llama_index/prompts/__init__.py
+++ b/llama_index/prompts/__init__.py
@@ -1,6 +1,6 @@
 """Prompt class."""
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.prompts.base import (
     BasePromptTemplate,
     ChatPromptTemplate,
diff --git a/llama_index/prompts/base.py b/llama_index/prompts/base.py
index 18f0532b25921861b1e07dde77881a6dcf8d69ee..4b3a717f55726a5dadc1e5647519ace78e0cd436 100644
--- a/llama_index/prompts/base.py
+++ b/llama_index/prompts/base.py
@@ -13,6 +13,7 @@ if TYPE_CHECKING:
         ConditionalPromptSelector as LangchainSelector,
     )
 from llama_index.bridge.pydantic import BaseModel
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.base import BaseLLM
 from llama_index.llms.generic_utils import (
     messages_to_prompt as default_messages_to_prompt,
@@ -20,7 +21,6 @@ from llama_index.llms.generic_utils import (
 from llama_index.llms.generic_utils import (
     prompt_to_messages,
 )
-from llama_index.llms.types import ChatMessage
 from llama_index.prompts.prompt_type import PromptType
 from llama_index.prompts.utils import get_template_vars
 from llama_index.types import BaseOutputParser
diff --git a/llama_index/prompts/chat_prompts.py b/llama_index/prompts/chat_prompts.py
index 3fb855103103e1d34d184d204c17154776d784ee..f83ac5584f00b551308f682f0d45fd0cabda7b94 100644
--- a/llama_index/prompts/chat_prompts.py
+++ b/llama_index/prompts/chat_prompts.py
@@ -1,6 +1,6 @@
 """Prompts for ChatGPT."""
 
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.prompts.base import ChatPromptTemplate
 
 # text qa prompt
diff --git a/llama_index/query_engine/__init__.py b/llama_index/query_engine/__init__.py
index 8aa6632eb1ca1777526b40c04bc92363cd867259..f3bdfd462afc19dfff1dd4d29194782b8b8dcb6c 100644
--- a/llama_index/query_engine/__init__.py
+++ b/llama_index/query_engine/__init__.py
@@ -1,4 +1,4 @@
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 
 # SQL
 from llama_index.indices.struct_store.sql_query import (
diff --git a/llama_index/query_engine/citation_query_engine.py b/llama_index/query_engine/citation_query_engine.py
index 2268a866eb6639b89737ca58f594a2246d16390f..6c4aa6af92054c12d333750cc0b775cb23eddf0d 100644
--- a/llama_index/query_engine/citation_query_engine.py
+++ b/llama_index/query_engine/citation_query_engine.py
@@ -2,14 +2,15 @@ from typing import Any, List, Optional, Sequence
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.indices.base import BaseGPTIndex
 from llama_index.node_parser import SentenceSplitter, TextSplitter
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.prompts import PromptTemplate
 from llama_index.prompts.base import BasePromptTemplate
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.response_synthesizers import (
     BaseSynthesizer,
     ResponseMode,
diff --git a/llama_index/query_engine/cogniswitch_query_engine.py b/llama_index/query_engine/cogniswitch_query_engine.py
index 072c0512f1f51682b06ba1a867d71e59a04cf2f6..c6886f275c3484571f7513db200e6a149911dd9c 100644
--- a/llama_index/query_engine/cogniswitch_query_engine.py
+++ b/llama_index/query_engine/cogniswitch_query_engine.py
@@ -2,8 +2,8 @@ from typing import Any, Dict
 
 import requests
 
-from llama_index.core import BaseQueryEngine
-from llama_index.response.schema import Response
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import Response
 from llama_index.schema import QueryBundle
 
 
diff --git a/llama_index/query_engine/custom.py b/llama_index/query_engine/custom.py
index bbee3e9b7b7bd1a74af05c0e259da58431cef068..7b534edbb774f6ef620b7053f033f08c299746e5 100644
--- a/llama_index/query_engine/custom.py
+++ b/llama_index/query_engine/custom.py
@@ -5,9 +5,9 @@ from typing import Union
 
 from llama_index.bridge.pydantic import BaseModel, Field
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import QueryBundle, QueryType
 
 STR_OR_RESPONSE_TYPE = Union[RESPONSE_TYPE, str]
diff --git a/llama_index/query_engine/flare/base.py b/llama_index/query_engine/flare/base.py
index c83473f8e2ff78c6a7e081ef3dd1004bfffe7f5c..89c19e82033db1be783815f92a48588b51c88610 100644
--- a/llama_index/query_engine/flare/base.py
+++ b/llama_index/query_engine/flare/base.py
@@ -7,7 +7,8 @@ Active Retrieval Augmented Generation.
 from typing import Any, Dict, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
 from llama_index.query_engine.flare.answer_inserter import (
@@ -18,7 +19,6 @@ from llama_index.query_engine.flare.output_parser import (
     IsDoneOutputParser,
     QueryTaskOutputParser,
 )
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import QueryBundle
 from llama_index.service_context import ServiceContext
 from llama_index.utils import print_text
diff --git a/llama_index/query_engine/graph_query_engine.py b/llama_index/query_engine/graph_query_engine.py
index 98b594724e1c121c5c19de28f569e4b3317430b6..b97ea0add71a1be26f3272287e7ed3bc7cdd364e 100644
--- a/llama_index/query_engine/graph_query_engine.py
+++ b/llama_index/query_engine/graph_query_engine.py
@@ -1,9 +1,9 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.indices.composability.graph import ComposableGraph
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.schema import IndexNode, NodeWithScore, QueryBundle, TextNode
 
 
diff --git a/llama_index/query_engine/knowledge_graph_query_engine.py b/llama_index/query_engine/knowledge_graph_query_engine.py
index 0c156d97368b1bf0c28864e04af516ce97041200..cf0d002afc2f41cfea0687ac3b7a42380dc897dd 100644
--- a/llama_index/query_engine/knowledge_graph_query_engine.py
+++ b/llama_index/query_engine/knowledge_graph_query_engine.py
@@ -4,14 +4,14 @@ import logging
 from typing import Any, Dict, List, Optional, Sequence
 
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.graph_stores.registry import (
     GRAPH_STORE_CLASS_TO_GRAPH_STORE_TYPE,
     GraphStoreType,
 )
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate, PromptType
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.response_synthesizers import BaseSynthesizer, get_response_synthesizer
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
 from llama_index.service_context import ServiceContext
diff --git a/llama_index/query_engine/multi_modal.py b/llama_index/query_engine/multi_modal.py
index 886b8acb17d44867dda876842c080c3265ce9bd7..a207f3e197944cd1103b0d361b7132aefe36bb99 100644
--- a/llama_index/query_engine/multi_modal.py
+++ b/llama_index/query_engine/multi_modal.py
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.indices.multi_modal import MultiModalVectorIndexRetriever
 from llama_index.indices.query.base import BaseQueryEngine
 from llama_index.indices.query.schema import QueryBundle, QueryType
@@ -11,7 +12,6 @@ from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.prompts import BasePromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import ImageNode, NodeWithScore
 
 
diff --git a/llama_index/query_engine/multistep_query_engine.py b/llama_index/query_engine/multistep_query_engine.py
index fc875ce65ad6c7de1d95db699388a215b7b78df7..68e42820a5ae0cef0cf31977586973bdc4f6014b 100644
--- a/llama_index/query_engine/multistep_query_engine.py
+++ b/llama_index/query_engine/multistep_query_engine.py
@@ -1,10 +1,10 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
 
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.response_synthesizers import BaseSynthesizer, get_response_synthesizer
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
 
diff --git a/llama_index/query_engine/pandas_query_engine.py b/llama_index/query_engine/pandas_query_engine.py
index a6ebb95307f3b7323be781f4c750dbc5aaceff9e..24e4ab40df8a968b7e69452fc378488e42a30c38 100644
--- a/llama_index/query_engine/pandas_query_engine.py
+++ b/llama_index/query_engine/pandas_query_engine.py
@@ -13,13 +13,13 @@ from typing import Any, Callable, Optional
 import numpy as np
 import pandas as pd
 
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import Response
 from llama_index.exec_utils import safe_eval, safe_exec
 from llama_index.indices.struct_store.pandas import PandasIndex
 from llama_index.prompts import BasePromptTemplate
 from llama_index.prompts.default_prompts import DEFAULT_PANDAS_PROMPT
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import Response
 from llama_index.schema import QueryBundle
 from llama_index.service_context import ServiceContext
 from llama_index.utils import print_text
diff --git a/llama_index/query_engine/retriever_query_engine.py b/llama_index/query_engine/retriever_query_engine.py
index 1fa0355e7e209c3bbaafb981d238908e1ae5f363..f3696b391f7c987d8755c05e732fb64e0daaf9de 100644
--- a/llama_index/query_engine/retriever_query_engine.py
+++ b/llama_index/query_engine/retriever_query_engine.py
@@ -3,11 +3,12 @@ from typing import Any, List, Optional, Sequence
 from llama_index.bridge.pydantic import BaseModel
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.postprocessor.types import BaseNodePostprocessor
 from llama_index.prompts import BasePromptTemplate
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.response_synthesizers import (
     BaseSynthesizer,
     ResponseMode,
diff --git a/llama_index/query_engine/retry_query_engine.py b/llama_index/query_engine/retry_query_engine.py
index 7a7b20fdb87827d12281e67087c4dcc95277bb9b..7cdad01f6ac897ea375d3e33af7b469b47a09a8c 100644
--- a/llama_index/query_engine/retry_query_engine.py
+++ b/llama_index/query_engine/retry_query_engine.py
@@ -2,14 +2,14 @@ import logging
 from typing import Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.evaluation.base import BaseEvaluator
 from llama_index.evaluation.guideline import GuidelineEvaluator
 from llama_index.indices.query.query_transform.feedback_transform import (
     FeedbackQueryTransformation,
 )
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import QueryBundle
 
 logger = logging.getLogger(__name__)
diff --git a/llama_index/query_engine/retry_source_query_engine.py b/llama_index/query_engine/retry_source_query_engine.py
index 13be39f1377702cae21d46dc6d36a274af37ba41..7ff9eba4a2f5b43bab188f3c59f2f6d1de5cb60c 100644
--- a/llama_index/query_engine/retry_source_query_engine.py
+++ b/llama_index/query_engine/retry_source_query_engine.py
@@ -2,12 +2,12 @@ import logging
 from typing import Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.evaluation import BaseEvaluator
 from llama_index.indices.list.base import SummaryIndex
 from llama_index.prompts.mixin import PromptMixinType
 from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import Document, QueryBundle
 from llama_index.service_context import ServiceContext
 
diff --git a/llama_index/query_engine/router_query_engine.py b/llama_index/query_engine/router_query_engine.py
index 0e31b2a3f00ecdd11ea5acc8940ccedf10aed35e..a17daa79712aa61e6ffb2426a2750f93c4c3958b 100644
--- a/llama_index/query_engine/router_query_engine.py
+++ b/llama_index/query_engine/router_query_engine.py
@@ -5,18 +5,19 @@ from llama_index.async_utils import run_async_tasks
 from llama_index.bridge.pydantic import BaseModel
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine, BaseRetriever
-from llama_index.objects.base import ObjectRetriever
-from llama_index.prompts.default_prompt_selectors import (
-    DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
-)
-from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import (
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.response.schema import (
     RESPONSE_TYPE,
     PydanticResponse,
     Response,
     StreamingResponse,
 )
+from llama_index.objects.base import ObjectRetriever
+from llama_index.prompts.default_prompt_selectors import (
+    DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
+)
+from llama_index.prompts.mixin import PromptMixinType
 from llama_index.response_synthesizers import TreeSummarize
 from llama_index.schema import BaseNode, QueryBundle
 from llama_index.selectors.types import BaseSelector
diff --git a/llama_index/query_engine/sql_join_query_engine.py b/llama_index/query_engine/sql_join_query_engine.py
index faf5821fadb1e2e86060c70bef83c0d0b0fe9a43..98bb210c0333b9c75015f46565fbb51602603fd0 100644
--- a/llama_index/query_engine/sql_join_query_engine.py
+++ b/llama_index/query_engine/sql_join_query_engine.py
@@ -4,7 +4,8 @@ import logging
 from typing import Callable, Dict, Optional, Union
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE, Response
 from llama_index.indices.query.query_transform.base import BaseQueryTransform
 from llama_index.indices.struct_store.sql_query import (
     BaseSQLTableQueryEngine,
@@ -14,7 +15,6 @@ from llama_index.llm_predictor.base import LLMPredictorType
 from llama_index.llms.utils import resolve_llm
 from llama_index.prompts.base import BasePromptTemplate, PromptTemplate
 from llama_index.prompts.mixin import PromptDictType, PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE, Response
 from llama_index.schema import QueryBundle
 from llama_index.selectors.llm_selectors import LLMSingleSelector
 from llama_index.selectors.pydantic_selectors import PydanticSingleSelector
diff --git a/llama_index/query_engine/sub_question_query_engine.py b/llama_index/query_engine/sub_question_query_engine.py
index 6bf4efd8d44a64d1ded456b0f2db446491e9cd86..8272e10d36640f1988591f0bed178c4cbce7fe89 100644
--- a/llama_index/query_engine/sub_question_query_engine.py
+++ b/llama_index/query_engine/sub_question_query_engine.py
@@ -6,12 +6,12 @@ from llama_index.async_utils import run_async_tasks
 from llama_index.bridge.pydantic import BaseModel, Field
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.prompts.mixin import PromptMixinType
 from llama_index.question_gen.llm_generators import LLMQuestionGenerator
 from llama_index.question_gen.openai_generator import OpenAIQuestionGenerator
 from llama_index.question_gen.types import BaseQuestionGenerator, SubQuestion
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.response_synthesizers import BaseSynthesizer, get_response_synthesizer
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
 from llama_index.service_context import ServiceContext
diff --git a/llama_index/query_engine/transform_query_engine.py b/llama_index/query_engine/transform_query_engine.py
index 219d8ecf7e9b117ea23d61f4a1e2476cdd0e5d78..64f757419e0186e192d638cf564b63db5043efdd 100644
--- a/llama_index/query_engine/transform_query_engine.py
+++ b/llama_index/query_engine/transform_query_engine.py
@@ -1,10 +1,10 @@
 from typing import List, Optional, Sequence
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.response.schema import RESPONSE_TYPE
 from llama_index.indices.query.query_transform.base import BaseQueryTransform
 from llama_index.prompts.mixin import PromptMixinType
-from llama_index.response.schema import RESPONSE_TYPE
 from llama_index.schema import NodeWithScore, QueryBundle
 
 
diff --git a/llama_index/readers/make_com/wrapper.py b/llama_index/readers/make_com/wrapper.py
index 2cb79e6f3a67abcc17eb1dbc55651e5e84b9a344..4c9c9c18b7ce550aa02eba4b985c25916c528d88 100644
--- a/llama_index/readers/make_com/wrapper.py
+++ b/llama_index/readers/make_com/wrapper.py
@@ -8,8 +8,8 @@ from typing import Any, List, Optional
 
 import requests
 
+from llama_index.core.response.schema import Response
 from llama_index.readers.base import BaseReader
-from llama_index.response.schema import Response
 from llama_index.schema import Document, NodeWithScore, TextNode
 
 
diff --git a/llama_index/response/__init__.py b/llama_index/response/__init__.py
index 294a5bc8ec9d789837c3227ac21172c644059026..b99207e3d9d2a195420f5f4d332e56714e8c6b80 100644
--- a/llama_index/response/__init__.py
+++ b/llama_index/response/__init__.py
@@ -1,5 +1,5 @@
 """Init params."""
 
-from llama_index.response.schema import Response
+from llama_index.core.response.schema import Response
 
 __all__ = ["Response"]
diff --git a/llama_index/response/notebook_utils.py b/llama_index/response/notebook_utils.py
index b037c4a4973903bbc2298c6ef5c3c9185ffa4ea4..fc8b976407f9e0e0e195d86850e863331a01048f 100644
--- a/llama_index/response/notebook_utils.py
+++ b/llama_index/response/notebook_utils.py
@@ -8,8 +8,8 @@ import requests
 from IPython.display import Markdown, display
 from PIL import Image
 
+from llama_index.core.response.schema import Response
 from llama_index.img_utils import b64_2_img
-from llama_index.response.schema import Response
 from llama_index.schema import ImageNode, MetadataMode, NodeWithScore
 from llama_index.utils import truncate_text
 
diff --git a/llama_index/response/pprint_utils.py b/llama_index/response/pprint_utils.py
index 26a86c9f34077c1526c9bdaf2c17d4ec3307161e..1b047ad852311832f160aea8c7bb2fe6df4980a2 100644
--- a/llama_index/response/pprint_utils.py
+++ b/llama_index/response/pprint_utils.py
@@ -3,7 +3,7 @@ import textwrap
 from pprint import pprint
 from typing import Any, Dict
 
-from llama_index.response.schema import Response
+from llama_index.core.response.schema import Response
 from llama_index.schema import NodeWithScore
 from llama_index.utils import truncate_text
 
diff --git a/llama_index/response/schema.py b/llama_index/response/schema.py
index 1834b6ccf1f5c820fb076993937349605170cfeb..b9a6459b355d6042adf36bb7c231e066b6ac11c2 100644
--- a/llama_index/response/schema.py
+++ b/llama_index/response/schema.py
@@ -1,142 +1,14 @@
-"""Response schema."""
+"""Response schema.
 
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Union
+Maintained for backwards compatibility; the canonical definitions now live in llama_index.core.response.schema.
 
-from llama_index.bridge.pydantic import BaseModel
-from llama_index.schema import NodeWithScore
-from llama_index.types import TokenGen
-from llama_index.utils import truncate_text
+"""
 
+from llama_index.core.response.schema import (
+    RESPONSE_TYPE,
+    PydanticResponse,
+    Response,
+    StreamingResponse,
+)
 
-@dataclass
-class Response:
-    """Response object.
-
-    Returned if streaming=False.
-
-    Attributes:
-        response: The response text.
-
-    """
-
-    response: Optional[str]
-    source_nodes: List[NodeWithScore] = field(default_factory=list)
-    metadata: Optional[Dict[str, Any]] = None
-
-    def __str__(self) -> str:
-        """Convert to string representation."""
-        return self.response or "None"
-
-    def get_formatted_sources(self, length: int = 100) -> str:
-        """Get formatted sources text."""
-        texts = []
-        for source_node in self.source_nodes:
-            fmt_text_chunk = truncate_text(source_node.node.get_content(), length)
-            doc_id = source_node.node.node_id or "None"
-            source_text = f"> Source (Doc id: {doc_id}): {fmt_text_chunk}"
-            texts.append(source_text)
-        return "\n\n".join(texts)
-
-
-@dataclass
-class PydanticResponse:
-    """PydanticResponse object.
-
-    Returned if streaming=False.
-
-    Attributes:
-        response: The response text.
-
-    """
-
-    response: Optional[BaseModel]
-    source_nodes: List[NodeWithScore] = field(default_factory=list)
-    metadata: Optional[Dict[str, Any]] = None
-
-    def __str__(self) -> str:
-        """Convert to string representation."""
-        return self.response.json() if self.response else "None"
-
-    def __getattr__(self, name: str) -> Any:
-        """Get attribute, but prioritize the pydantic  response object."""
-        if self.response is not None and name in self.response.dict():
-            return getattr(self.response, name)
-        else:
-            return None
-
-    def get_formatted_sources(self, length: int = 100) -> str:
-        """Get formatted sources text."""
-        texts = []
-        for source_node in self.source_nodes:
-            fmt_text_chunk = truncate_text(source_node.node.get_content(), length)
-            doc_id = source_node.node.node_id or "None"
-            source_text = f"> Source (Doc id: {doc_id}): {fmt_text_chunk}"
-            texts.append(source_text)
-        return "\n\n".join(texts)
-
-    def get_response(self) -> Response:
-        """Get a standard response object."""
-        response_txt = self.response.json() if self.response else "None"
-        return Response(response_txt, self.source_nodes, self.metadata)
-
-
-@dataclass
-class StreamingResponse:
-    """StreamingResponse object.
-
-    Returned if streaming=True.
-
-    Attributes:
-        response_gen: The response generator.
-
-    """
-
-    response_gen: TokenGen
-    source_nodes: List[NodeWithScore] = field(default_factory=list)
-    metadata: Optional[Dict[str, Any]] = None
-    response_txt: Optional[str] = None
-
-    def __str__(self) -> str:
-        """Convert to string representation."""
-        if self.response_txt is None and self.response_gen is not None:
-            response_txt = ""
-            for text in self.response_gen:
-                response_txt += text
-            self.response_txt = response_txt
-        return self.response_txt or "None"
-
-    def get_response(self) -> Response:
-        """Get a standard response object."""
-        if self.response_txt is None and self.response_gen is not None:
-            response_txt = ""
-            for text in self.response_gen:
-                response_txt += text
-            self.response_txt = response_txt
-        return Response(self.response_txt, self.source_nodes, self.metadata)
-
-    def print_response_stream(self) -> None:
-        """Print the response stream."""
-        if self.response_txt is None and self.response_gen is not None:
-            response_txt = ""
-            for text in self.response_gen:
-                print(text, end="", flush=True)
-                response_txt += text
-            self.response_txt = response_txt
-        else:
-            print(self.response_txt)
-
-    def get_formatted_sources(self, length: int = 100, trim_text: int = True) -> str:
-        """Get formatted sources text."""
-        texts = []
-        for source_node in self.source_nodes:
-            fmt_text_chunk = source_node.node.get_content()
-            if trim_text:
-                fmt_text_chunk = truncate_text(fmt_text_chunk, length)
-            node_id = source_node.node.node_id or "None"
-            source_text = f"> Source (Node id: {node_id}): {fmt_text_chunk}"
-            texts.append(source_text)
-        return "\n\n".join(texts)
-
-
-RESPONSE_TYPE = Union[Response, StreamingResponse, PydanticResponse]
+__all__ = ["Response", "PydanticResponse", "StreamingResponse", "RESPONSE_TYPE"]
diff --git a/llama_index/response_synthesizers/base.py b/llama_index/response_synthesizers/base.py
index 9f77d4a5871b8013e4f5d35004f85e8b939a3e46..5790b331343f43f9bbf68082d54e4f53358afc89 100644
--- a/llama_index/response_synthesizers/base.py
+++ b/llama_index/response_synthesizers/base.py
@@ -13,13 +13,13 @@ from typing import Any, Dict, Generator, List, Optional, Sequence, Union
 
 from llama_index.bridge.pydantic import BaseModel
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.prompts.mixin import PromptMixin
-from llama_index.response.schema import (
+from llama_index.core.response.schema import (
     RESPONSE_TYPE,
     PydanticResponse,
     Response,
     StreamingResponse,
 )
+from llama_index.prompts.mixin import PromptMixin
 from llama_index.schema import BaseNode, MetadataMode, NodeWithScore, QueryBundle
 from llama_index.service_context import ServiceContext
 from llama_index.types import RESPONSE_TEXT_TYPE
diff --git a/llama_index/response_synthesizers/google/generativeai/base.py b/llama_index/response_synthesizers/google/generativeai/base.py
index e9daa9cbf39d46a42bae67a145d74f85f45f71e0..cbc1246cd3a99edb5aeb59728d6f543f42d218a3 100644
--- a/llama_index/response_synthesizers/google/generativeai/base.py
+++ b/llama_index/response_synthesizers/google/generativeai/base.py
@@ -11,9 +11,9 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, cast
 
 from llama_index.bridge.pydantic import BaseModel  # type: ignore
 from llama_index.callbacks.schema import CBEventType, EventPayload
+from llama_index.core.response.schema import Response
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.prompts.mixin import PromptDictType
-from llama_index.response.schema import Response
 from llama_index.response_synthesizers.base import BaseSynthesizer, QueryTextType
 from llama_index.schema import MetadataMode, NodeWithScore, TextNode
 from llama_index.types import RESPONSE_TEXT_TYPE
diff --git a/llama_index/retrievers/__init__.py b/llama_index/retrievers/__init__.py
index 1e4c66d2ad6dba3c4d8e75be334143705a538406..1716799433f5f4d965c7f962276eb4582b548838 100644
--- a/llama_index/retrievers/__init__.py
+++ b/llama_index/retrievers/__init__.py
@@ -1,4 +1,5 @@
-from llama_index.core import BaseImageRetriever, BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
+from llama_index.core.image_retriever import BaseImageRetriever
 from llama_index.indices.empty.retrievers import EmptyIndexRetriever
 from llama_index.indices.keyword_table.retrievers import KeywordTableSimpleRetriever
 from llama_index.indices.knowledge_graph.retrievers import (
diff --git a/llama_index/retrievers/auto_merging_retriever.py b/llama_index/retrievers/auto_merging_retriever.py
index f27d4284c58e429297067aca2f31d0fba09f4e5e..4a1f0a60c85880809837a512f678cb810ba3d58e 100644
--- a/llama_index/retrievers/auto_merging_retriever.py
+++ b/llama_index/retrievers/auto_merging_retriever.py
@@ -5,7 +5,7 @@ from collections import defaultdict
 from typing import Dict, List, Optional, Tuple, cast
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.indices.utils import truncate_text
 from llama_index.indices.vector_store.retrievers.retriever import VectorIndexRetriever
diff --git a/llama_index/retrievers/bm25_retriever.py b/llama_index/retrievers/bm25_retriever.py
index 61cc5be388e1cf45ca6e2d31a8c7fddc970c70e1..3604c93fcfe8feeff127cfefa6510e530f8e2e02 100644
--- a/llama_index/retrievers/bm25_retriever.py
+++ b/llama_index/retrievers/bm25_retriever.py
@@ -5,7 +5,7 @@ from nltk.stem import PorterStemmer
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.keyword_table.utils import simple_extract_keywords
 from llama_index.indices.vector_store.base import VectorStoreIndex
 from llama_index.schema import BaseNode, NodeWithScore, QueryBundle
diff --git a/llama_index/retrievers/pathway_retriever.py b/llama_index/retrievers/pathway_retriever.py
index fd4040f6030658d86837812405cb415b84a07b7d..e7b6e311a6adc42aa72e7f633a03dc112a1db885 100644
--- a/llama_index/retrievers/pathway_retriever.py
+++ b/llama_index/retrievers/pathway_retriever.py
@@ -5,7 +5,7 @@ from typing import Any, Callable, List, Optional, Tuple, Union
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.constants import DEFAULT_SIMILARITY_TOP_K
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.embeddings import BaseEmbedding
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.ingestion.pipeline import run_transformations
diff --git a/llama_index/retrievers/recursive_retriever.py b/llama_index/retrievers/recursive_retriever.py
index bc5817b1f155496bff73e3526c491727c0b072e1..4ad3cd0609aa9fbb8b5806607fb14a2ec8ccb6c1 100644
--- a/llama_index/retrievers/recursive_retriever.py
+++ b/llama_index/retrievers/recursive_retriever.py
@@ -2,7 +2,8 @@ from typing import Dict, List, Optional, Tuple, Union
 
 from llama_index.callbacks.base import CallbackManager
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseQueryEngine, BaseRetriever
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.schema import BaseNode, IndexNode, NodeWithScore, QueryBundle, TextNode
 from llama_index.utils import print_text
 
diff --git a/llama_index/retrievers/router_retriever.py b/llama_index/retrievers/router_retriever.py
index b1f964846d62bd8b703b188edac25a39f821af95..72740a88c1c9fe1a56da6d11c3bb69870cec0a89 100644
--- a/llama_index/retrievers/router_retriever.py
+++ b/llama_index/retrievers/router_retriever.py
@@ -5,7 +5,7 @@ import logging
 from typing import List, Optional, Sequence
 
 from llama_index.callbacks.schema import CBEventType, EventPayload
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.prompts.mixin import PromptMixinType
 from llama_index.schema import NodeWithScore, QueryBundle
 from llama_index.selectors.types import BaseSelector
diff --git a/llama_index/retrievers/transform_retriever.py b/llama_index/retrievers/transform_retriever.py
index f200f751005bd3f7c0eddf002ecf2df4d2e6ad4c..df8228aca978d41f674b8c611aa38c3a9d73522d 100644
--- a/llama_index/retrievers/transform_retriever.py
+++ b/llama_index/retrievers/transform_retriever.py
@@ -1,7 +1,7 @@
 from typing import List, Optional
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.query.query_transform.base import BaseQueryTransform
 from llama_index.prompts.mixin import PromptMixinType
 from llama_index.schema import NodeWithScore, QueryBundle
diff --git a/llama_index/retrievers/you_retriever.py b/llama_index/retrievers/you_retriever.py
index df042b6ce1a0798f210d5d960357f5f4dcd7065f..f29f2ab63e9407bcb801d9d7cbc2c7f5f0e42b6d 100644
--- a/llama_index/retrievers/you_retriever.py
+++ b/llama_index/retrievers/you_retriever.py
@@ -7,7 +7,7 @@ from typing import List, Optional
 import requests
 
 from llama_index.callbacks.base import CallbackManager
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.schema import NodeWithScore, QueryBundle, TextNode
 
diff --git a/llama_index/schema.py b/llama_index/schema.py
index 0ce2e62a13f67ea2ec85a99c3e8e9e3107143e1f..cd6acd19fcf60a4d4e755d307b754a3fe022a99c 100644
--- a/llama_index/schema.py
+++ b/llama_index/schema.py
@@ -760,5 +760,9 @@ class QueryBundle(DataClassJsonMixin):
             return []
         return [self.image_path]
 
+    def __str__(self) -> str:
+        """Convert to string representation."""
+        return self.query_str
+
 
 QueryType = Union[str, QueryBundle]
diff --git a/llama_index/service_context.py b/llama_index/service_context.py
index c4378f24896dec9d13c94f0f4725e284c26ff1c8..13070115b737fa3695d2d0f7257a073c361f741d 100644
--- a/llama_index/service_context.py
+++ b/llama_index/service_context.py
@@ -1,12 +1,11 @@
 import logging
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Any, List, Optional, cast
 
 import llama_index
 from llama_index.bridge.pydantic import BaseModel
 from llama_index.callbacks.base import CallbackManager
-from llama_index.embeddings.base import BaseEmbedding
-from llama_index.embeddings.utils import EmbedType, resolve_embed_model
+from llama_index.core.embeddings.base import BaseEmbedding
 from llama_index.indices.prompt_helper import PromptHelper
 from llama_index.llm_predictor import LLMPredictor
 from llama_index.llm_predictor.base import BaseLLMPredictor, LLMMetadata
@@ -88,7 +87,7 @@ class ServiceContext:
         llm_predictor: Optional[BaseLLMPredictor] = None,
         llm: Optional[LLMType] = "default",
         prompt_helper: Optional[PromptHelper] = None,
-        embed_model: Optional[EmbedType] = "default",
+        embed_model: Optional[Any] = "default",
         node_parser: Optional[NodeParser] = None,
         text_splitter: Optional[TextSplitter] = None,
         transformations: Optional[List[TransformComponent]] = None,
@@ -132,6 +131,11 @@
             chunk_size_limit (Optional[int]): renamed to chunk_size
 
         """
+        from llama_index.embeddings.utils import EmbedType, resolve_embed_model
+
+        embed_model = cast(EmbedType, embed_model)
+
         if chunk_size_limit is not None and chunk_size is None:
             logger.warning(
                 "chunk_size_limit is deprecated, please specify chunk_size instead"
@@ -227,7 +230,7 @@ class ServiceContext:
         llm_predictor: Optional[BaseLLMPredictor] = None,
         llm: Optional[LLMType] = "default",
         prompt_helper: Optional[PromptHelper] = None,
-        embed_model: Optional[EmbedType] = "default",
+        embed_model: Optional[Any] = "default",
         node_parser: Optional[NodeParser] = None,
         text_splitter: Optional[TextSplitter] = None,
         transformations: Optional[List[TransformComponent]] = None,
@@ -245,6 +248,11 @@
         chunk_size_limit: Optional[int] = None,
     ) -> "ServiceContext":
         """Instantiate a new service context using a previous as the defaults."""
+        from llama_index.embeddings.utils import EmbedType, resolve_embed_model
+
+        embed_model = cast(EmbedType, embed_model)
+
         if chunk_size_limit is not None and chunk_size is None:
             logger.warning(
                 "chunk_size_limit is deprecated, please specify chunk_size",
diff --git a/llama_index/tools/query_engine.py b/llama_index/tools/query_engine.py
index b0b9de791042857e68685f1edc7500c990d2c202..8e151ac8a770b932566bad1c8f188fc9d4a1a9b7 100644
--- a/llama_index/tools/query_engine.py
+++ b/llama_index/tools/query_engine.py
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING, Any, Optional
 
-from llama_index.core import BaseQueryEngine
+from llama_index.core.base_query_engine import BaseQueryEngine
 
 if TYPE_CHECKING:
     from llama_index.langchain_helpers.agents.tools import (
diff --git a/llama_index/tools/retriever_tool.py b/llama_index/tools/retriever_tool.py
index 029d320c4660ed3004155d6fc4639ace32314d19..9d2bbb7121bc21589b5ba027626640db9a234c2f 100644
--- a/llama_index/tools/retriever_tool.py
+++ b/llama_index/tools/retriever_tool.py
@@ -2,7 +2,7 @@
 
 from typing import TYPE_CHECKING, Any, Optional
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 
 if TYPE_CHECKING:
     from llama_index.langchain_helpers.agents.tools import LlamaIndexTool
diff --git a/llama_index/types.py b/llama_index/types.py
index e454b18e8ed12b7e29c836fe498bea1324ea863d..9197d04c2ea5b55225f836025118ca0d949dae68 100644
--- a/llama_index/types.py
+++ b/llama_index/types.py
@@ -14,7 +14,7 @@ from typing import (
 )
 
 from llama_index.bridge.pydantic import BaseModel
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 Model = TypeVar("Model", bound=BaseModel)
 
diff --git a/tests/agent/openai/test_openai_agent.py b/tests/agent/openai/test_openai_agent.py
index 81b60ae247938b6b44c154f8a471d8442c5d8175..6e8798266e84debdc70b83055db1c92cb1653c47 100644
--- a/tests/agent/openai/test_openai_agent.py
+++ b/tests/agent/openai/test_openai_agent.py
@@ -5,10 +5,9 @@
 from llama_index.agent.openai.base import OpenAIAgent
 from llama_index.agent.openai.step import call_tool_with_error_handling
 from llama_index.chat_engine.types import AgentChatResponse, StreamingAgentChatResponse
+from llama_index.core.llms.types import ChatMessage, ChatResponse
-from llama_index.llms.base import ChatMessage, ChatResponse
 from llama_index.llms.mock import MockLLM
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage, ChatResponse
 from llama_index.tools.function_tool import FunctionTool
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, ChoiceDelta
diff --git a/tests/agent/react/test_react_agent.py b/tests/agent/react/test_react_agent.py
index c728a5562d0f15487ebd22394a31ce6049234a3c..af45634993967227393c8f97d2ef6161ab67d776 100644
--- a/tests/agent/react/test_react_agent.py
+++ b/tests/agent/react/test_react_agent.py
@@ -7,13 +7,13 @@ from llama_index.agent.react.types import ObservationReasoningStep
 from llama_index.agent.types import Task
 from llama_index.bridge.pydantic import PrivateAttr
 from llama_index.chat_engine.types import AgentChatResponse, StreamingAgentChatResponse
-from llama_index.llms.mock import MockLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
     MessageRole,
 )
+from llama_index.llms.mock import MockLLM
 from llama_index.tools.function_tool import FunctionTool
 from llama_index.tools.types import BaseTool
 
diff --git a/tests/chat_engine/test_condense_question.py b/tests/chat_engine/test_condense_question.py
index fb249a5a3d5ea169adf11c557798b00b0dd90db5..5a7a8d4c8c8234a244673a2562606d3bee0990e9 100644
--- a/tests/chat_engine/test_condense_question.py
+++ b/tests/chat_engine/test_condense_question.py
@@ -1,9 +1,9 @@
 from unittest.mock import Mock
 
 from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
-from llama_index.core import BaseQueryEngine
-from llama_index.llms.types import ChatMessage, MessageRole
-from llama_index.response.schema import Response
+from llama_index.core.base_query_engine import BaseQueryEngine
+from llama_index.core.llms.types import ChatMessage, MessageRole
+from llama_index.core.response.schema import Response
 from llama_index.service_context import ServiceContext
 
 
diff --git a/tests/chat_engine/test_simple.py b/tests/chat_engine/test_simple.py
index fa6e191b25776285702d845c2415e3d9d6186abb..f0d38d432ca8bb273d63b32c8c022a8842c8ece5 100644
--- a/tests/chat_engine/test_simple.py
+++ b/tests/chat_engine/test_simple.py
@@ -1,5 +1,5 @@
 from llama_index.chat_engine.simple import SimpleChatEngine
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.service_context import ServiceContext
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 5d6b5e2d1586e4c9a79f56d69bbad56081732fb8..cbbf5065c25f207035570628947eed9e6c602969 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,9 +5,9 @@ from typing import Any, List, Optional
 
 import openai
 import pytest
+from llama_index.core.llms.types import LLMMetadata
 from llama_index.llm_predictor.base import LLMPredictor
 from llama_index.llms.mock import MockLLM
-from llama_index.llms.types import LLMMetadata
 from llama_index.node_parser.text import SentenceSplitter, TokenTextSplitter
 from llama_index.service_context import ServiceContext
 
diff --git a/tests/embeddings/test_base.py b/tests/embeddings/test_base.py
index 3eb3d1bec96df1a6e3a0171e36f535156e2f4dcb..ce4df5f90fb42c03312207071344ac274d5359eb 100644
--- a/tests/embeddings/test_base.py
+++ b/tests/embeddings/test_base.py
@@ -3,7 +3,7 @@ import os
 from typing import Any, List
 from unittest.mock import patch
 
-from llama_index.embeddings.base import SimilarityMode, mean_agg
+from llama_index.core.embeddings.base import SimilarityMode, mean_agg
 from llama_index.embeddings.openai import OpenAIEmbedding
 
 from tests.conftest import CachedOpenAIApiKeys
diff --git a/tests/evaluation/test_base.py b/tests/evaluation/test_base.py
index d4ce1d3f8b17d6c614f2e85e4af30343af3053f4..93c9f1f697cbd711a2e4da4a99917b00db652dfa 100644
--- a/tests/evaluation/test_base.py
+++ b/tests/evaluation/test_base.py
@@ -1,9 +1,9 @@
 from typing import Any, Optional, Sequence
 
+from llama_index.core.response.schema import NodeWithScore, Response
 from llama_index.evaluation import BaseEvaluator
 from llama_index.evaluation.base import EvaluationResult
 from llama_index.prompts.mixin import PromptDictType
-from llama_index.response.schema import NodeWithScore, Response
 from llama_index.schema import TextNode
 
 
diff --git a/tests/indices/list/test_index.py b/tests/indices/list/test_index.py
index 3ff7499906c7773332d76b5375c0dd02f38c02e7..358e4b934e001e96a49b83c56551e02ee6438588 100644
--- a/tests/indices/list/test_index.py
+++ b/tests/indices/list/test_index.py
@@ -2,7 +2,7 @@
 
 from typing import Dict, List, Tuple
 
-from llama_index.core import BaseRetriever
+from llama_index.core.base_retriever import BaseRetriever
 from llama_index.indices.list.base import ListRetrieverMode, SummaryIndex
 from llama_index.schema import BaseNode, Document
 from llama_index.service_context import ServiceContext
diff --git a/tests/indices/managed/test_google.py b/tests/indices/managed/test_google.py
index 644ec60437fdbbf2440c89434c5a391ebd794ef4..225ddc77dcb771637bdd1c269bb287a4d92add0a 100644
--- a/tests/indices/managed/test_google.py
+++ b/tests/indices/managed/test_google.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
-from llama_index.response.schema import Response
+from llama_index.core.response.schema import Response
 from llama_index.schema import Document
 
 try:
diff --git a/tests/indices/struct_store/test_json_query.py b/tests/indices/struct_store/test_json_query.py
index a84b13ad5cd7e8de3ade1b3e3760a120805088f3..fdca94bf6320d53f1c2aabd109c41d0388aa2a5d 100644
--- a/tests/indices/struct_store/test_json_query.py
+++ b/tests/indices/struct_store/test_json_query.py
@@ -6,11 +6,11 @@ from typing import Any, Dict, cast
 from unittest.mock import patch
 
 import pytest
+from llama_index.core.response.schema import Response
 from llama_index.indices.struct_store.json_query import JSONQueryEngine, JSONType
 from llama_index.llm_predictor import LLMPredictor
 from llama_index.llms.mock import MockLLM
 from llama_index.prompts.base import BasePromptTemplate
-from llama_index.response.schema import Response
 from llama_index.schema import QueryBundle
 from llama_index.service_context import ServiceContext
 
diff --git a/tests/llms/test_anthropic.py b/tests/llms/test_anthropic.py
index c7386ffbd46a2eeb4c769a8f4135d26d64903497..187a67718d535e1e89e00178bdd5fd31e7e860ed 100644
--- a/tests/llms/test_anthropic.py
+++ b/tests/llms/test_anthropic.py
@@ -1,6 +1,6 @@
 import pytest
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.anthropic import Anthropic
-from llama_index.llms.types import ChatMessage
 
 try:
     import anthropic
diff --git a/tests/llms/test_anthropic_utils.py b/tests/llms/test_anthropic_utils.py
index 76b4dce62cae9741b414d5d620d1a9ff1938468b..c0f7c179b287a159c8a6fd19eda095d0cee6e330 100644
--- a/tests/llms/test_anthropic_utils.py
+++ b/tests/llms/test_anthropic_utils.py
@@ -1,9 +1,9 @@
 import pytest
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms.anthropic_utils import (
     anthropic_modelname_to_contextsize,
     messages_to_anthropic_prompt,
 )
-from llama_index.llms.types import ChatMessage, MessageRole
 
 
 def test_messages_to_anthropic_prompt() -> None:
diff --git a/tests/llms/test_bedrock.py b/tests/llms/test_bedrock.py
index 26db8bddb9043230ca75abf8c63f0e139ceaf845..d26661d40f88b023e6b5f80d6ea936fc3f43d6ed 100644
--- a/tests/llms/test_bedrock.py
+++ b/tests/llms/test_bedrock.py
@@ -5,8 +5,8 @@ from typing import Any, Generator
 import pytest
 from botocore.response import StreamingBody
 from botocore.stub import Stubber
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms import Bedrock
-from llama_index.llms.types import ChatMessage
 from pytest import MonkeyPatch
 
 
diff --git a/tests/llms/test_cohere.py b/tests/llms/test_cohere.py
index 1d65c83a36dc41196a1b1d53a00183a421998a22..cd9eff0af66166b62183a55f4942308543264351 100644
--- a/tests/llms/test_cohere.py
+++ b/tests/llms/test_cohere.py
@@ -1,7 +1,7 @@
 from typing import Any
 
 import pytest
-from llama_index.llms.types import ChatMessage
+from llama_index.core.llms.types import ChatMessage
 from pytest import MonkeyPatch
 
 try:
diff --git a/tests/llms/test_custom.py b/tests/llms/test_custom.py
index 3cd79eca330d4879384ef2d1408281efbf4d9691..90e874e3d5b0fa62ca7893c9d757aef7bd47cb31 100644
--- a/tests/llms/test_custom.py
+++ b/tests/llms/test_custom.py
@@ -1,12 +1,12 @@
 from typing import Any
 
-from llama_index.llms.custom import CustomLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
+from llama_index.llms.custom import CustomLLM
 
 
 class TestLLM(CustomLLM):
diff --git a/tests/llms/test_gradient.py b/tests/llms/test_gradient.py
index 87db5148b11c02aa737a6e52ae924313b93f13e5..1b9817e7bad1661ddedcedfd1929a2c31457ac8c 100644
--- a/tests/llms/test_gradient.py
+++ b/tests/llms/test_gradient.py
@@ -5,8 +5,8 @@ from typing import Any
 from unittest.mock import MagicMock, patch
 
 import pytest
+from llama_index.core.llms.types import CompletionResponse
 from llama_index.llms.gradient import GradientBaseModelLLM, GradientModelAdapterLLM
-from llama_index.llms.types import CompletionResponse
 
 
 class GradientModel(MagicMock):
diff --git a/tests/llms/test_konko.py b/tests/llms/test_konko.py
index 8b62dd0b6113a8cf4b5b7b6a66e50b729a1a2b4c..848ac54ccbaccc0a03594b2fadde5b4eba5324bb 100644
--- a/tests/llms/test_konko.py
+++ b/tests/llms/test_konko.py
@@ -1,8 +1,8 @@
 from typing import Any, Generator
 
 import pytest
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.konko import Konko
-from llama_index.llms.types import ChatMessage
 from pytest import MonkeyPatch
 
 try:
diff --git a/tests/llms/test_langchain.py b/tests/llms/test_langchain.py
index 15b1c03f583a01538a03324ab2e2a565f52451a6..dbbbdc40bbab0106ec8aea1af9713bcac84cd880 100644
--- a/tests/llms/test_langchain.py
+++ b/tests/llms/test_langchain.py
@@ -1,7 +1,7 @@
 from typing import List
 
 import pytest
-from llama_index.llms.types import ChatMessage, MessageRole
+from llama_index.core.llms.types import ChatMessage, MessageRole
 
 try:
     import cohere
diff --git a/tests/llms/test_litellm.py b/tests/llms/test_litellm.py
index 8786f7b506793283dd298f0c667833f477cf020d..2dc7a5d24bfc4e18c544c601bcd59afe66bb6245 100644
--- a/tests/llms/test_litellm.py
+++ b/tests/llms/test_litellm.py
@@ -6,8 +6,8 @@ except ImportError:
     litellm = None  # type: ignore
 
 import pytest
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.litellm import LiteLLM
-from llama_index.llms.types import ChatMessage
 from pytest import MonkeyPatch
 
 from tests.conftest import CachedOpenAIApiKeys
diff --git a/tests/llms/test_llama_utils.py b/tests/llms/test_llama_utils.py
index b8587d7a5d2b8c09491b80c1f2d0b53d64ded916..23c8e6ee2b34d17c43af903d4e9e1b51951bc275 100644
--- a/tests/llms/test_llama_utils.py
+++ b/tests/llms/test_llama_utils.py
@@ -1,6 +1,7 @@
 from typing import Sequence
 
 import pytest
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms.llama_utils import (
     B_INST,
     B_SYS,
@@ -12,7 +13,6 @@ from llama_index.llms.llama_utils import (
     completion_to_prompt,
     messages_to_prompt,
 )
-from llama_index.llms.types import ChatMessage, MessageRole
 
 
 @pytest.fixture()
diff --git a/tests/llms/test_localai.py b/tests/llms/test_localai.py
index eda548c0ab3f7a9d6ef99316d0bd9bd1fe0d3ac6..d1035678a70360c88e6d3954226609b22ac2d599 100644
--- a/tests/llms/test_localai.py
+++ b/tests/llms/test_localai.py
@@ -1,8 +1,8 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms import LocalAI
-from llama_index.llms.types import ChatMessage
 from openai.types import Completion, CompletionChoice
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
diff --git a/tests/llms/test_openai.py b/tests/llms/test_openai.py
index ebc42b20929bec45147b195b87cb2306ab863b18..3058575814e805322994d56104abf2ec33817873 100644
--- a/tests/llms/test_openai.py
+++ b/tests/llms/test_openai.py
@@ -3,8 +3,8 @@ from typing import Any, AsyncGenerator, Generator
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
+from llama_index.core.llms.types import ChatMessage
 from llama_index.llms.openai import OpenAI
-from llama_index.llms.types import ChatMessage
 from openai.types.chat.chat_completion import (
     ChatCompletion,
     ChatCompletionMessage,
diff --git a/tests/llms/test_openai_like.py b/tests/llms/test_openai_like.py
index 99a96f6f44e948e7219265ee698f3158a836433c..f6bbaa83b4d24a5079efb9bd96d2a136fb12d86a 100644
--- a/tests/llms/test_openai_like.py
+++ b/tests/llms/test_openai_like.py
@@ -1,9 +1,9 @@
 from typing import List
 from unittest.mock import MagicMock, call, patch
 
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms import LOCALAI_DEFAULTS, OpenAILike
 from llama_index.llms.openai import Tokenizer
-from llama_index.llms.types import ChatMessage, MessageRole
 from openai.types import Completion, CompletionChoice
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
diff --git a/tests/llms/test_openai_utils.py b/tests/llms/test_openai_utils.py
index 712b1857b741a3b5ed6473edd60cf6dc967c566c..1acf0b94ea9bb207a24465c5a1f22b5958582fbb 100644
--- a/tests/llms/test_openai_utils.py
+++ b/tests/llms/test_openai_utils.py
@@ -2,13 +2,13 @@ from typing import List
 
 import pytest
 from llama_index.bridge.pydantic import BaseModel
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms.openai_utils import (
     from_openai_message_dicts,
     from_openai_messages,
     to_openai_message_dicts,
     to_openai_tool,
 )
-from llama_index.llms.types import ChatMessage, MessageRole
 from openai.types.chat.chat_completion_assistant_message_param import (
     FunctionCall as FunctionCallParam,
 )
diff --git a/tests/llms/test_palm.py b/tests/llms/test_palm.py
index c36f2b7eeee12145fda8efe7985b6b9674addb1b..bc221db004db98920a3bcd52d9ac5b604ec10538 100644
--- a/tests/llms/test_palm.py
+++ b/tests/llms/test_palm.py
@@ -31,8 +31,8 @@ class MockPalmPackage(MagicMock):
         return self._mock_models()
 
 
+from llama_index.core.llms.types import CompletionResponse
 from llama_index.llms.palm import PaLM
-from llama_index.llms.types import CompletionResponse
 
 
 @pytest.mark.skipif(
diff --git a/tests/llms/test_rungpt.py b/tests/llms/test_rungpt.py
index 475e719c8a73bf8a0e4cf366588b6493b83b66ee..163246f7a40fb043dfd5c0f16a47f94965951876 100644
--- a/tests/llms/test_rungpt.py
+++ b/tests/llms/test_rungpt.py
@@ -2,11 +2,11 @@ from typing import Any, Dict, Generator, List
 from unittest.mock import MagicMock, patch
 
 import pytest
-from llama_index.llms.rungpt import RunGptLLM
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     MessageRole,
 )
+from llama_index.llms.rungpt import RunGptLLM
 
 try:
     import sseclient
diff --git a/tests/llms/test_vertex.py b/tests/llms/test_vertex.py
index 3037ba36cc7a84c6f5e005a57ee3c6602484edd9..9703983977d2cc25eaf5aeffb4063da58334459a 100644
--- a/tests/llms/test_vertex.py
+++ b/tests/llms/test_vertex.py
@@ -1,7 +1,7 @@
 from typing import Sequence
 
 import pytest
-from llama_index.llms.types import ChatMessage, CompletionResponse
+from llama_index.core.llms.types import ChatMessage, CompletionResponse
 from llama_index.llms.vertex import Vertex
 from llama_index.llms.vertex_utils import init_vertexai
 
diff --git a/tests/llms/test_watsonx.py b/tests/llms/test_watsonx.py
index 990028bc9116592c2eff30822f8c5646d8e2836d..006eace819b2016535c133d89fc1f006adbd552a 100644
--- a/tests/llms/test_watsonx.py
+++ b/tests/llms/test_watsonx.py
@@ -3,7 +3,7 @@ from typing import Any, Dict, Generator, Optional
 from unittest.mock import MagicMock
 
 import pytest
-from llama_index.llms.types import ChatMessage
+from llama_index.core.llms.types import ChatMessage
 
 try:
     import ibm_watson_machine_learning
diff --git a/tests/llms/test_xinference.py b/tests/llms/test_xinference.py
index 3c2000746a106ea70f7da1e5737ed3d128ff8dd8..8299e8a1028e33c587e9b1198a3222355356bc95 100644
--- a/tests/llms/test_xinference.py
+++ b/tests/llms/test_xinference.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, Generator, Iterator, List, Mapping, Sequence, Tuple, Union
 
 import pytest
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
diff --git a/tests/program/test_llm_program.py b/tests/program/test_llm_program.py
index ae8d4dcab16b1bb6b443c803fdf9970c69b27e07..fca35966a6fd691e4643848ecaad696b82966c9a 100644
--- a/tests/program/test_llm_program.py
+++ b/tests/program/test_llm_program.py
@@ -4,7 +4,7 @@ import json
 from unittest.mock import MagicMock
 
 from llama_index.bridge.pydantic import BaseModel
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
diff --git a/tests/program/test_lmformatenforcer.py b/tests/program/test_lmformatenforcer.py
index 9b9468c3d694fef3bc4cc14c85ca2d654f933268..1e3af9124619324afff8171e5aa013925d763d81 100644
--- a/tests/program/test_lmformatenforcer.py
+++ b/tests/program/test_lmformatenforcer.py
@@ -3,8 +3,8 @@ from unittest.mock import MagicMock
 
 import pytest
 from llama_index.bridge.pydantic import BaseModel
+from llama_index.core.llms.types import CompletionResponse
 from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.llms.types import CompletionResponse
 from llama_index.program.lmformatenforcer_program import LMFormatEnforcerPydanticProgram
 
 has_lmformatenforcer = find_spec("lmformatenforcer") is not None
diff --git a/tests/program/test_multi_modal_llm_program.py b/tests/program/test_multi_modal_llm_program.py
index 7d1fe9b848c1f0b3ecfec513f95e1651541e2fa0..96022532df8c61a48f9a28062704445e4f9c0719 100644
--- a/tests/program/test_multi_modal_llm_program.py
+++ b/tests/program/test_multi_modal_llm_program.py
@@ -5,7 +5,7 @@ from typing import Sequence
 from unittest.mock import MagicMock
 
 from llama_index.bridge.pydantic import BaseModel
-from llama_index.llms.types import (
+from llama_index.core.llms.types import (
     CompletionResponse,
 )
 from llama_index.multi_modal_llms import MultiModalLLMMetadata
diff --git a/tests/prompts/test_base.py b/tests/prompts/test_base.py
index 00993b6f610aa3f0b39838b9ecd8da59dc3b30fa..25d011d5d8eb9d6ae75d817251eda2517fd27719 100644
--- a/tests/prompts/test_base.py
+++ b/tests/prompts/test_base.py
@@ -4,8 +4,8 @@
 from typing import Any
 
 import pytest
+from llama_index.core.llms.types import ChatMessage, MessageRole
 from llama_index.llms import MockLLM
-from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.prompts import (
     ChatPromptTemplate,
     LangchainPromptTemplate,
diff --git a/tests/query_engine/test_cogniswitch_query_engine.py b/tests/query_engine/test_cogniswitch_query_engine.py
index 842f4d8fd677dec974fb11a2edee882f03d18c2f..c5bedeaf0bd546a5b722645528a53dd7a8370689 100644
--- a/tests/query_engine/test_cogniswitch_query_engine.py
+++ b/tests/query_engine/test_cogniswitch_query_engine.py
@@ -2,8 +2,8 @@ from typing import Any
 from unittest.mock import patch
 
 import pytest
+from llama_index.core.response.schema import Response
 from llama_index.query_engine.cogniswitch_query_engine import CogniswitchQueryEngine
-from llama_index.response.schema import Response
 
 
 @pytest.fixture()
diff --git a/tests/query_engine/test_pandas.py b/tests/query_engine/test_pandas.py
index 0c7acedd55518eb81d27e9d2bafceebc9b7cc579..b95e14961670594ebec686735c63447e785c5530 100644
--- a/tests/query_engine/test_pandas.py
+++ b/tests/query_engine/test_pandas.py
@@ -7,13 +7,13 @@ from typing import Any, Dict, cast
 
 import pandas as pd
 import pytest
+from llama_index.core.response.schema import Response
 from llama_index.indices.query.schema import QueryBundle
 from llama_index.indices.service_context import ServiceContext
 from llama_index.query_engine.pandas_query_engine import (
     PandasQueryEngine,
     default_output_processor,
 )
-from llama_index.response.schema import Response
 
 
 def test_pandas_query_engine(mock_service_context: ServiceContext) -> None: