diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
index 68e04c3308e7b7cb15d802fb242e339dad07b762..b805eb928b94c1509f3b0bd9fd748e291bacdb59 100644
--- a/.github/workflows/unit_test.yml
+++ b/.github/workflows/unit_test.yml
@@ -11,7 +11,7 @@ env:
 
 jobs:
   test:
-    runs-on: ubuntu-latest-4core
+    runs-on: ubuntu-latest-unit-tester
     strategy:
       # You can use PyPy versions in python-version.
       # For example, pypy-2.7 and pypy-3.8
diff --git a/docs/examples/pipeline/query_pipeline.ipynb b/docs/examples/pipeline/query_pipeline.ipynb
index dbb496330a3d6b2350d41ef50a4140b79aab4c8f..9bdecd3dbf1c51a727bc1a5b78cee33f0d6e9100 100644
--- a/docs/examples/pipeline/query_pipeline.ipynb
+++ b/docs/examples/pipeline/query_pipeline.ipynb
@@ -982,14 +982,14 @@
     "    OutputKeys,\n",
     ")\n",
     "from typing import Dict, Any\n",
-    "from llama_index.core.llms.llm import BaseLLM\n",
+    "from llama_index.core.llms.llm import LLM\n",
     "from pydantic import Field\n",
     "\n",
     "\n",
     "class RelatedMovieComponent(CustomQueryComponent):\n",
     "    \"\"\"Related movie component.\"\"\"\n",
     "\n",
-    "    llm: BaseLLM = Field(..., description=\"OpenAI LLM\")\n",
+    "    llm: LLM = Field(..., description=\"OpenAI LLM\")\n",
     "\n",
     "    def _validate_component_inputs(\n",
     "        self, input: Dict[str, Any]\n",
diff --git a/docs/module_guides/models/llms/usage_custom.md b/docs/module_guides/models/llms/usage_custom.md
index 742b719cc34a8d4818867aa7798c9b0475bffb37..b9a61334eff5617a206204faf672f9cacc341ea2 100644
--- a/docs/module_guides/models/llms/usage_custom.md
+++ b/docs/module_guides/models/llms/usage_custom.md
@@ -180,7 +180,7 @@ from llama_index.core.llms import (
     CompletionResponseGen,
     LLMMetadata,
 )
-from llama_index.core.llms.base import llm_completion_callback
+from llama_index.core.llms.callbacks import llm_completion_callback
 from llama_index.core import Settings
 
 
diff --git a/llama-index-core/llama_index/core/__init__.py b/llama-index-core/llama_index/core/__init__.py
index 5f1aa3fa6e8800a3474f79b1820eca3b5bf11a04..9c92b685aa1fb4111aed5149f0398053e28f34f2 100644
--- a/llama-index-core/llama_index/core/__init__.py
+++ b/llama-index-core/llama_index/core/__init__.py
@@ -1,6 +1,6 @@
 """Init file of LlamaIndex."""
 
-__version__ = "0.10.7"
+__version__ = "0.10.8.post1"
 
 import logging
 from logging import NullHandler
diff --git a/llama-index-core/llama_index/core/agent/legacy/react/base.py b/llama-index-core/llama_index/core/agent/legacy/react/base.py
index d69593043fd584a95089c089368d7681ef5bde77..d394772ec624f6ff299573fb67f5671977160d90 100644
--- a/llama-index-core/llama_index/core/agent/legacy/react/base.py
+++ b/llama-index-core/llama_index/core/agent/legacy/react/base.py
@@ -34,7 +34,7 @@ from llama_index.core.chat_engine.types import (
     AgentChatResponse,
     StreamingAgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage, ChatResponse
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
diff --git a/llama-index-core/llama_index/core/agent/react/step.py b/llama-index-core/llama_index/core/agent/react/step.py
index 9e3cd3f61008ad6c783d1c1cd344c178879115a7..69df6ed4dca3de801ed503d7d0a86846f3089c9f 100644
--- a/llama-index-core/llama_index/core/agent/react/step.py
+++ b/llama-index-core/llama_index/core/agent/react/step.py
@@ -42,7 +42,7 @@ from llama_index.core.chat_engine.types import (
     AgentChatResponse,
     StreamingAgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage, ChatResponse
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
diff --git a/llama-index-core/llama_index/core/agent/react_multimodal/step.py b/llama-index-core/llama_index/core/agent/react_multimodal/step.py
index 1f6a127fee1a09f5be54fdd662aa4f8cc3a3e4db..b25d747cf94448d639429b69bace1b41761938f5 100644
--- a/llama-index-core/llama_index/core/agent/react_multimodal/step.py
+++ b/llama-index-core/llama_index/core/agent/react_multimodal/step.py
@@ -32,7 +32,7 @@ from llama_index.core.chat_engine.types import (
     AGENT_CHAT_RESPONSE_TYPE,
     AgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage, ChatResponse
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
 from llama_index.core.multi_modal_llms.base import MultiModalLLM
diff --git a/llama-index-core/llama_index/core/agent/runner/base.py b/llama-index-core/llama_index/core/agent/runner/base.py
index b8442a1f159c43ccf37ac59b99592f1de048c2f5..bfdc17b94b43b6f411851cf31724db0c2ad88952 100644
--- a/llama-index-core/llama_index/core/agent/runner/base.py
+++ b/llama-index-core/llama_index/core/agent/runner/base.py
@@ -23,7 +23,7 @@ from llama_index.core.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage
+from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
diff --git a/llama-index-core/llama_index/core/agent/runner/parallel.py b/llama-index-core/llama_index/core/agent/runner/parallel.py
index 8a145c38415b27cae54c498cbc010834b16e3fa6..5c5ae0ace2fdab6324e9a6d380eee67ca3433b8e 100644
--- a/llama-index-core/llama_index/core/agent/runner/parallel.py
+++ b/llama-index-core/llama_index/core/agent/runner/parallel.py
@@ -24,7 +24,7 @@ from llama_index.core.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage
+from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
diff --git a/llama-index-core/llama_index/core/agent/utils.py b/llama-index-core/llama_index/core/agent/utils.py
index 187695079d23a40302bb285cecfba6f6f37e4a18..0fcb2d03963a07c6333fcca1012eb7e628fa966c 100644
--- a/llama-index-core/llama_index/core/agent/utils.py
+++ b/llama-index-core/llama_index/core/agent/utils.py
@@ -1,9 +1,7 @@
 """Agent utils."""
 
-
 from llama_index.core.agent.types import TaskStep
-from llama_index.core.base.llms.types import MessageRole
-from llama_index.core.llms.base import ChatMessage
+from llama_index.core.base.llms.types import ChatMessage, MessageRole
 from llama_index.core.memory import BaseMemory
 
 
diff --git a/llama-index-core/llama_index/core/llms/base.py b/llama-index-core/llama_index/core/base/llms/base.py
similarity index 100%
rename from llama-index-core/llama_index/core/llms/base.py
rename to llama-index-core/llama_index/core/base/llms/base.py
diff --git a/llama-index-core/llama_index/core/llms/generic_utils.py b/llama-index-core/llama_index/core/base/llms/generic_utils.py
similarity index 100%
rename from llama-index-core/llama_index/core/llms/generic_utils.py
rename to llama-index-core/llama_index/core/base/llms/generic_utils.py
diff --git a/llama-index-core/llama_index/core/chat_engine/condense_plus_context.py b/llama-index-core/llama_index/core/chat_engine/condense_plus_context.py
index 605905767de79f46969fd35838c243d8fa81e475..2013d56bfc43d85c059a8f1b4fe0601f52185099 100644
--- a/llama-index-core/llama_index/core/chat_engine/condense_plus_context.py
+++ b/llama-index-core/llama_index/core/chat_engine/condense_plus_context.py
@@ -14,7 +14,7 @@ from llama_index.core.chat_engine.types import (
 from llama_index.core.indices.base_retriever import BaseRetriever
 from llama_index.core.indices.query.schema import QueryBundle
 from llama_index.core.indices.service_context import ServiceContext
-from llama_index.core.llms.generic_utils import messages_to_history_str
+from llama_index.core.base.llms.generic_utils import messages_to_history_str
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.core.postprocessor.types import BaseNodePostprocessor
diff --git a/llama-index-core/llama_index/core/chat_engine/condense_question.py b/llama-index-core/llama_index/core/chat_engine/condense_question.py
index f802b715b5686a1a07e66f926cbfb127f5059dbf..31b7d6d9b0bcd328aab67c35b4379522e1fa9dae 100644
--- a/llama-index-core/llama_index/core/chat_engine/condense_question.py
+++ b/llama-index-core/llama_index/core/chat_engine/condense_question.py
@@ -13,7 +13,7 @@ from llama_index.core.chat_engine.types import (
 )
 from llama_index.core.chat_engine.utils import response_gen_from_query_engine
 from llama_index.core.embeddings.mock_embed_model import MockEmbedding
-from llama_index.core.llms.generic_utils import messages_to_history_str
+from llama_index.core.base.llms.generic_utils import messages_to_history_str
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.core.prompts.base import BasePromptTemplate, PromptTemplate
diff --git a/llama-index-core/llama_index/core/llms/custom.py b/llama-index-core/llama_index/core/llms/custom.py
index 52bd1c685b42cdf9e72e7d9f74291cf5f1e45a1a..3cadc5d0ce23ee1a7a0f4595073bf05aae08ae99 100644
--- a/llama-index-core/llama_index/core/llms/custom.py
+++ b/llama-index-core/llama_index/core/llms/custom.py
@@ -12,7 +12,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-core/llama_index/core/llms/llm.py b/llama-index-core/llama_index/core/llms/llm.py
index 658cbb727c506ea628561713df8f6e1f46e3f56c..21fd79876aeaa027ff86087e7ecd33fc8351d7c6 100644
--- a/llama-index-core/llama_index/core/llms/llm.py
+++ b/llama-index-core/llama_index/core/llms/llm.py
@@ -33,11 +33,11 @@ from llama_index.core.bridge.pydantic import (
     validator,
 )
 from llama_index.core.callbacks import CBEventType, EventPayload
-from llama_index.core.llms.base import BaseLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.base import BaseLLM
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     prompt_to_messages,
 )
 from llama_index.core.prompts import BasePromptTemplate, PromptTemplate
diff --git a/llama-index-core/llama_index/core/prompts/base.py b/llama-index-core/llama_index/core/prompts/base.py
index 4c3089b0b4478cd070beb8399349984154a21773..c7910763519768ae5d7134385df00518d9e8838d 100644
--- a/llama-index-core/llama_index/core/prompts/base.py
+++ b/llama-index-core/llama_index/core/prompts/base.py
@@ -34,11 +34,11 @@ from llama_index.core.base.query_pipeline.query import (
     validate_and_convert_stringable,
 )
 from llama_index.core.bridge.pydantic import BaseModel
-from llama_index.core.llms.base import BaseLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.base import BaseLLM
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as default_messages_to_prompt,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     prompt_to_messages,
 )
 from llama_index.core.prompts.prompt_type import PromptType
diff --git a/llama-index-core/llama_index/core/prompts/utils.py b/llama-index-core/llama_index/core/prompts/utils.py
index 87a5de0db6c3e05832bcda587ec088cc6c671383..956424f7c3d6ad0680a39d1b70b840e3f36c8997 100644
--- a/llama-index-core/llama_index/core/prompts/utils.py
+++ b/llama-index-core/llama_index/core/prompts/utils.py
@@ -1,7 +1,7 @@
 from string import Formatter
 from typing import List
 
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 
 
 def get_template_vars(template_str: str) -> List[str]:
diff --git a/llama-index-core/llama_index/core/service_context.py b/llama-index-core/llama_index/core/service_context.py
index 3338d7513272d791d2ad58c4a78758185e265baf..534b9016f35aa40aa83192eef6d6ba0dd892f027 100644
--- a/llama-index-core/llama_index/core/service_context.py
+++ b/llama-index-core/llama_index/core/service_context.py
@@ -13,7 +13,7 @@ from llama_index.core.service_context_elements.llm_predictor import (
     LLMPredictor,
     BaseLLMPredictor,
 )
-from llama_index.core.llms.base import LLMMetadata
+from llama_index.core.base.llms.types import LLMMetadata
 from llama_index.core.llms.llm import LLM
 from llama_index.core.llms.utils import LLMType, resolve_llm
 from llama_index.core.service_context_elements.llama_logger import LlamaLogger
diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml
index 1d32d01a1d3fd12a2c4af943708ce5a36179a82a..258e9a3f320df8b240fbcb1aa508dfa8d34fc091 100644
--- a/llama-index-core/pyproject.toml
+++ b/llama-index-core/pyproject.toml
@@ -42,7 +42,7 @@ name = "llama-index-core"
 packages = [{include = "llama_index"}]
 readme = "README.md"
 repository = "https://github.com/run-llama/llama_index"
-version = "0.10.8"
+version = "0.10.8.post1"
 
 [tool.poetry.dependencies]
 SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}
diff --git a/llama-index-finetuning/llama_index/finetuning/callbacks/finetuning_handler.py b/llama-index-finetuning/llama_index/finetuning/callbacks/finetuning_handler.py
index 037df668b9a1ec08d9468d270bac1b58b9120851..553bfa593c7d7d330795344b90cd97ab33bc87c0 100644
--- a/llama-index-finetuning/llama_index/finetuning/callbacks/finetuning_handler.py
+++ b/llama-index-finetuning/llama_index/finetuning/callbacks/finetuning_handler.py
@@ -187,7 +187,7 @@ class GradientAIFineTuningHandler(BaseFinetuningHandler):
         },
         ...
         """
-        from llama_index.core.llms.generic_utils import messages_to_history_str
+        from llama_index.core.base.llms.generic_utils import messages_to_history_str
 
         events_dict = self.get_finetuning_events()
         json_strs = []
diff --git a/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/base.py b/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/base.py
index e78151575b29df057627a06a42df3a812d7832ab..b604ecf13e08f85369485db4da78466d95385ced 100644
--- a/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/base.py
+++ b/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/base.py
@@ -18,7 +18,7 @@ from typing import (
 from llama_index.agent.openai.step import OpenAIAgentWorker
 from llama_index.core.agent.runner.base import AgentRunner
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.llms.base import ChatMessage
+from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
 from llama_index.core.memory.types import BaseMemory
diff --git a/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/step.py b/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/step.py
index a6ad95867435ed483e43ebb9e29eec729ee4cfc1..08d13368aa11415ff07b75ee43447490ddb92b4a 100644
--- a/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/step.py
+++ b/llama-index-integrations/agent/llama-index-agent-openai/llama_index/agent/openai/step.py
@@ -28,7 +28,7 @@ from llama_index.core.chat_engine.types import (
     ChatResponseMode,
     StreamingAgentChatResponse,
 )
-from llama_index.core.llms.base import ChatMessage, ChatResponse
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
 from llama_index.core.llms.llm import LLM
 from llama_index.core.memory import BaseMemory, ChatMemoryBuffer
 from llama_index.core.objects.base import ObjectRetriever
diff --git a/llama-index-integrations/callbacks/llama-index-callbacks-arize-phoenix/examples/query_pipeline.ipynb b/llama-index-integrations/callbacks/llama-index-callbacks-arize-phoenix/examples/query_pipeline.ipynb
index df3bd64306fc4a3c1a6dc6a087ebcb9604119af3..f1dcf3a4bd1339470a160e3b1be12e47237eb1a7 100644
--- a/llama-index-integrations/callbacks/llama-index-callbacks-arize-phoenix/examples/query_pipeline.ipynb
+++ b/llama-index-integrations/callbacks/llama-index-callbacks-arize-phoenix/examples/query_pipeline.ipynb
@@ -937,20 +937,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_index.query_pipeline import (\n",
+    "from llama_index.core.query_pipeline import (\n",
     "    CustomQueryComponent,\n",
-    "    InputKeys,\n",
-    "    OutputKeys,\n",
     ")\n",
+    "from llama_index.core.base.query_pipeline.query import InputKeys, OutputKeys\n",
     "from typing import Dict, Any\n",
-    "from llama_index.llms.llm import BaseLLM\n",
+    "from llama_index.core.llms.llm import LLM\n",
     "from pydantic import Field\n",
     "\n",
     "\n",
     "class RelatedMovieComponent(CustomQueryComponent):\n",
     "    \"\"\"Related movie component.\"\"\"\n",
     "\n",
-    "    llm: BaseLLM = Field(..., description=\"OpenAI LLM\")\n",
+    "    llm: LLM = Field(..., description=\"OpenAI LLM\")\n",
     "\n",
     "    def _validate_component_inputs(self, input: Dict[str, Any]) -> Dict[str, Any]:\n",
     "        \"\"\"Validate component inputs during run_component.\"\"\"\n",
@@ -1080,9 +1079,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "llama_integrations",
+   "display_name": "llama-index-4aB9_5sa-py3.10",
    "language": "python",
-   "name": "llama_integrations"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-anyscale/llama_index/embeddings/anyscale/utils.py b/llama-index-integrations/embeddings/llama-index-embeddings-anyscale/llama_index/embeddings/anyscale/utils.py
index 556cbc7ca6def215de97b18ab0423043b48d5289..895857cfd0e97f825ce46e4858f423095c4f7267 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-anyscale/llama_index/embeddings/anyscale/utils.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-anyscale/llama_index/embeddings/anyscale/utils.py
@@ -1,6 +1,6 @@
 from typing import Optional, Tuple
 
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 DEFAULT_ANYSCALE_API_BASE = "https://api.endpoints.anyscale.com/v1"
 DEFAULT_ANYSCALE_API_VERSION = ""
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/llama_index/embeddings/azure_openai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/llama_index/embeddings/azure_openai/base.py
index fc699fee9fbfe2e44d9df42577b284ab5e62dbfb..204c035a9dcfaf63852a48397d9576590a595f1e 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/llama_index/embeddings/azure_openai/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/llama_index/embeddings/azure_openai/base.py
@@ -4,7 +4,7 @@ import httpx
 from llama_index.core.bridge.pydantic import Field, PrivateAttr, root_validator
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.constants import DEFAULT_EMBED_BATCH_SIZE
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.embeddings.openai import (
     OpenAIEmbedding,
     OpenAIEmbeddingMode,
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-jinaai/llama_index/embeddings/jinaai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-jinaai/llama_index/embeddings/jinaai/base.py
index f64cb48988e70c8ba699fb80076e3f12ab781bce..0a8ee4285eeee36ed61778b3413c0b27dbfccb47 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-jinaai/llama_index/embeddings/jinaai/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-jinaai/llama_index/embeddings/jinaai/base.py
@@ -9,7 +9,7 @@ from llama_index.core.base.embeddings.base import (
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks.base import CallbackManager
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 MAX_BATCH_SIZE = 2048
 
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-mistralai/llama_index/embeddings/mistralai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-mistralai/llama_index/embeddings/mistralai/base.py
index 138f78bf58f1f1db11f1798b6620734d935b8c62..80c471c7bad816347d9e64ad1e86d1f95cc2384a 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-mistralai/llama_index/embeddings/mistralai/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-mistralai/llama_index/embeddings/mistralai/base.py
@@ -8,7 +8,7 @@ from llama_index.core.base.embeddings.base import (
 )
 from llama_index.core.bridge.pydantic import PrivateAttr
 from llama_index.core.callbacks.base import CallbackManager
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 from mistralai.async_client import MistralAsyncClient
 from mistralai.client import MistralClient
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-openai/llama_index/embeddings/openai/utils.py b/llama-index-integrations/embeddings/llama-index-embeddings-openai/llama_index/embeddings/openai/utils.py
index 72e909da803535b8fe2cce6f6d7da07e9012242f..2b9bf4254c9cd6aba9c1cf957542d05d506357a9 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-openai/llama_index/embeddings/openai/utils.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-openai/llama_index/embeddings/openai/utils.py
@@ -2,7 +2,7 @@ import logging
 import os
 from typing import Any, Callable, Optional, Tuple, Union
 
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from tenacity import (
     before_sleep_log,
     retry,
diff --git a/llama-index-integrations/llms/llama-index-llms-ai21/llama_index/llms/ai21/base.py b/llama-index-integrations/llms/llama-index-llms-ai21/llama_index/llms/ai21/base.py
index 3805e5d53e6eaf16c4f19918a6b7619e22ced588..4e290bb962870ddef7cbaa89cec23de1f89b6a30 100644
--- a/llama-index-integrations/llms/llama-index-llms-ai21/llama_index/llms/ai21/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-ai21/llama_index/llms/ai21/base.py
@@ -12,7 +12,7 @@ from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_to_chat_decorator,
     get_from_param_or_env,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-ai21/tests/test_llms_ai21.py b/llama-index-integrations/llms/llama-index-llms-ai21/tests/test_llms_ai21.py
index fcc355e09e831d69f47baab4cd63419119c109d8..3f71faba559c59cd53dd8bbc1af66cf7cd376839 100644
--- a/llama-index-integrations/llms/llama-index-llms-ai21/tests/test_llms_ai21.py
+++ b/llama-index-integrations/llms/llama-index-llms-ai21/tests/test_llms_ai21.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.ai21 import AI21
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py
index 9623f5ca07ec7f75c2e26d37ed92f95a2825357a..7fc634cfa05a051dd2ece4bb0e62d1ec7ad1564d 100644
--- a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py
@@ -18,7 +18,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     achat_to_completion_decorator,
     astream_chat_to_completion_decorator,
     chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/tests/test_llms_anthropic.py b/llama-index-integrations/llms/llama-index-llms-anthropic/tests/test_llms_anthropic.py
index 991430cb98c3afcbd172c396abbe63bac25b2f77..e5215cd3cf06f0fdc00fc8c7efbfed9c4cd12074 100644
--- a/llama-index-integrations/llms/llama-index-llms-anthropic/tests/test_llms_anthropic.py
+++ b/llama-index-integrations/llms/llama-index-llms-anthropic/tests/test_llms_anthropic.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.anthropic import Anthropic
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/base.py b/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/base.py
index 5d84ea3e0d6bd46cc978ca290f457c63540ce8c3..d7f9849495e460509820dd64cc8b6a4d1c590633 100644
--- a/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/base.py
@@ -3,7 +3,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.core.base.llms.types import ChatMessage, LLMMetadata
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.anyscale.utils import (
     anyscale_modelname_to_contextsize,
diff --git a/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/utils.py b/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/utils.py
index ed5a235dc0d532ede212c2aa77ee14b289728ec2..c2fc2925b9488f6751c92bc9388d1ece5d1d9dca 100644
--- a/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-anyscale/llama_index/llms/anyscale/utils.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 DEFAULT_ANYSCALE_API_BASE = "https://api.endpoints.anyscale.com/v1"
 DEFAULT_ANYSCALE_API_VERSION = ""
diff --git a/llama-index-integrations/llms/llama-index-llms-anyscale/tests/test_llms_anyscale.py b/llama-index-integrations/llms/llama-index-llms-anyscale/tests/test_llms_anyscale.py
index 5c91579cd700e51efbdbcbeb0269b17eafcefe11..68129f5ab03131c6d951cbfa17b23ad8c85ff988 100644
--- a/llama-index-integrations/llms/llama-index-llms-anyscale/tests/test_llms_anyscale.py
+++ b/llama-index-integrations/llms/llama-index-llms-anyscale/tests/test_llms_anyscale.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.anyscale import Anyscale
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-azure-openai/llama_index/llms/azure_openai/base.py b/llama-index-integrations/llms/llama-index-llms-azure-openai/llama_index/llms/azure_openai/base.py
index b41006eee63a09f5f0ddfa44e70385c2bd7d381a..6297019c26127526721e711bed5bc18f9975fd18 100644
--- a/llama-index-integrations/llms/llama-index-llms-azure-openai/llama_index/llms/azure_openai/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-azure-openai/llama_index/llms/azure_openai/base.py
@@ -4,7 +4,7 @@ import httpx
 from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.bridge.pydantic import Field, PrivateAttr, root_validator
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.azure_openai.utils import (
     refresh_openai_azuread_token,
diff --git a/llama-index-integrations/llms/llama-index-llms-azure-openai/tests/test_llms_azure_openai.py b/llama-index-integrations/llms/llama-index-llms-azure-openai/tests/test_llms_azure_openai.py
index 6fd623d8d22a64f2e55d37d1ef647a3fb8189160..1ba4a3c69164b32287b94ffb050c3d0ebb0d6485 100644
--- a/llama-index-integrations/llms/llama-index-llms-azure-openai/tests/test_llms_azure_openai.py
+++ b/llama-index-integrations/llms/llama-index-llms-azure-openai/tests/test_llms_azure_openai.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.azure_openai import AzureOpenAI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/base.py b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/base.py
index d5ecfe0e51e1f2b840a66eea8c35c51fcb97e9ae..ae9c1b4b730211c617e83d88f3eaf558685522e8 100644
--- a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/base.py
@@ -20,7 +20,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py
index 1eb9b62ee67b18d0aeed21da23b6f0a13904c0e3..df1f3a484803172c6bba54e8c09727cf74ba06c7 100644
--- a/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-bedrock/llama_index/llms/bedrock/utils.py
@@ -3,7 +3,7 @@ from abc import ABC, abstractmethod
 from typing import Any, Callable, Optional, Sequence
 
 from llama_index.core.base.llms.types import ChatMessage
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     prompt_to_messages,
 )
 from llama_index.llms.anthropic.utils import messages_to_anthropic_prompt
diff --git a/llama-index-integrations/llms/llama-index-llms-bedrock/tests/test_llms_bedrock.py b/llama-index-integrations/llms/llama-index-llms-bedrock/tests/test_llms_bedrock.py
index e65f8a18bae00ff5d47b7ddb1a8e1c635dd6047d..af0c4aa14253a8cbaa2312f8e042f1d7b1fe6d69 100644
--- a/llama-index-integrations/llms/llama-index-llms-bedrock/tests/test_llms_bedrock.py
+++ b/llama-index-integrations/llms/llama-index-llms-bedrock/tests/test_llms_bedrock.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.bedrock import Bedrock
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/tests/test_llms_cohere.py b/llama-index-integrations/llms/llama-index-llms-cohere/tests/test_llms_cohere.py
index 11a37d801a98b05f4be2d3065bf6d18ad42cae9d..f18b9558c399bd58132764752fa2182cb79cc8e2 100644
--- a/llama-index-integrations/llms/llama-index-llms-cohere/tests/test_llms_cohere.py
+++ b/llama-index-integrations/llms/llama-index-llms-cohere/tests/test_llms_cohere.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.cohere import Cohere
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_llms_dashscope.py b/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_llms_dashscope.py
index 885e6e25b511e40ded448198c1eeceba6da1003c..375e24fbfc2d6b0bbca0f905462661a1d1816b3f 100644
--- a/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_llms_dashscope.py
+++ b/llama-index-integrations/llms/llama-index-llms-dashscope/tests/test_llms_dashscope.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.dashscope import DashScope
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-everlyai/llama_index/llms/everlyai/base.py b/llama-index-integrations/llms/llama-index-llms-everlyai/llama_index/llms/everlyai/base.py
index c8a7afcab795f81c34995ae68b78239484e85438..fb0d81bdb13d9d67d87c6fb00f7eb51065e8c877 100644
--- a/llama-index-integrations/llms/llama-index-llms-everlyai/llama_index/llms/everlyai/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-everlyai/llama_index/llms/everlyai/base.py
@@ -3,7 +3,7 @@ from typing import Any, Callable, Dict, Optional, Sequence
 from llama_index.core.base.llms.types import ChatMessage, LLMMetadata
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.everlyai.utils import everlyai_modelname_to_contextsize
 from llama_index.llms.openai import OpenAI
diff --git a/llama-index-integrations/llms/llama-index-llms-everlyai/tests/test_llms_everlyai.py b/llama-index-integrations/llms/llama-index-llms-everlyai/tests/test_llms_everlyai.py
index 4eddba63e1da98b62022dfe2b17a297f1f503db0..970d2b0326a7472aaf1c59a9e45584faecece6c1 100644
--- a/llama-index-integrations/llms/llama-index-llms-everlyai/tests/test_llms_everlyai.py
+++ b/llama-index-integrations/llms/llama-index-llms-everlyai/tests/test_llms_everlyai.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.everlyai import EverlyAI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/utils.py b/llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/utils.py
index dd65f4784cf118fa1cc8a28c01d2a7d3792b2342..8bc01a53c076bfc59db31b308e5c674e6adf5b58 100644
--- a/llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/utils.py
@@ -4,7 +4,7 @@ import google.ai.generativelanguage as glm
 import google.generativeai as genai
 import PIL
 from llama_index.core.base.llms.types import MessageRole
-from llama_index.core.llms.base import (
+from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
diff --git a/llama-index-integrations/llms/llama-index-llms-gemini/tests/test_llms_gemini.py b/llama-index-integrations/llms/llama-index-llms-gemini/tests/test_llms_gemini.py
index b9b238d1e35426a7c768e269f53755da974d51a1..3e458877ad97ab177eefc3a755cc7cf44264a83b 100644
--- a/llama-index-integrations/llms/llama-index-llms-gemini/tests/test_llms_gemini.py
+++ b/llama-index-integrations/llms/llama-index-llms-gemini/tests/test_llms_gemini.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.gemini import Gemini
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-gradient/tests/test_llms_gradient.py b/llama-index-integrations/llms/llama-index-llms-gradient/tests/test_llms_gradient.py
index 0c8ab23af6f70db60c192ddb812643c47050fc3b..a04347ccbf689f1ff514a7294c7bfdd4fa0c2e93 100644
--- a/llama-index-integrations/llms/llama-index-llms-gradient/tests/test_llms_gradient.py
+++ b/llama-index-integrations/llms/llama-index-llms-gradient/tests/test_llms_gradient.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.gradient import GradientBaseModelLLM, GradientModelAdapterLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface/llama_index/llms/huggingface/base.py b/llama-index-integrations/llms/llama-index-llms-huggingface/llama_index/llms/huggingface/base.py
index 0d7e6e9120872bc8a15a8e9eec7d17d1070ba284..0d1407e9239b7de5abbb3dea06b00e2f23cf96f3 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface/llama_index/llms/huggingface/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface/llama_index/llms/huggingface/base.py
@@ -28,11 +28,11 @@ from llama_index.core.llms.callbacks import (
     llm_completion_callback,
 )
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.prompts.base import PromptTemplate
diff --git a/llama-index-integrations/llms/llama-index-llms-huggingface/tests/test_llms_huggingface.py b/llama-index-integrations/llms/llama-index-llms-huggingface/tests/test_llms_huggingface.py
index c065b856d2d75bd417d397ded67dd553c94f4670..c65dc8fd0f9c697246ef2e40592914f041435703 100644
--- a/llama-index-integrations/llms/llama-index-llms-huggingface/tests/test_llms_huggingface.py
+++ b/llama-index-integrations/llms/llama-index-llms-huggingface/tests/test_llms_huggingface.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.huggingface import HuggingFaceInferenceAPI, HuggingFaceLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/base.py b/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/base.py
index 036709c642f86f8201908abc608613efaa1787fa..49fdd7f0fcac160a09a56b536bfb960429ee0008 100644
--- a/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/base.py
@@ -15,7 +15,7 @@ from llama_index.core.bridge.pydantic import Field
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     achat_to_completion_decorator,
     acompletion_to_chat_decorator,
     astream_chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/utils.py b/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/utils.py
index bba9fc5ec6bb9e7c8a62d3b4a79e9c48a921ad60..5d310e27e5a7743c47c20796c8ae190e7b93dd6e 100644
--- a/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-konko/llama_index/llms/konko/utils.py
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type
 import openai
 from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.bridge.pydantic import BaseModel
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from packaging.version import parse
 from tenacity import (
     before_sleep_log,
diff --git a/llama-index-integrations/llms/llama-index-llms-konko/tests/test_llms_konko.py b/llama-index-integrations/llms/llama-index-llms-konko/tests/test_llms_konko.py
index d4a7b1dcbaf9a16cdad348947233b168300396b9..39aeddc95c9590ba17989a268ec71abc0eba9fce 100644
--- a/llama-index-integrations/llms/llama-index-llms-konko/tests/test_llms_konko.py
+++ b/llama-index-integrations/llms/llama-index-llms-konko/tests/test_llms_konko.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.konko import Konko
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-langchain/llama_index/llms/langchain/base.py b/llama-index-integrations/llms/llama-index-llms-langchain/llama_index/llms/langchain/base.py
index 01fb8b2723c5f2bc789b916bb34c305061f9d325..4e463cf7f525b63f1650aecc873a103b90ab4f81 100644
--- a/llama-index-integrations/llms/llama-index-llms-langchain/llama_index/llms/langchain/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-langchain/llama_index/llms/langchain/base.py
@@ -14,7 +14,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-langchain/tests/test_llms_langchain.py b/llama-index-integrations/llms/llama-index-llms-langchain/tests/test_llms_langchain.py
index 9a101969c0aa406e526d09c8a87aae5c9c01d1d1..4d01c5f1ec907ff46685c76f79e88a7f6c41b3ec 100644
--- a/llama-index-integrations/llms/llama-index-llms-langchain/tests/test_llms_langchain.py
+++ b/llama-index-integrations/llms/llama-index-llms-langchain/tests/test_llms_langchain.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.langchain import LangChainLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/llama_index/llms/litellm/base.py b/llama-index-integrations/llms/llama-index-llms-litellm/llama_index/llms/litellm/base.py
index 17c2a415de04175f39e129bd51a0c26aceabc192..52e6fe74a4fc72fe660fb7a8a2599ead2d0a5c6f 100644
--- a/llama-index-integrations/llms/llama-index-llms-litellm/llama_index/llms/litellm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-litellm/llama_index/llms/litellm/base.py
@@ -14,7 +14,7 @@ from llama_index.core.bridge.pydantic import Field
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_TEMPERATURE
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     achat_to_completion_decorator,
     acompletion_to_chat_decorator,
     astream_chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/tests/test_llms_litellm.py b/llama-index-integrations/llms/llama-index-llms-litellm/tests/test_llms_litellm.py
index 8c295f53bbf5a649b517cb8e9b1bc28ef7328254..fd55aa0a333ab6cd438ecdca6cb148ff918c43dd 100644
--- a/llama-index-integrations/llms/llama-index-llms-litellm/tests/test_llms_litellm.py
+++ b/llama-index-integrations/llms/llama-index-llms-litellm/tests/test_llms_litellm.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.litellm import LiteLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-llama-api/llama_index/llms/llama_api/base.py b/llama-index-integrations/llms/llama-index-llms-llama-api/llama_index/llms/llama_api/base.py
index 657bac868d450ad50d3c79226fcebec6ab00c321..84f6f79f405fcb85ecead2ecbb6261bb2b541629 100644
--- a/llama-index-integrations/llms/llama-index-llms-llama-api/llama_index/llms/llama_api/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-llama-api/llama_index/llms/llama_api/base.py
@@ -13,7 +13,7 @@ from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_NUM_OUTPUTS
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import chat_to_completion_decorator
+from llama_index.core.base.llms.generic_utils import chat_to_completion_decorator
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.openai.utils import (
     from_openai_message_dict,
diff --git a/llama-index-integrations/llms/llama-index-llms-llama-api/tests/test_llms_llama_api.py b/llama-index-integrations/llms/llama-index-llms-llama-api/tests/test_llms_llama_api.py
index b9593bac9e3ccfdd224468793008d265e33f4b66..feecc7c06690b58d11606c163312ab6010a641c1 100644
--- a/llama-index-integrations/llms/llama-index-llms-llama-api/tests/test_llms_llama_api.py
+++ b/llama-index-integrations/llms/llama-index-llms-llama-api/tests/test_llms_llama_api.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.llama_api import LlamaAPI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-llama-cpp/llama_index/llms/llama_cpp/base.py b/llama-index-integrations/llms/llama-index-llms-llama-cpp/llama_index/llms/llama_cpp/base.py
index f8c1f65dbbe027850021797d071da435d1fc83c2..809b19276d940a4ac1aac739013dc80d3d785389 100644
--- a/llama-index-integrations/llms/llama-index-llms-llama-cpp/llama_index/llms/llama_cpp/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-llama-cpp/llama_index/llms/llama_cpp/base.py
@@ -19,7 +19,7 @@ from llama_index.core.constants import (
 )
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-llama-cpp/tests/test_llms_llama_cpp.py b/llama-index-integrations/llms/llama-index-llms-llama-cpp/tests/test_llms_llama_cpp.py
index deb9d0d48d4d49a9203654d9869589fd834df526..d72ca524294d60a38a81c34523d4f6600c53c7a5 100644
--- a/llama-index-integrations/llms/llama-index-llms-llama-cpp/tests/test_llms_llama_cpp.py
+++ b/llama-index-integrations/llms/llama-index-llms-llama-cpp/tests/test_llms_llama_cpp.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.llama_cpp import LlamaCPP
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-localai/tests/test_llms_localai.py b/llama-index-integrations/llms/llama-index-llms-localai/tests/test_llms_localai.py
index a1ef1894867ff43a98350ef00a9e727312200f99..2ebb0203b567e3fb9429654c056af129b9f40d36 100644
--- a/llama-index-integrations/llms/llama-index-llms-localai/tests/test_llms_localai.py
+++ b/llama-index-integrations/llms/llama-index-llms-localai/tests/test_llms_localai.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.localai import LocalAI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-mistralai/llama_index/llms/mistralai/base.py b/llama-index-integrations/llms/llama-index-llms-mistralai/llama_index/llms/mistralai/base.py
index 9eb7640e241f7d24cb61888f35d643dede458e4e..05977726e8d1af562d72ca177253092626020542 100644
--- a/llama-index-integrations/llms/llama-index-llms-mistralai/llama_index/llms/mistralai/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-mistralai/llama_index/llms/mistralai/base.py
@@ -19,7 +19,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     achat_to_completion_decorator,
     astream_chat_to_completion_decorator,
     chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-mistralai/tests/test_llms_mistral.py b/llama-index-integrations/llms/llama-index-llms-mistralai/tests/test_llms_mistral.py
index 5df2423c197b076326abcf2b7d94a6108aafa5b2..c89091e66b7dfe536ae5125ac51d03a0ba19dbf5 100644
--- a/llama-index-integrations/llms/llama-index-llms-mistralai/tests/test_llms_mistral.py
+++ b/llama-index-integrations/llms/llama-index-llms-mistralai/tests/test_llms_mistral.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.mistralai import MistralAI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-monsterapi/tests/test_llms_monsterapi.py b/llama-index-integrations/llms/llama-index-llms-monsterapi/tests/test_llms_monsterapi.py
index f5fb195247fcc1f180aaaf2f24a212b18f7a3dcb..f8d4434d4b28282c60e4b8238fb38470f3369960 100644
--- a/llama-index-integrations/llms/llama-index-llms-monsterapi/tests/test_llms_monsterapi.py
+++ b/llama-index-integrations/llms/llama-index-llms-monsterapi/tests/test_llms_monsterapi.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.monsterapi import MonsterLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-neutrino/llama_index/llms/neutrino/base.py b/llama-index-integrations/llms/llama-index-llms-neutrino/llama_index/llms/neutrino/base.py
index 8b0abea1aa0e33fa780201ba4a11a82a356cc12e..b1470353b648fc2f7bc92ada30a59b069574bc86 100644
--- a/llama-index-integrations/llms/llama-index-llms-neutrino/llama_index/llms/neutrino/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-neutrino/llama_index/llms/neutrino/base.py
@@ -6,7 +6,7 @@ from llama_index.core.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai_like import OpenAILike
 
 DEFAULT_API_BASE = "https://router.neutrinoapp.com/api/llm-router"
diff --git a/llama-index-integrations/llms/llama-index-llms-neutrino/tests/test_llms_neutrino.py b/llama-index-integrations/llms/llama-index-llms-neutrino/tests/test_llms_neutrino.py
index 5b1583f0be2b417f8f03f0e031da23c3b8fbfc99..cff486909cc16300247077c6bc6386658d1cdd17 100644
--- a/llama-index-integrations/llms/llama-index-llms-neutrino/tests/test_llms_neutrino.py
+++ b/llama-index-integrations/llms/llama-index-llms-neutrino/tests/test_llms_neutrino.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.neutrino import Neutrino
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/llama_index/llms/nvidia_tensorrt/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/llama_index/llms/nvidia_tensorrt/base.py
index dc707bae3119effb32fb09cfa95e9028d96d978a..7114c4c04e229ac996620071feff5f53ccd320a8 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/llama_index/llms/nvidia_tensorrt/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/llama_index/llms/nvidia_tensorrt/base.py
@@ -9,7 +9,7 @@ import torch
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.core.llms.base import (
+from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
     CompletionResponse,
@@ -20,7 +20,9 @@ from llama_index.core.llms.callbacks import (
     llm_completion_callback,
 )
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import completion_response_to_chat_response
+from llama_index.core.base.llms.generic_utils import (
+    completion_response_to_chat_response,
+)
 from llama_index.llms.nvidia_tensorrt.utils import (
     generate_completion_dict,
     get_output,
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/tests/test_llms_nvidia_tensorrt.py b/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/tests/test_llms_nvidia_tensorrt.py
index 458b505f7606f4143779dc4b3e4b47ae0bb5e03e..224e9a38f5b6ca79498f672378d8d33c46be6f45 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/tests/test_llms_nvidia_tensorrt.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia-tensorrt/tests/test_llms_nvidia_tensorrt.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.nvidia_tensorrt import LocalTensorRTLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia-triton/llama_index/llms/nvidia_triton/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia-triton/llama_index/llms/nvidia_triton/base.py
index c9af03ae2fcaa78875c2f5872cd2e6dae1f58373..430a4eab21dae346e49ad523a7e39635e3f0c4d6 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia-triton/llama_index/llms/nvidia_triton/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia-triton/llama_index/llms/nvidia_triton/base.py
@@ -8,7 +8,7 @@ from typing import (
 
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.llms.base import (
+from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseAsyncGen,
@@ -19,7 +19,7 @@ from llama_index.core.llms.base import (
     LLMMetadata,
 )
 from llama_index.core.llms.callbacks import llm_chat_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_to_chat_decorator,
 )
 from llama_index.core.llms.llm import LLM
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia-triton/tests/test_llms_nvidia_triton.py b/llama-index-integrations/llms/llama-index-llms-nvidia-triton/tests/test_llms_nvidia_triton.py
index 26239092cb93836315fd8a5c99dba86faf49a4d4..c0058b15a1ce159fc558888e4a5f0bafcdc184aa 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia-triton/tests/test_llms_nvidia_triton.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia-triton/tests/test_llms_nvidia_triton.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.nvidia_triton import NvidiaTriton
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-ollama/tests/test_llms_ollama.py b/llama-index-integrations/llms/llama-index-llms-ollama/tests/test_llms_ollama.py
index 2a73532a45d13dd2420cf9cda5c3153e28bbed82..695a83a08ada1039c5b0cba7555a6ab494a538a3 100644
--- a/llama-index-integrations/llms/llama-index-llms-ollama/tests/test_llms_ollama.py
+++ b/llama-index-integrations/llms/llama-index-llms-ollama/tests/test_llms_ollama.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.ollama import Ollama
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-openai-like/llama_index/llms/openai_like/base.py b/llama-index-integrations/llms/llama-index-llms-openai-like/llama_index/llms/openai_like/base.py
index 424e57babc4d7e631e00c1a1d1c074838595abf9..f5184a8753e01781634a8e042758ff82086181b8 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai-like/llama_index/llms/openai_like/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai-like/llama_index/llms/openai_like/base.py
@@ -12,7 +12,7 @@ from llama_index.core.base.llms.types import (
 )
 from llama_index.core.bridge.pydantic import Field
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     async_stream_completion_response_to_chat_response,
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
diff --git a/llama-index-integrations/llms/llama-index-llms-openai-like/tests/test_llms_openai_like.py b/llama-index-integrations/llms/llama-index-llms-openai-like/tests/test_llms_openai_like.py
index 50e86b53d290a275303f65ee4fabb7fb964c114c..d87a63b738e9fef18d81f85fe1ee3d3885a91e22 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai-like/tests/test_llms_openai_like.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai-like/tests/test_llms_openai_like.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.openai_like import OpenAILike
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
index 1a8da01a3312ec66436efbceb578f0d767e4cdf2..379a0f65ac6c8292dc06bfe5289ba40a9fe6eff3 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
@@ -33,7 +33,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     achat_to_completion_decorator,
     acompletion_to_chat_decorator,
     astream_chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
index 23fb6c14edea244ab3a15aa756cad7ca21719bb6..d3cd4cdc0afb98efb8297c49e9ad499959d75b88 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, U
 from deprecated import deprecated
 from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.bridge.pydantic import BaseModel
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from tenacity import (
     before_sleep_log,
     retry,
diff --git a/llama-index-integrations/llms/llama-index-llms-openai/tests/test_llms_openai.py b/llama-index-integrations/llms/llama-index-llms-openai/tests/test_llms_openai.py
index 2c8eb24ffdf08c43e07d6e0ff9d255a95fd5f13a..096898b908e59cd87c77b2fff9648e54b6c1c941 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/tests/test_llms_openai.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai/tests/test_llms_openai.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.openai import OpenAI
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py b/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
index a1b2dc49ee85e0fdd695ea2eb6e8133809d53919..b0e50390372bd5baf1fb1a23d780069e66bf76b8 100644
--- a/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openllm/llama_index/llms/openllm/base.py
@@ -27,10 +27,10 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.llms.llm import LLM
diff --git a/llama-index-integrations/llms/llama-index-llms-openrouter/llama_index/llms/openrouter/base.py b/llama-index-integrations/llms/llama-index-llms-openrouter/llama_index/llms/openrouter/base.py
index 3743f2691d2dd2f658560d5772da26bf38dc92b1..0b1e92829477780060c261a03e6510fa7892778f 100644
--- a/llama-index-integrations/llms/llama-index-llms-openrouter/llama_index/llms/openrouter/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openrouter/llama_index/llms/openrouter/base.py
@@ -7,7 +7,7 @@ from llama_index.core.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.llms.openai_like import OpenAILike
 
 DEFAULT_API_BASE = "https://openrouter.ai/api/v1"
diff --git a/llama-index-integrations/llms/llama-index-llms-openrouter/tests/test_llms_openrouter.py b/llama-index-integrations/llms/llama-index-llms-openrouter/tests/test_llms_openrouter.py
index 29cf183dd7fb5f55136bca7f39e2765a5534a720..ad037c1749e269452c2ff658b53ce4fe1a7e0be8 100644
--- a/llama-index-integrations/llms/llama-index-llms-openrouter/tests/test_llms_openrouter.py
+++ b/llama-index-integrations/llms/llama-index-llms-openrouter/tests/test_llms_openrouter.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.openrouter import OpenRouter
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-palm/tests/test_llms_palm.py b/llama-index-integrations/llms/llama-index-llms-palm/tests/test_llms_palm.py
index 019689d3c538ee4b3cd91c81151bd09b495af7a3..686da75b3f858a56c3045f245e57e12412fff193 100644
--- a/llama-index-integrations/llms/llama-index-llms-palm/tests/test_llms_palm.py
+++ b/llama-index-integrations/llms/llama-index-llms-palm/tests/test_llms_palm.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.palm import PaLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-perplexity/tests/test_llms_perplexity.py b/llama-index-integrations/llms/llama-index-llms-perplexity/tests/test_llms_perplexity.py
index 9c3c5e5161c9168931ee3ad3bf8897bf3e3b4bb0..85981c61fd2088d2b0275aa4f35866499dbf1597 100644
--- a/llama-index-integrations/llms/llama-index-llms-perplexity/tests/test_llms_perplexity.py
+++ b/llama-index-integrations/llms/llama-index-llms-perplexity/tests/test_llms_perplexity.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.perplexity import Perplexity
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-portkey/llama_index/llms/portkey/base.py b/llama-index-integrations/llms/llama-index-llms-portkey/llama_index/llms/portkey/base.py
index 0bafff1e2b8a388b8ff2c6630480c682648cbc8a..30cb1e744c33a955cdd7182a1b481218b4c58333 100644
--- a/llama-index-integrations/llms/llama-index-llms-portkey/llama_index/llms/portkey/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-portkey/llama_index/llms/portkey/base.py
@@ -1,6 +1,7 @@
 """
 Portkey integration with Llama_index for enhanced monitoring.
 """
+
 from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union, cast
 
 from llama_index.core.base.llms.types import (
@@ -14,7 +15,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     chat_to_completion_decorator,
     completion_to_chat_decorator,
     stream_chat_to_completion_decorator,
diff --git a/llama-index-integrations/llms/llama-index-llms-portkey/tests/test_llms_portkey.py b/llama-index-integrations/llms/llama-index-llms-portkey/tests/test_llms_portkey.py
index 305c8e7947af617d399c0a2e5ef3ac9df4f535a9..035cea31b746d1c89de28ef4c408b68b46dc857b 100644
--- a/llama-index-integrations/llms/llama-index-llms-portkey/tests/test_llms_portkey.py
+++ b/llama-index-integrations/llms/llama-index-llms-portkey/tests/test_llms_portkey.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.portkey import Portkey
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-predibase/tests/test_llms_predibase.py b/llama-index-integrations/llms/llama-index-llms-predibase/tests/test_llms_predibase.py
index 6adbd916c3e0b7834b81d22a6bf4af7d02bdf0da..234c824829c929b79dccff0d211cdf668d49752d 100644
--- a/llama-index-integrations/llms/llama-index-llms-predibase/tests/test_llms_predibase.py
+++ b/llama-index-integrations/llms/llama-index-llms-predibase/tests/test_llms_predibase.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.predibase import PredibaseLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-replicate/llama_index/llms/replicate/base.py b/llama-index-integrations/llms/llama-index-llms-replicate/llama_index/llms/replicate/base.py
index f56e819087adf3e7e9707d8a63bdba1d293155a1..297715ba5681396ffdf053b2a100d00f0b097f99 100644
--- a/llama-index-integrations/llms/llama-index-llms-replicate/llama_index/llms/replicate/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-replicate/llama_index/llms/replicate/base.py
@@ -12,7 +12,7 @@ from llama_index.core.bridge.pydantic import Field
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-replicate/tests/test_llms_replicate.py b/llama-index-integrations/llms/llama-index-llms-replicate/tests/test_llms_replicate.py
index f1abf03ec2d22ef129f1479014f5c09a76337ec6..3f1a5cdef6cb77d2f45aeb48c206d476595078ad 100644
--- a/llama-index-integrations/llms/llama-index-llms-replicate/tests/test_llms_replicate.py
+++ b/llama-index-integrations/llms/llama-index-llms-replicate/tests/test_llms_replicate.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.replicate import Replicate
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-rungpt/tests/test_llms_rungpt.py b/llama-index-integrations/llms/llama-index-llms-rungpt/tests/test_llms_rungpt.py
index d3c5febaba18aa649a86cb04890c4e770cb1cd02..a5e9ae5d56977360da83261c50117437524a8ff6 100644
--- a/llama-index-integrations/llms/llama-index-llms-rungpt/tests/test_llms_rungpt.py
+++ b/llama-index-integrations/llms/llama-index-llms-rungpt/tests/test_llms_rungpt.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.rungpt import RunGptLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/llama_index/llms/sagemaker_endpoint/base.py b/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/llama_index/llms/sagemaker_endpoint/base.py
index 1f0d5bc1baa98930efe04dbe95a880ba9aceb4e3..e4234175067d8aec0dd785dd8c791c2dbff9be06 100644
--- a/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/llama_index/llms/sagemaker_endpoint/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/llama_index/llms/sagemaker_endpoint/base.py
@@ -16,7 +16,7 @@ from llama_index.core.llms.callbacks import (
     llm_chat_callback,
     llm_completion_callback,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/tests/test_llms_sagemaker_endpoint.py b/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/tests/test_llms_sagemaker_endpoint.py
index e3406679c0c66ee6541022c1ad956843e04962fd..867e28213de7c652876ce9c096e8ce2aa5ea3426 100644
--- a/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/tests/test_llms_sagemaker_endpoint.py
+++ b/llama-index-integrations/llms/llama-index-llms-sagemaker-endpoint/tests/test_llms_sagemaker_endpoint.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.sagemaker_endpoint import SageMakerLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-together/tests/test_llms_together.py b/llama-index-integrations/llms/llama-index-llms-together/tests/test_llms_together.py
index eee087d9057c68adc36052e575240cbf4c6494b9..a93b4d893936784596650cb305c6fbd5901d31b2 100644
--- a/llama-index-integrations/llms/llama-index-llms-together/tests/test_llms_together.py
+++ b/llama-index-integrations/llms/llama-index-llms-together/tests/test_llms_together.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.together import TogetherLLM
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-vertex/tests/test_llms_vertex.py b/llama-index-integrations/llms/llama-index-llms-vertex/tests/test_llms_vertex.py
index b9d9e2fc995d0d09387512ad326f0abff847484c..5bb1e9987c7c9e1c33a40b44017f9515e6c4cc8c 100644
--- a/llama-index-integrations/llms/llama-index-llms-vertex/tests/test_llms_vertex.py
+++ b/llama-index-integrations/llms/llama-index-llms-vertex/tests/test_llms_vertex.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.vertex import Vertex
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
index 93c795871b23fc28a86ab134e401c762b17d65a0..c1c70987774d13a3a11ea174de9e96530012631b 100644
--- a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
@@ -14,11 +14,11 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_response_to_chat_response,
     stream_completion_response_to_chat_response,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.llms.llm import LLM
diff --git a/llama-index-integrations/llms/llama-index-llms-vllm/tests/test_llms_vllm.py b/llama-index-integrations/llms/llama-index-llms-vllm/tests/test_llms_vllm.py
index 04dac87b8c987ab51e549b2618cf06dd6ef33222..1be18dd2d48710c7c52fff0b802e9b918c58e3a5 100644
--- a/llama-index-integrations/llms/llama-index-llms-vllm/tests/test_llms_vllm.py
+++ b/llama-index-integrations/llms/llama-index-llms-vllm/tests/test_llms_vllm.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.vllm import Vllm
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-watsonx/llama_index/llms/watsonx/base.py b/llama-index-integrations/llms/llama-index-llms-watsonx/llama_index/llms/watsonx/base.py
index e62df096a02f2ece3b333501fbe4baca58d52631..3573b59cd52abfdc3d1d62e3dc25def05452557e 100644
--- a/llama-index-integrations/llms/llama-index-llms-watsonx/llama_index/llms/watsonx/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-watsonx/llama_index/llms/watsonx/base.py
@@ -14,7 +14,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     completion_to_chat_decorator,
     stream_completion_to_chat_decorator,
 )
diff --git a/llama-index-integrations/llms/llama-index-llms-watsonx/tests/test_llms_watsonx.py b/llama-index-integrations/llms/llama-index-llms-watsonx/tests/test_llms_watsonx.py
index 6342caea8e0c5b2daaca487167a385b0b3d7e843..df2f3c652d1e2468476d8f0786ce9f764c047cf6 100644
--- a/llama-index-integrations/llms/llama-index-llms-watsonx/tests/test_llms_watsonx.py
+++ b/llama-index-integrations/llms/llama-index-llms-watsonx/tests/test_llms_watsonx.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.watsonx import WatsonX
 
 
diff --git a/llama-index-integrations/llms/llama-index-llms-xinference/tests/test_llms_xinference.py b/llama-index-integrations/llms/llama-index-llms-xinference/tests/test_llms_xinference.py
index 3b73ff1d3f00f92f3ea704fe11b1ae0678cc9c07..a9877230e99021bf9bde1c949a8d7071ae9dbf9d 100644
--- a/llama-index-integrations/llms/llama-index-llms-xinference/tests/test_llms_xinference.py
+++ b/llama-index-integrations/llms/llama-index-llms-xinference/tests/test_llms_xinference.py
@@ -1,4 +1,4 @@
-from llama_index.core.llms.base import BaseLLM
+from llama_index.core.base.llms.base import BaseLLM
 from llama_index.llms.xinference import Xinference
 
 
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-azure-openai/llama_index/multi_modal_llms/azure_openai/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-azure-openai/llama_index/multi_modal_llms/azure_openai/base.py
index 270585d2b1552acfc9aa0be9119b822ae38b0826..b5a9a892006663c7b1e29458ee0ff54ce20dca2c 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-azure-openai/llama_index/multi_modal_llms/azure_openai/base.py
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-azure-openai/llama_index/multi_modal_llms/azure_openai/base.py
@@ -8,7 +8,7 @@ from llama_index.core.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.core.llms.generic_utils import get_from_param_or_env
+from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 from llama_index.core.multi_modal_llms import MultiModalLLMMetadata
 from llama_index.llms.azure_openai.utils import (
     refresh_openai_azuread_token,
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-openai/llama_index/multi_modal_llms/openai/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-openai/llama_index/multi_modal_llms/openai/base.py
index 5bde0f36198d4b50c1508d936beef9da5dacf254..fed2ffb1a8c03089ed185c8364c6bee99979881a 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-openai/llama_index/multi_modal_llms/openai/base.py
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-openai/llama_index/multi_modal_llms/openai/base.py
@@ -18,7 +18,7 @@ from llama_index.core.constants import (
     DEFAULT_NUM_OUTPUTS,
     DEFAULT_TEMPERATURE,
 )
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.multi_modal_llms import (
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate-multi-modal/llama_index/multi_modal_llms/replicate_multi_modal/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate-multi-modal/llama_index/multi_modal_llms/replicate_multi_modal/base.py
index 340b4e36f6b03920fb868a213730c3c8112a425b..734c9a3e9fc9e3034ff0b6cb4d7c310247f8896c 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate-multi-modal/llama_index/multi_modal_llms/replicate_multi_modal/base.py
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate-multi-modal/llama_index/multi_modal_llms/replicate_multi_modal/base.py
@@ -13,7 +13,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.multi_modal_llms import (
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate/llama_index/multi_modal_llms/replicate/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate/llama_index/multi_modal_llms/replicate/base.py
index 340b4e36f6b03920fb868a213730c3c8112a425b..734c9a3e9fc9e3034ff0b6cb4d7c310247f8896c 100644
--- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate/llama_index/multi_modal_llms/replicate/base.py
+++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate/llama_index/multi_modal_llms/replicate/base.py
@@ -13,7 +13,7 @@ from llama_index.core.base.llms.types import (
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.core.llms.generic_utils import (
+from llama_index.core.base.llms.generic_utils import (
     messages_to_prompt as generic_messages_to_prompt,
 )
 from llama_index.core.multi_modal_llms import (
diff --git a/poetry.lock b/poetry.lock
index a8e64e28c115e6858d5131a027d7f02d006f0421..66f8e3491c04e386d6fe43e84d7c84712c666a3f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1502,13 +1502,13 @@ llama-index-llms-openai = ">=0.1.1,<0.2.0"
 
 [[package]]
 name = "llama-index-core"
-version = "0.10.7"
+version = "0.10.8.post1"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "llama_index_core-0.10.7-py3-none-any.whl", hash = "sha256:0dd1ec2878451d75d4644757cc24c8533d83a6c62ffa2ac0a4f1745a31cbb1ad"},
-    {file = "llama_index_core-0.10.7.tar.gz", hash = "sha256:d02f92128ce285110e953ed116a3db1ba02eec508d11ccdca14a851ece5eaead"},
+    {file = "llama_index_core-0.10.8.post1-py3-none-any.whl", hash = "sha256:51b736b22818cb0b117a1486be54cf05f9496e79feb5de7262be5ec5480d85ec"},
+    {file = "llama_index_core-0.10.8.post1.tar.gz", hash = "sha256:449fdd206901ca1e403b0c7fd021e52f9f0aa6fd121793f3062c442429a11ffd"},
 ]
 
 [package.dependencies]
@@ -4264,4 +4264,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.12"
-content-hash = "712557a4f68621c668730fdc56f1a225def91772af558b7279a081426c0de646"
+content-hash = "f1f95484f87927a6eed40e388280e01e1fb44cd987b310542a60e21420522e6e"
diff --git a/pyproject.toml b/pyproject.toml
index 889e1a6d927ff4c5e30f6d969af5a6f13bdc434d..103a8a41de38b966ae5885b5b87be7d38329d8be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,7 @@ llama-index-agent-openai = "^0.1.0"
 llama-index-readers-file = "^0.1.0"
 llama-index-readers-llama-parse = "^0.1.0"
 llama-index-indices-managed-llama-cloud = "^0.1.0"
-llama-index-core = "^0.10.0"
+llama-index-core = "^0.10.8.post1"
 llama-index-multi-modal-llms-openai = "^0.1.0"
 
 [tool.poetry.group.dev.dependencies]