diff --git a/docs/05-local-execution.ipynb b/docs/05-local-execution.ipynb
index f02ecf15478d49a3668338d547f9418171f6664b..cdb6c3feb3645e8640a91d76855d5f6523b0b549 100644
--- a/docs/05-local-execution.ipynb
+++ b/docs/05-local-execution.ipynb
@@ -342,7 +342,7 @@
     "from semantic_router import RouteLayer\n",
     "\n",
     "from llama_cpp import Llama\n",
-    "from semantic_router.llms import LlamaCppLLM\n",
+    "from semantic_router.llms.llamacpp import LlamaCppLLM\n",
     "\n",
     "enable_gpu = True  # offload LLM layers to the GPU (must fit in memory)\n",
     "\n",
diff --git a/semantic_router/llms/__init__.py b/semantic_router/llms/__init__.py
index 02b3fd5b2422e718fcdf9fd4b34e4ace7fb3d957..e5aedc85fd30cc0b576fc2170c1b7ca694bdf200 100644
--- a/semantic_router/llms/__init__.py
+++ b/semantic_router/llms/__init__.py
@@ -1,7 +1,6 @@
 from semantic_router.llms.base import BaseLLM
 from semantic_router.llms.cohere import CohereLLM
-from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.llms.openai import OpenAILLM
 from semantic_router.llms.openrouter import OpenRouterLLM
 
-__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM", "LlamaCppLLM"]
+__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM"]
diff --git a/tests/unit/llms/test_llm_llamacpp.py b/tests/unit/llms/test_llm_llamacpp.py
index 5793c2d2f1b008ccc2a5fe3b183e4698f20dee9c..f0a5253f909ecce92769b50ccf7b6578720c3f63 100644
--- a/tests/unit/llms/test_llm_llamacpp.py
+++ b/tests/unit/llms/test_llm_llamacpp.py
@@ -1,7 +1,7 @@
 import pytest
 from llama_cpp import Llama
 
-from semantic_router.llms import LlamaCppLLM
+from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.schema import Message