diff --git a/README.md b/README.md
index 3578cb88c1c4e161d55c7c960ef13b9482241aec..bc853cdaeae1338440874879fee2e3f75d313a7d 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Semantic Router is a superfast decision-making layer for your LLMs and agents.
 Rather than waiting for slow LLM generations to make tool-use decisions, we use the magic of semantic vector space to make those decisions — _routing_ our requests using _semantic_ meaning.
 
+
 ---
 
 ## Quickstart
 
@@ -25,7 +26,7 @@ To get started with _semantic-router_ we install it like so:
 pip install -qU semantic-router
 ```
 
-❗️ _If wanting to use local embeddings you can use `FastEmbedEncoder` (`pip install -qU "semantic-router[fastembed]`"). To use the `HybridRouteLayer` you must `pip install -qU "semantic-router[hybrid]"`._
+❗️ _If wanting to use a fully local version of semantic router you can use `HuggingFaceEncoder` and `LlamaCppEncoder` (`pip install -qU "semantic-router[local]"`, see [here](https://github.com/aurelio-labs/semantic-router/blob/main/docs/05-local-execution.ipynb)). To use the `HybridRouteLayer` you must `pip install -qU "semantic-router[hybrid]"`._
 
 We begin by defining a set of `Route` objects. These are the decision paths that the semantic router can decide to use, let's try two simple routes for now — one for talk on _politics_ and another for _chitchat_:
 
diff --git a/docs/05-local-execution.ipynb b/docs/05-local-execution.ipynb
index f02ecf15478d49a3668338d547f9418171f6664b..cdb6c3feb3645e8640a91d76855d5f6523b0b549 100644
--- a/docs/05-local-execution.ipynb
+++ b/docs/05-local-execution.ipynb
@@ -342,7 +342,7 @@
     "from semantic_router import RouteLayer\n",
     "\n",
     "from llama_cpp import Llama\n",
-    "from semantic_router.llms import LlamaCppLLM\n",
+    "from semantic_router.llms.llamacpp import LlamaCppLLM\n",
     "\n",
     "enable_gpu = True # offload LLM layers to the GPU (must fit in memory)\n",
     "\n",
diff --git a/semantic_router/llms/__init__.py b/semantic_router/llms/__init__.py
index 02b3fd5b2422e718fcdf9fd4b34e4ace7fb3d957..e5aedc85fd30cc0b576fc2170c1b7ca694bdf200 100644
--- a/semantic_router/llms/__init__.py
+++ b/semantic_router/llms/__init__.py
@@ -1,7 +1,6 @@
 from semantic_router.llms.base import BaseLLM
 from semantic_router.llms.cohere import CohereLLM
-from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.llms.openai import OpenAILLM
 from semantic_router.llms.openrouter import OpenRouterLLM
 
-__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM", "LlamaCppLLM"]
+__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM"]
diff --git a/tests/unit/llms/test_llm_llamacpp.py b/tests/unit/llms/test_llm_llamacpp.py
index 5793c2d2f1b008ccc2a5fe3b183e4698f20dee9c..f0a5253f909ecce92769b50ccf7b6578720c3f63 100644
--- a/tests/unit/llms/test_llm_llamacpp.py
+++ b/tests/unit/llms/test_llm_llamacpp.py
@@ -1,7 +1,7 @@
 import pytest
 from llama_cpp import Llama
 
-from semantic_router.llms import LlamaCppLLM
+from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.schema import Message
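
Downstream of this change, here is a minimal sketch of the new import path for local execution, adapted from the notebook touched above (docs/05-local-execution.ipynb). The model path, `n_gpu_layers`, `n_ctx`, and `max_tokens` values are illustrative assumptions, not values fixed by this diff:

```python
# LlamaCppLLM is no longer re-exported from semantic_router.llms, so the
# base install does not pull in llama_cpp; import it from its submodule.
from llama_cpp import Llama
from semantic_router.llms.llamacpp import LlamaCppLLM

enable_gpu = True  # offload LLM layers to the GPU (must fit in memory)

# Hypothetical local GGUF model file; substitute your own path.
_llm = Llama(
    model_path="./mistral-7b-instruct-v0.2.Q4_0.gguf",
    n_gpu_layers=-1 if enable_gpu else 0,  # -1 offloads all layers
    n_ctx=2048,
)
llm = LlamaCppLLM(name="Mistral-7B-v0.2-Instruct", llm=_llm, max_tokens=200)
```

After this change, `from semantic_router.llms import LlamaCppLLM` no longer resolves, and importing `semantic_router.llms` no longer requires `llama_cpp` to be installed, which is why the notebook and test above switch to the submodule path.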