diff --git a/docs/examples/hybrid-layer.ipynb b/docs/examples/hybrid-layer.ipynb index 89965b4e238bbd92215164429e178057fbf29844..1257e0a18bbd6db47f1cbfd7b678eccaef183367 100644 --- a/docs/examples/hybrid-layer.ipynb +++ b/docs/examples/hybrid-layer.ipynb @@ -143,14 +143,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 2/2 [00:00<00:00, 2.58it/s]\n" + "100%|██████████| 2/2 [00:00<00:00, 4.22it/s]\n" ] } ], "source": [ "from semantic_router.hybrid_layer import HybridRouteLayer\n", "\n", - "dl = HybridRouteLayer(dense_encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes)" + "dl = HybridRouteLayer(\n", + " dense_encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes\n", + ")" ] }, { diff --git a/poetry.lock b/poetry.lock index b6fff4703b0e67f0bbb4d2b130fdeef485f18fce..e6d799b3b32e138cfb5b0bd49c0a1a330d1781e7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2302,5 +2302,5 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" -python-versions = "^3.10" +python-versions = "^3.9" content-hash = "7e705f5c5f2a8bba630031c0ff6752972e7cddc8ec95f3fb05b5be2ad7962268" diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py index 2769b31daf1485a2066797e65f60dde47ba6daee..c2bde1e5f9af069e5b412e54ea9454b6a09e3bd8 100644 --- a/semantic_router/encoders/__init__.py +++ b/semantic_router/encoders/__init__.py @@ -4,4 +4,10 @@ from .cohere import CohereEncoder from .openai import OpenAIEncoder from .tfidf import TfidfEncoder -__all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder", "TfidfEncoder"] +__all__ = [ + "BaseEncoder", + "CohereEncoder", + "OpenAIEncoder", + "BM25Encoder", + "TfidfEncoder", +] diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index 41487a0621dc9c9441d82632487a1c5a0b99458a..ea7a77260522c9a2333908d93b2f6ba8eb8b4ba9 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -3,6 +3,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer from semantic_router.encoders import BaseEncoder from semantic_router.schema import Route + class TfidfEncoder(BaseEncoder): vectorizer: TfidfVectorizer | None = None @@ -28,6 +29,6 @@ class TfidfEncoder(BaseEncoder): def _get_all_utterances(self, routes: list[Route]) -> list[str]: utterances = [] for route in routes: - for utterance in route.utterances: - utterances.append(utterance) - return utterances \ No newline at end of file + for utterance in route.utterances: + utterances.append(utterance) + return utterances diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 33a3269f5c5688801bae408854842d688673c347..8dfedb14da4f8628964843026ae37adfcd0a8e45 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -7,7 +7,7 @@ from semantic_router.encoders import ( BM25Encoder, CohereEncoder, OpenAIEncoder, - TfidfEncoder + TfidfEncoder, ) from semantic_router.schema import Route from semantic_router.utils.logger import logger @@ -20,7 +20,11 @@ class HybridRouteLayer: score_threshold = 0.82 def __init__( - self, dense_encoder: BaseEncoder, sparse_encoder: BaseEncoder, routes: list[Route] = [], alpha: float = 0.3 + self, + dense_encoder: BaseEncoder, + sparse_encoder: BaseEncoder, + routes: list[Route] = [], + alpha: float = 0.3, ): self.dense_encoder = dense_encoder self.sparse_encoder = sparse_encoder diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index 94720cd8d9567b19e78a016a03c2cc90b8f62d40..a9d35ea58382dee198d58b6e8a67d4f6f2fdee4e 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -1,6 +1,12 @@ import pytest -from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder +from semantic_router.encoders import ( + BaseEncoder, + CohereEncoder, + OpenAIEncoder, + TfidfEncoder, + BM25Encoder, +) from semantic_router.hybrid_layer import HybridRouteLayer from semantic_router.schema import Route @@ -34,6 +40,12 @@ def openai_encoder(mocker): return OpenAIEncoder(name="test-openai-encoder", openai_api_key="test_api_key") +@pytest.fixture +def bm25_encoder(mocker): + mocker.patch.object(BM25Encoder, "__call__", side_effect=mock_encoder_call) + return BM25Encoder(name="test-bm25-encoder") + + @pytest.fixture def routes(): return [ @@ -73,8 +85,10 @@ class TestHybridRouteLayer: assert len(route_layer.index) == 5 assert len(set(route_layer.categories)) == 2 - def test_query_and_classification(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + def test_query_and_classification(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer( + dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes + ) query_result = route_layer("Hello") assert query_result in ["Route 1", "Route 2"]