From 4d3ba4d387ff56c7ea9951f1754f593e6a60ea0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDaniel=20Griffiths=E2=80=9D?= <Danielgriffiths1790@gmail.com> Date: Tue, 19 Dec 2023 10:36:51 +0000 Subject: [PATCH] fixed tests --- docs/examples/hybrid-layer.ipynb | 6 ++++-- poetry.lock | 2 +- semantic_router/encoders/__init__.py | 8 +++++++- semantic_router/encoders/tfidf.py | 7 ++++--- semantic_router/hybrid_layer.py | 8 ++++++-- tests/unit/test_hybrid_layer.py | 20 +++++++++++++++++--- 6 files changed, 39 insertions(+), 12 deletions(-) diff --git a/docs/examples/hybrid-layer.ipynb b/docs/examples/hybrid-layer.ipynb index 89965b4e..1257e0a1 100644 --- a/docs/examples/hybrid-layer.ipynb +++ b/docs/examples/hybrid-layer.ipynb @@ -143,14 +143,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 2/2 [00:00<00:00, 2.58it/s]\n" + "100%|██████████| 2/2 [00:00<00:00, 4.22it/s]\n" ] } ], "source": [ "from semantic_router.hybrid_layer import HybridRouteLayer\n", "\n", - "dl = HybridRouteLayer(dense_encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes)" + "dl = HybridRouteLayer(\n", + " dense_encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes\n", + ")" ] }, { diff --git a/poetry.lock b/poetry.lock index b6fff470..e6d799b3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2302,5 +2302,5 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" -python-versions = "^3.10" +python-versions = "^3.9" content-hash = "7e705f5c5f2a8bba630031c0ff6752972e7cddc8ec95f3fb05b5be2ad7962268" diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py index 2769b31d..c2bde1e5 100644 --- a/semantic_router/encoders/__init__.py +++ b/semantic_router/encoders/__init__.py @@ -4,4 +4,10 @@ from .cohere import CohereEncoder from .openai import OpenAIEncoder from .tfidf import TfidfEncoder -__all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder", "TfidfEncoder"] +__all__ = [ + "BaseEncoder", + "CohereEncoder", + "OpenAIEncoder", + "BM25Encoder", + "TfidfEncoder", +] diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index 41487a06..ea7a7726 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -3,6 +3,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer from semantic_router.encoders import BaseEncoder from semantic_router.schema import Route + class TfidfEncoder(BaseEncoder): vectorizer: TfidfVectorizer | None = None @@ -28,6 +29,6 @@ class TfidfEncoder(BaseEncoder): def _get_all_utterances(self, routes: list[Route]) -> list[str]: utterances = [] for route in routes: - for utterance in route.utterances: - utterances.append(utterance) - return utterances \ No newline at end of file + for utterance in route.utterances: + utterances.append(utterance) + return utterances diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 33a3269f..8dfedb14 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -7,7 +7,7 @@ from semantic_router.encoders import ( BM25Encoder, CohereEncoder, OpenAIEncoder, - TfidfEncoder + TfidfEncoder, ) from semantic_router.schema import Route from semantic_router.utils.logger import logger @@ -20,7 +20,11 @@ class HybridRouteLayer: score_threshold = 0.82 def __init__( - self, dense_encoder: BaseEncoder, sparse_encoder: BaseEncoder, routes: list[Route] = [], alpha: float = 0.3 + self, + dense_encoder: BaseEncoder, + sparse_encoder: BaseEncoder, + routes: list[Route] = [], + alpha: float = 0.3, ): self.dense_encoder = dense_encoder self.sparse_encoder = sparse_encoder diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index 94720cd8..a9d35ea5 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -1,6 +1,12 @@ import pytest -from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder +from semantic_router.encoders import ( + BaseEncoder, + CohereEncoder, + OpenAIEncoder, + TfidfEncoder, + BM25Encoder, +) from semantic_router.hybrid_layer import HybridRouteLayer from semantic_router.schema import Route @@ -34,6 +40,12 @@ def openai_encoder(mocker): return OpenAIEncoder(name="test-openai-encoder", openai_api_key="test_api_key") +@pytest.fixture +def bm25_encoder(mocker): + mocker.patch.object(BM25Encoder, "__call__", side_effect=mock_encoder_call) + return BM25Encoder(name="test-bm25-encoder") + + @pytest.fixture def routes(): return [ @@ -73,8 +85,10 @@ class TestHybridRouteLayer: assert len(route_layer.index) == 5 assert len(set(route_layer.categories)) == 2 - def test_query_and_classification(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + def test_query_and_classification(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer( + dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes + ) query_result = route_layer("Hello") assert query_result in ["Route 1", "Route 2"] -- GitLab