diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index ea7a77260522c9a2333908d93b2f6ba8eb8b4ba9..226e9dd06d6a8f81cdaa82ee778131d3c669b8e8 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -1,4 +1,3 @@ -from typing import Any from sklearn.feature_extraction.text import TfidfVectorizer from semantic_router.encoders import BaseEncoder from semantic_router.schema import Route diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index 8dfedb14da4f8628964843026ae37adfcd0a8e45..3993ca451612f6a474961c15823ca104aae9b949 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -4,7 +4,6 @@ from tqdm.auto import tqdm from semantic_router.encoders import ( BaseEncoder, - BM25Encoder, CohereEncoder, OpenAIEncoder, TfidfEncoder, diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index a9d35ea58382dee198d58b6e8a67d4f6f2fdee4e..0a5dba6c49d0b3f017b902fc33ac8b660c34a810 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -4,8 +4,8 @@ from semantic_router.encoders import ( BaseEncoder, CohereEncoder, OpenAIEncoder, - TfidfEncoder, BM25Encoder, + TfidfEncoder, ) from semantic_router.hybrid_layer import HybridRouteLayer from semantic_router.schema import Route @@ -45,6 +45,10 @@ def bm25_encoder(mocker): mocker.patch.object(BM25Encoder, "__call__", side_effect=mock_encoder_call) return BM25Encoder(name="test-bm25-encoder") +@pytest.fixture +def tfidf_encoder(mocker): + mocker.patch.object(TfidfEncoder, "__call__", side_effect=mock_encoder_call) + return TfidfEncoder(name="test-tfidf-encoder") @pytest.fixture def routes(): @@ -55,30 +59,30 @@ def routes(): class TestHybridRouteLayer: - def test_initialization(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + def test_initialization(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes) assert route_layer.index is not None and route_layer.categories is not None assert route_layer.score_threshold == 0.82 assert len(route_layer.index) == 5 assert len(set(route_layer.categories)) == 2 - def test_initialization_different_encoders(self, cohere_encoder, openai_encoder): - route_layer_cohere = HybridRouteLayer(encoder=cohere_encoder) + def test_initialization_different_encoders(self, cohere_encoder, openai_encoder, bm25_encoder): + route_layer_cohere = HybridRouteLayer(dense_encoder=cohere_encoder, sparse_encoder=bm25_encoder) assert route_layer_cohere.score_threshold == 0.3 - route_layer_openai = HybridRouteLayer(encoder=openai_encoder) + route_layer_openai = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder) assert route_layer_openai.score_threshold == 0.82 - def test_add_route(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + def test_add_route(self, openai_encoder, bm25_encoder): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder) route = Route(name="Route 3", utterances=["Yes", "No"]) route_layer.add(route) assert route_layer.index is not None and route_layer.categories is not None assert len(route_layer.index) == 2 assert len(set(route_layer.categories)) == 1 - def test_add_multiple_routes(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder) + def test_add_multiple_routes(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder) for route in routes: route_layer.add(route) assert route_layer.index is not None and route_layer.categories is not None @@ -92,12 +96,12 @@ class TestHybridRouteLayer: query_result = route_layer("Hello") assert query_result in ["Route 1", "Route 2"] - def test_query_with_no_index(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + def test_query_with_no_index(self, openai_encoder, bm25_encoder): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder) assert route_layer("Anything") is None - def test_semantic_classify(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + def test_semantic_classify(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes) classification, score = route_layer._semantic_classify( [ {"route": "Route 1", "score": 0.9}, @@ -107,8 +111,8 @@ class TestHybridRouteLayer: assert classification == "Route 1" assert score == [0.9] - def test_semantic_classify_multiple_routes(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + def test_semantic_classify_multiple_routes(self, openai_encoder, bm25_encoder, routes): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes) classification, score = route_layer._semantic_classify( [ {"route": "Route 1", "score": 0.9}, @@ -119,13 +123,13 @@ class TestHybridRouteLayer: assert classification == "Route 1" assert score == [0.9, 0.8] - def test_pass_threshold(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + def test_pass_threshold(self, openai_encoder, bm25_encoder): + route_layer = HybridRouteLayer(dense_encoder=openai_encoder, sparse_encoder=bm25_encoder) assert not route_layer._pass_threshold([], 0.5) assert route_layer._pass_threshold([0.6, 0.7], 0.5) - def test_failover_score_threshold(self, base_encoder): - route_layer = HybridRouteLayer(encoder=base_encoder) + def test_failover_score_threshold(self, base_encoder, bm25_encoder): + route_layer = HybridRouteLayer(dense_encoder=base_encoder, sparse_encoder=bm25_encoder) assert route_layer.score_threshold == 0.82