diff --git a/tests/unit/encoders/test_bm25.py b/tests/unit/encoders/test_bm25.py index e654d7bbc98070b6db16249c3164d515852e28e5..174453d254370c609c83a7f2a533b4d03ca264dc 100644 --- a/tests/unit/encoders/test_bm25.py +++ b/tests/unit/encoders/test_bm25.py @@ -5,7 +5,11 @@ from semantic_router.encoders import BM25Encoder @pytest.fixture def bm25_encoder(): - return BM25Encoder() + sparse_encoder = BM25Encoder(use_default_params=False) + sparse_encoder.fit( + ["The quick brown fox", "jumps over the lazy dog", "Hello, world!"] + ) + return sparse_encoder class TestBM25Encoder: diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py index 6896c4de1cb1e13196d209455f2bd39e8e14915d..df530149d72fe44d836765b7654a2cbcdf71c694 100644 --- a/tests/unit/test_hybrid_layer.py +++ b/tests/unit/test_hybrid_layer.py @@ -1,6 +1,11 @@ import pytest -from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder +from semantic_router.encoders import ( + BaseEncoder, + BM25Encoder, + CohereEncoder, + OpenAIEncoder, +) from semantic_router.hybrid_layer import HybridRouteLayer from semantic_router.route import Route @@ -42,9 +47,15 @@ def routes(): ] +sparse_encoder = BM25Encoder(use_default_params=False) +sparse_encoder.fit(["The quick brown fox", "jumps over the lazy dog", "Hello, world!"]) + + class TestHybridRouteLayer: def test_initialization(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder, routes=routes + ) assert route_layer.index is not None and route_layer.categories is not None assert openai_encoder.score_threshold == 0.82 assert route_layer.score_threshold == 0.82 @@ -52,14 +63,20 @@ class TestHybridRouteLayer: assert len(set(route_layer.categories)) == 2 def test_initialization_different_encoders(self, cohere_encoder, openai_encoder): - route_layer_cohere = HybridRouteLayer(encoder=cohere_encoder) + route_layer_cohere = HybridRouteLayer( + encoder=cohere_encoder, sparse_encoder=sparse_encoder + ) assert route_layer_cohere.score_threshold == 0.3 - route_layer_openai = HybridRouteLayer(encoder=openai_encoder) + route_layer_openai = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder + ) assert route_layer_openai.score_threshold == 0.82 def test_add_route(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder + ) route = Route(name="Route 3", utterances=["Yes", "No"]) route_layer._add_routes([route]) assert route_layer.index is not None and route_layer.categories is not None @@ -67,7 +84,9 @@ class TestHybridRouteLayer: assert len(set(route_layer.categories)) == 1 def test_add_multiple_routes(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder + ) for route in routes: route_layer.add(route) assert route_layer.index is not None and route_layer.categories is not None @@ -75,16 +94,22 @@ class TestHybridRouteLayer: assert len(set(route_layer.categories)) == 2 def test_query_and_classification(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder, routes=routes + ) query_result = route_layer("Hello") assert query_result in ["Route 1", "Route 2"] def test_query_with_no_index(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder + ) assert route_layer("Anything") is None def test_semantic_classify(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder, routes=routes + ) classification, score = route_layer._semantic_classify( [ {"route": "Route 1", "score": 0.9}, @@ -95,7 +120,9 @@ class TestHybridRouteLayer: assert score == [0.9] def test_semantic_classify_multiple_routes(self, openai_encoder, routes): - route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder, routes=routes + ) classification, score = route_layer._semantic_classify( [ {"route": "Route 1", "score": 0.9}, @@ -107,12 +134,16 @@ class TestHybridRouteLayer: assert score == [0.9, 0.8] def test_pass_threshold(self, openai_encoder): - route_layer = HybridRouteLayer(encoder=openai_encoder) + route_layer = HybridRouteLayer( + encoder=openai_encoder, sparse_encoder=sparse_encoder + ) assert not route_layer._pass_threshold([], 0.5) assert route_layer._pass_threshold([0.6, 0.7], 0.5) def test_failover_score_threshold(self, base_encoder): - route_layer = HybridRouteLayer(encoder=base_encoder) + route_layer = HybridRouteLayer( + encoder=base_encoder, sparse_encoder=sparse_encoder + ) assert base_encoder.score_threshold == 0.50 assert route_layer.score_threshold == 0.50