From f9a72dbf009a13af078f656b100049855f954cc2 Mon Sep 17 00:00:00 2001
From: jamescalam <james.briggs@hotmail.com>
Date: Fri, 29 Nov 2024 13:11:21 +0100
Subject: [PATCH] fix: hybrid fixes

---
 semantic_router/index/hybrid_local.py |  3 +-
 semantic_router/routers/base.py       |  8 +++--
 tests/unit/test_hybrid_layer.py       | 44 +++++++++++++--------------
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/semantic_router/index/hybrid_local.py b/semantic_router/index/hybrid_local.py
index f927914e..d4096edb 100644
--- a/semantic_router/index/hybrid_local.py
+++ b/semantic_router/index/hybrid_local.py
@@ -135,7 +135,8 @@ class HybridLocalIndex(LocalIndex):
             route_names = self.routes[idx] if self.routes is not None else []
             return scores, route_names
         else:
-            raise ValueError("Index or sparse index is not populated.")
+            logger.warning("Index or sparse index is not populated.")
+            return np.array([]), []
 
     async def aquery(
         self,
diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py
index 392e91d8..e4376b42 100644
--- a/semantic_router/routers/base.py
+++ b/semantic_router/routers/base.py
@@ -1104,11 +1104,15 @@ class BaseRouter(BaseModel):
                 )
 
     def set_threshold(self, threshold: float, route_name: str | None = None):
-        """Set the score threshold for a specific route or all routes.
+        """Set the score threshold for a specific route or all routes.
+
+        A `threshold` of 0.0 means a route is returned no matter how low it scores,
+        whereas at 1.0 a route must contain an exact utterance match to be returned.
 
         :param threshold: The threshold to set.
         :type threshold: float
-        :param route_name: The name of the route to set the threshold for. If None, the threshold will be set for all routes.
+        :param route_name: The name of the route to set the threshold for. If None, the
+            threshold will be set for all routes.
         :type route_name: str | None
         """
         if route_name is None:
diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py
index a7d29b46..f9f8ff6d 100644
--- a/tests/unit/test_hybrid_layer.py
+++ b/tests/unit/test_hybrid_layer.py
@@ -12,14 +12,23 @@ from semantic_router.routers import HybridRouter
 from semantic_router.route import Route
 
 
+UTTERANCES = [
+    "Hello we need this text to be a little longer for our sparse encoders",
+    "In this case they need to learn from recurring tokens, ie words.",
+    "We give ourselves several examples from our encoders to learn from.",
+    "But given this is only an example we don't need too many",
+    "Just enough to test that our sparse encoders work as expected",
+]
+
+
 def mock_encoder_call(utterances):
     # Define a mapping of utterances to return values
     mock_responses = {
-        "Hello": [0.1, 0.2, 0.3],
-        "Hi": [0.4, 0.5, 0.6],
-        "Goodbye": [0.7, 0.8, 0.9],
-        "Bye": [1.0, 1.1, 1.2],
-        "Au revoir": [1.3, 1.4, 1.5],
+        UTTERANCES[0]: [0.1, 0.2, 0.3],
+        UTTERANCES[1]: [0.4, 0.5, 0.6],
+        UTTERANCES[2]: [0.7, 0.8, 0.9],
+        UTTERANCES[3]: [1.0, 1.1, 1.2],
+        UTTERANCES[4]: [1.3, 1.4, 1.5],
     }
     return [mock_responses.get(u, [0, 0, 0]) for u in utterances]
 
@@ -70,21 +79,8 @@ def tfidf_encoder():
 @pytest.fixture
 def routes():
     return [
-        Route(
-            name="Route 1",
-            utterances=[
-                "Hello we need this text to be a little longer for our sparse encoders",
-                "In this case they need to learn from recurring tokens, ie words.",
-            ],
-        ),
-        Route(
-            name="Route 2",
-            utterances=[
-                "We give ourselves several examples from our encoders to learn from.",
-                "But given this is only an example we don't need too many",
-                "Just enough to test that our sparse encoders work as expected",
-            ],
-        ),
+        Route(name="Route 1", utterances=[UTTERANCES[0], UTTERANCES[1]]),
+        Route(name="Route 2", utterances=[UTTERANCES[2], UTTERANCES[3], UTTERANCES[4]]),
     ]
 
 
@@ -158,8 +154,9 @@ class TestHybridRouter:
             auto_sync="local",
         )
         print("...2")
-        query_result = route_layer("Hello")
-        assert query_result in ["Route 1", "Route 2"]
+        route_layer.set_threshold(0.0)
+        query_result = route_layer(UTTERANCES[0])
+        assert query_result.name in ["Route 1", "Route 2"]
 
     def test_query_with_no_index(self, openai_encoder):
         route_layer = HybridRouter(
@@ -171,7 +168,7 @@ class TestHybridRouter:
             f"route_layer.sparse_encoder is {route_layer.sparse_encoder.__class__.__name__} "
             "not BM25Encoder or TfidfEncoder"
         )
-        assert route_layer("Anything") is None
+        assert route_layer("Anything").name is None
 
     def test_semantic_classify(self, openai_encoder, routes):
         route_layer = HybridRouter(
@@ -217,6 +214,7 @@ class TestHybridRouter:
             encoder=cohere_encoder,
             sparse_encoder=tfidf_encoder,
             routes=routes[:-1],
+            auto_sync="local",
         )
         hybrid_route_layer.add(routes=routes[-1])
         all_utterances = [
-- 
GitLab