From 5791bfaf5d485fda6a5c8977dffb1a37f81ddf02 Mon Sep 17 00:00:00 2001
From: James Briggs <35938317+jamescalam@users.noreply.github.com>
Date: Mon, 6 Jan 2025 14:29:04 +0400
Subject: [PATCH] fix: sparse vector testing

---
 semantic_router/index/base.py     |  7 +++++++
 semantic_router/index/local.py    |  6 ++++++
 semantic_router/index/pinecone.py |  6 ++++++
 semantic_router/index/postgres.py |  6 ++++++
 semantic_router/index/qdrant.py   | 10 ++++++++++
 semantic_router/routers/base.py   | 11 +++++------
 semantic_router/routers/hybrid.py |  4 ++--
 tests/unit/test_router.py         | 14 +++++++++-----
 8 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/semantic_router/index/base.py b/semantic_router/index/base.py
index 452a18c6..933e2294 100644
--- a/semantic_router/index/base.py
+++ b/semantic_router/index/base.py
@@ -160,6 +160,13 @@ class BaseIndex(BaseModel):
         """
         raise NotImplementedError("This method should be implemented by subclasses.")
 
+    def is_ready(self) -> bool:
+        """
+        Checks if the index is ready to be used.
+        This method should be implemented by subclasses.
+        """
+        raise NotImplementedError("This method should be implemented by subclasses.")
+
     def query(
         self,
         vector: np.ndarray,
diff --git a/semantic_router/index/local.py b/semantic_router/index/local.py
index 10b77bea..61b2c3b5 100644
--- a/semantic_router/index/local.py
+++ b/semantic_router/index/local.py
@@ -82,6 +82,12 @@ class LocalIndex(BaseIndex):
             vectors=self.index.shape[0] if self.index is not None else 0,
         )
 
+    def is_ready(self) -> bool:
+        """
+        Checks if the index is ready, i.e. `index` and `routes` are both set.
+        """
+        return self.index is not None and self.routes is not None
+
     def query(
         self,
         vector: np.ndarray,
diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py
index b0706318..f885fbf7 100644
--- a/semantic_router/index/pinecone.py
+++ b/semantic_router/index/pinecone.py
@@ -464,6 +464,12 @@ class PineconeIndex(BaseIndex):
                 vectors=0,
             )
 
+    def is_ready(self) -> bool:
+        """
+        Checks if the index is ready, i.e. `index` has been set (is not None).
+        """
+        return self.index is not None
+
     def query(
         self,
         vector: np.ndarray,
diff --git a/semantic_router/index/postgres.py b/semantic_router/index/postgres.py
index 54054c84..6f4a9f2a 100644
--- a/semantic_router/index/postgres.py
+++ b/semantic_router/index/postgres.py
@@ -352,6 +352,12 @@ class PostgresIndex(BaseIndex):
                 vectors=count,
             )
 
+    def is_ready(self) -> bool:
+        """
+        Checks if the index is ready, i.e. `conn` is a psycopg2 connection.
+        """
+        return isinstance(self.conn, psycopg2.extensions.connection)
+
     def query(
         self,
         vector: np.ndarray,
diff --git a/semantic_router/index/qdrant.py b/semantic_router/index/qdrant.py
index 5986f2c0..5b2eac80 100644
--- a/semantic_router/index/qdrant.py
+++ b/semantic_router/index/qdrant.py
@@ -196,6 +196,10 @@ class QdrantIndex(BaseIndex):
             List[Tuple]: A list of (route_name, utterance, function_schema, metadata) objects.
         """
 
+        # Check if collection exists first
+        if not self.client.collection_exists(self.index_name):
+            return []
+
         from qdrant_client import grpc
 
         results = []
@@ -255,6 +259,12 @@ class QdrantIndex(BaseIndex):
             vectors=collection_info.points_count,
         )
 
+    def is_ready(self) -> bool:
+        """
+        Checks if the index is ready, i.e. its collection exists in Qdrant.
+        """
+        return self.client.collection_exists(self.index_name)
+
     def query(
         self,
         vector: np.ndarray,
diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py
index c551b124..e42f1630 100644
--- a/semantic_router/routers/base.py
+++ b/semantic_router/routers/base.py
@@ -422,9 +422,8 @@ class BaseRouter(BaseModel):
         simulate_static: bool = False,
         route_filter: Optional[List[str]] = None,
     ) -> RouteChoice:
-        ready = self._index_ready()
-        if not ready:
-            raise ValueError("Index or routes are not populated.")
+        if not self.index or not self.index.is_ready():
+            raise ValueError("Index is not ready.")
         # if no vector provided, encode text to get vector
         if vector is None:
             if text is None:
@@ -481,9 +480,9 @@ class BaseRouter(BaseModel):
         simulate_static: bool = False,
         route_filter: Optional[List[str]] = None,
     ) -> RouteChoice:
-        ready = self._index_ready()  # TODO: need async version for qdrant
-        if not ready:
-            raise ValueError("Index or routes are not populated.")
+        if not self.index or not self.index.is_ready():
+            # TODO: need async version of is_ready() for qdrant
+            raise ValueError("Index is not ready.")
         # if no vector provided, encode text to get vector
         if vector is None:
             if text is None:
diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py
index 3d810576..cb8b5f51 100644
--- a/semantic_router/routers/hybrid.py
+++ b/semantic_router/routers/hybrid.py
@@ -218,8 +218,8 @@ class HybridRouter(BaseRouter):
         route_filter: Optional[List[str]] = None,
         sparse_vector: dict[int, float] | SparseEmbedding | None = None,
     ) -> RouteChoice:
-        if self.index.index is None or self.routes is None:
-            raise ValueError("Index or routes are not populated.")
+        if not self.index or not self.index.is_ready():
+            raise ValueError("Index is not ready.")
         potential_sparse_vector: List[SparseEmbedding] | None = None
         # if no vector provided, encode text to get vector
         if vector is None:
diff --git a/tests/unit/test_router.py b/tests/unit/test_router.py
index 799a978d..98589847 100644
--- a/tests/unit/test_router.py
+++ b/tests/unit/test_router.py
@@ -282,7 +282,7 @@ class TestIndexEncoders:
             try:
                 assert len(route_layer.index) == 5
                 break
-            except AssertionError:
+            except Exception:
                 logger.warning(f"Index not populated, waiting for retry (try {count})")
                 time.sleep(PINECONE_SLEEP)
                 count += 1
@@ -733,7 +733,7 @@ class TestSemanticRouter:
             try:
                 assert query_result in ["Route 1", "Route 2"]
                 break
-            except AssertionError:
+            except Exception:
                 logger.warning(
                     f"Query result not in expected routes, waiting for retry (try {count})"
                 )
@@ -770,7 +770,7 @@ class TestSemanticRouter:
             try:
                 assert query_result in ["Route 1"]
                 break
-            except AssertionError:
+            except Exception:
                 logger.warning(
                     f"Query result not in expected routes, waiting for retry (try {count})"
                 )
@@ -800,7 +800,7 @@ class TestSemanticRouter:
                     ).name
                     assert query_result in ["Route 1"]
                     break
-                except AssertionError:
+                except Exception:
                     logger.warning(
                         f"Query result not in expected routes, waiting for retry (try {count})"
                     )
@@ -830,7 +830,11 @@ class TestSemanticRouter:
         if index_cls is PineconeIndex:
             time.sleep(PINECONE_SLEEP)  # allow for index to be populated
         vector = encoder(["hello"])
-        query_result = route_layer(vector=vector).name
+        if router_cls is HybridRouter:
+            sparse_vector = route_layer.sparse_encoder(["hello"])[0]
+            query_result = route_layer(vector=vector, sparse_vector=sparse_vector).name
+        else:
+            query_result = route_layer(vector=vector).name
         assert query_result in ["Route 1", "Route 2"]
 
     def test_query_with_no_text_or_vector(
-- 
GitLab