From 5791bfaf5d485fda6a5c8977dffb1a37f81ddf02 Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:29:04 +0400 Subject: [PATCH] fix: sparse vector testing --- semantic_router/index/base.py | 7 +++++++ semantic_router/index/local.py | 6 ++++++ semantic_router/index/pinecone.py | 6 ++++++ semantic_router/index/postgres.py | 6 ++++++ semantic_router/index/qdrant.py | 10 ++++++++++ semantic_router/routers/base.py | 11 +++++------ semantic_router/routers/hybrid.py | 4 ++-- tests/unit/test_router.py | 14 +++++++++----- 8 files changed, 51 insertions(+), 13 deletions(-) diff --git a/semantic_router/index/base.py b/semantic_router/index/base.py index 452a18c6..933e2294 100644 --- a/semantic_router/index/base.py +++ b/semantic_router/index/base.py @@ -160,6 +160,13 @@ class BaseIndex(BaseModel): """ raise NotImplementedError("This method should be implemented by subclasses.") + def is_ready(self) -> bool: + """ + Checks if the index is ready to be used. + This method should be implemented by subclasses. + """ + raise NotImplementedError("This method should be implemented by subclasses.") + def query( self, vector: np.ndarray, diff --git a/semantic_router/index/local.py b/semantic_router/index/local.py index 10b77bea..61b2c3b5 100644 --- a/semantic_router/index/local.py +++ b/semantic_router/index/local.py @@ -82,6 +82,12 @@ class LocalIndex(BaseIndex): vectors=self.index.shape[0] if self.index is not None else 0, ) + def is_ready(self) -> bool: + """ + Checks if the index is ready to be used. + """ + return self.index is not None and self.routes is not None + def query( self, vector: np.ndarray, diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index b0706318..f885fbf7 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -464,6 +464,12 @@ class PineconeIndex(BaseIndex): vectors=0, ) + def is_ready(self) -> bool: + """ + Checks if the index is ready to be used. + """ + return self.index is not None + def query( self, vector: np.ndarray, diff --git a/semantic_router/index/postgres.py b/semantic_router/index/postgres.py index 54054c84..6f4a9f2a 100644 --- a/semantic_router/index/postgres.py +++ b/semantic_router/index/postgres.py @@ -352,6 +352,12 @@ class PostgresIndex(BaseIndex): vectors=count, ) + def is_ready(self) -> bool: + """ + Checks if the index is ready to be used. + """ + return isinstance(self.conn, psycopg2.extensions.connection) + def query( self, vector: np.ndarray, diff --git a/semantic_router/index/qdrant.py b/semantic_router/index/qdrant.py index 5986f2c0..5b2eac80 100644 --- a/semantic_router/index/qdrant.py +++ b/semantic_router/index/qdrant.py @@ -196,6 +196,10 @@ class QdrantIndex(BaseIndex): List[Tuple]: A list of (route_name, utterance, function_schema, metadata) objects. """ + # Check if collection exists first + if not self.client.collection_exists(self.index_name): + return [] + from qdrant_client import grpc results = [] @@ -255,6 +259,12 @@ class QdrantIndex(BaseIndex): vectors=collection_info.points_count, ) + def is_ready(self) -> bool: + """ + Checks if the index is ready to be used. + """ + return self.client.collection_exists(self.index_name) + def query( self, vector: np.ndarray, diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index c551b124..e42f1630 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -422,9 +422,8 @@ class BaseRouter(BaseModel): simulate_static: bool = False, route_filter: Optional[List[str]] = None, ) -> RouteChoice: - ready = self._index_ready() - if not ready: - raise ValueError("Index or routes are not populated.") + if not self.index or not self.index.is_ready(): + raise ValueError("Index is not ready.") # if no vector provided, encode text to get vector if vector is None: if text is None: @@ -481,9 +480,9 @@ class BaseRouter(BaseModel): simulate_static: bool = False, route_filter: Optional[List[str]] = None, ) -> RouteChoice: - ready = self._index_ready() # TODO: need async version for qdrant - if not ready: - raise ValueError("Index or routes are not populated.") + if not self.index or not self.index.is_ready(): + # TODO: need async version for qdrant + raise ValueError("Index is not ready.") # if no vector provided, encode text to get vector if vector is None: if text is None: diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index 3d810576..cb8b5f51 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -218,8 +218,8 @@ class HybridRouter(BaseRouter): route_filter: Optional[List[str]] = None, sparse_vector: dict[int, float] | SparseEmbedding | None = None, ) -> RouteChoice: - if self.index.index is None or self.routes is None: - raise ValueError("Index or routes are not populated.") + if not self.index or not self.index.is_ready(): + raise ValueError("Index is not ready.") potential_sparse_vector: List[SparseEmbedding] | None = None # if no vector provided, encode text to get vector if vector is None: diff --git a/tests/unit/test_router.py b/tests/unit/test_router.py index 799a978d..98589847 100644 --- a/tests/unit/test_router.py +++ b/tests/unit/test_router.py @@ -282,7 +282,7 @@ class TestIndexEncoders: try: assert len(route_layer.index) == 5 break - except AssertionError: + except Exception: logger.warning(f"Index not populated, waiting for retry (try {count})") time.sleep(PINECONE_SLEEP) count += 1 @@ -733,7 +733,7 @@ class TestSemanticRouter: try: assert query_result in ["Route 1", "Route 2"] break - except AssertionError: + except Exception: logger.warning( f"Query result not in expected routes, waiting for retry (try {count})" ) @@ -770,7 +770,7 @@ class TestSemanticRouter: try: assert query_result in ["Route 1"] break - except AssertionError: + except Exception: logger.warning( f"Query result not in expected routes, waiting for retry (try {count})" ) @@ -800,7 +800,7 @@ class TestSemanticRouter: ).name assert query_result in ["Route 1"] break - except AssertionError: + except Exception: logger.warning( f"Query result not in expected routes, waiting for retry (try {count})" ) @@ -830,7 +830,11 @@ class TestSemanticRouter: if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be populated vector = encoder(["hello"]) - query_result = route_layer(vector=vector).name + if router_cls is HybridRouter: + sparse_vector = route_layer.sparse_encoder(["hello"])[0] + query_result = route_layer(vector=vector, sparse_vector=sparse_vector).name + else: + query_result = route_layer(vector=vector).name assert query_result in ["Route 1", "Route 2"] def test_query_with_no_text_or_vector( -- GitLab