From 1d777b0f5933762dc55b500f7bb52a8e8806cb29 Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Sun, 12 Jan 2025 11:17:15 +0000 Subject: [PATCH] fix: init index logic --- semantic_router/index/pinecone.py | 14 ++++++++++++-- semantic_router/routers/base.py | 4 ++++ semantic_router/routers/hybrid.py | 3 --- semantic_router/routers/semantic.py | 3 --- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index f1ec2a7c..c0176992 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -236,7 +236,12 @@ class PineconeIndex(BaseIndex): else: # if the index doesn't exist and we don't have the dimensions # we return None - logger.warning("Index could not be initialized.") + logger.warning( + "Index could not be initialized. Init parameters: " + f"{self.index_name=}, {self.dimensions=}, {self.metric=}, " + f"{self.cloud=}, {self.region=}, {self.host=}, {self.namespace=}, " + f"{force_create=}" + ) index = None if index is not None: self.host = self.client.describe_index(self.index_name)["host"] @@ -272,7 +277,12 @@ class PineconeIndex(BaseIndex): else: # if the index doesn't exist and we don't have the dimensions # we raise warning - logger.warning("Index could not be initialized.") + logger.warning( + "Index could not be initialized. Init parameters: " + f"{self.index_name=}, {self.dimensions=}, {self.metric=}, " + f"{self.cloud=}, {self.region=}, {self.host=}, {self.namespace=}, " + f"{force_create=}" + ) self.host = index_stats["host"] if index_stats else "" def _batch_upsert(self, batch: List[Dict]): diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index ae2108f2..25cb8b6b 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -351,6 +351,8 @@ class BaseRouter(BaseModel): for route in self.routes: if route.score_threshold is None: route.score_threshold = self.score_threshold + # initialize index + self._init_index_state() def _get_index(self, index: Optional[BaseIndex]) -> BaseIndex: if index is None: @@ -370,6 +372,7 @@ class BaseRouter(BaseModel): def _init_index_state(self): """Initializes an index (where required) and runs auto_sync if active.""" + print("JBTEMP _init_index_state") # initialize index now, check if we need dimensions if self.index.dimensions is None: dims = len(self.encoder(["test"])[0]) @@ -862,6 +865,7 @@ class BaseRouter(BaseModel): The name must exist within the local SemanticRouter, if not a KeyError will be raised. """ + # TODO JB: should modify update to take a Route object current_local_hash = self._get_hash() current_remote_hash = self.index._read_hash() if current_remote_hash.value == "": diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index 241fb57c..70e00440 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -62,9 +62,6 @@ class HybridRouter(BaseRouter): and self.routes ): self.sparse_encoder.fit(self.routes) - # run initialize index now if auto sync is active - if self.auto_sync: - self._init_index_state() def _set_score_threshold(self): """Set the score threshold for the HybridRouter. Unlike the base router the diff --git a/semantic_router/routers/semantic.py b/semantic_router/routers/semantic.py index 41c92d53..5efa2e48 100644 --- a/semantic_router/routers/semantic.py +++ b/semantic_router/routers/semantic.py @@ -32,9 +32,6 @@ class SemanticRouter(BaseRouter): aggregation=aggregation, auto_sync=auto_sync, ) - # run initialize index now if auto sync is active - if self.auto_sync: - self._init_index_state() def _encode(self, text: list[str]) -> Any: """Given some text, encode it.""" -- GitLab