From 1d777b0f5933762dc55b500f7bb52a8e8806cb29 Mon Sep 17 00:00:00 2001
From: James Briggs <35938317+jamescalam@users.noreply.github.com>
Date: Sun, 12 Jan 2025 11:17:15 +0000
Subject: [PATCH] fix: init index logic

---
 semantic_router/index/pinecone.py   | 14 ++++++++++++--
 semantic_router/routers/base.py     |  4 ++++
 semantic_router/routers/hybrid.py   |  3 ---
 semantic_router/routers/semantic.py |  3 ---
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py
index f1ec2a7c..c0176992 100644
--- a/semantic_router/index/pinecone.py
+++ b/semantic_router/index/pinecone.py
@@ -236,7 +236,12 @@ class PineconeIndex(BaseIndex):
         else:
             # if the index doesn't exist and we don't have the dimensions
             # we return None
-            logger.warning("Index could not be initialized.")
+            logger.warning(
+                "Index could not be initialized. Init parameters: "
+                f"{self.index_name=}, {self.dimensions=}, {self.metric=}, "
+                f"{self.cloud=}, {self.region=}, {self.host=}, {self.namespace=}, "
+                f"{force_create=}"
+            )
             index = None
         if index is not None:
             self.host = self.client.describe_index(self.index_name)["host"]
@@ -272,7 +277,12 @@ class PineconeIndex(BaseIndex):
         else:
             # if the index doesn't exist and we don't have the dimensions
             # we raise warning
-            logger.warning("Index could not be initialized.")
+            logger.warning(
+                "Index could not be initialized. Init parameters: "
+                f"{self.index_name=}, {self.dimensions=}, {self.metric=}, "
+                f"{self.cloud=}, {self.region=}, {self.host=}, {self.namespace=}, "
+                f"{force_create=}"
+            )
         self.host = index_stats["host"] if index_stats else ""
 
     def _batch_upsert(self, batch: List[Dict]):
diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py
index ae2108f2..25cb8b6b 100644
--- a/semantic_router/routers/base.py
+++ b/semantic_router/routers/base.py
@@ -351,6 +351,8 @@ class BaseRouter(BaseModel):
         for route in self.routes:
             if route.score_threshold is None:
                 route.score_threshold = self.score_threshold
+        # initialize index
+        self._init_index_state()
 
     def _get_index(self, index: Optional[BaseIndex]) -> BaseIndex:
         if index is None:
@@ -862,6 +864,7 @@ class BaseRouter(BaseModel):
         The name must exist within the local SemanticRouter, if not a
         KeyError will be raised.
         """
+        # TODO(JB): modify update to take a Route object
         current_local_hash = self._get_hash()
         current_remote_hash = self.index._read_hash()
         if current_remote_hash.value == "":
diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py
index 241fb57c..70e00440 100644
--- a/semantic_router/routers/hybrid.py
+++ b/semantic_router/routers/hybrid.py
@@ -62,9 +62,6 @@ class HybridRouter(BaseRouter):
             and self.routes
         ):
             self.sparse_encoder.fit(self.routes)
-        # run initialize index now if auto sync is active
-        if self.auto_sync:
-            self._init_index_state()
 
     def _set_score_threshold(self):
         """Set the score threshold for the HybridRouter. Unlike the base router the
diff --git a/semantic_router/routers/semantic.py b/semantic_router/routers/semantic.py
index 41c92d53..5efa2e48 100644
--- a/semantic_router/routers/semantic.py
+++ b/semantic_router/routers/semantic.py
@@ -32,9 +32,6 @@ class SemanticRouter(BaseRouter):
             aggregation=aggregation,
             auto_sync=auto_sync,
         )
-        # run initialize index now if auto sync is active
-        if self.auto_sync:
-            self._init_index_state()
 
     def _encode(self, text: list[str]) -> Any:
         """Given some text, encode it."""
-- 
GitLab