From d96c552230ebba4faddb25a651f3b68a379048cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CDaniel=20Griffiths=E2=80=9D?=
 <Danielgriffiths1790@gmail.com>
Date: Mon, 8 Jan 2024 15:33:31 +0000
Subject: [PATCH] fix: created embedding helper functions

---
 semantic_router/hybrid_layer.py | 52 ++++++++++++++-------------------
 1 file changed, 22 insertions(+), 30 deletions(-)

diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py
index 40c95d63..d7200c26 100644
--- a/semantic_router/hybrid_layer.py
+++ b/semantic_router/hybrid_layer.py
@@ -56,45 +56,33 @@ class HybridRouteLayer:
             return None
 
     def add(self, route: Route):
+        self._add_route(route=route)
+
+    def _add_route(self, route: Route):
+        """Append route, then update the dense/sparse indexes and categories."""
+        self.routes.append(route)
+
+        self.update_dense_embeddings_index(route.utterances)
+
         if isinstance(self.sparse_encoder, TfidfEncoder) and hasattr(
             self.sparse_encoder, "fit"
         ):
-            self.sparse_encoder.fit(self.routes + [route])
+            self.sparse_encoder.fit(self.routes)
+            # encoder was refit on all routes, so rebuild the sparse index from scratch
             self.sparse_index = None
-            for r in self.routes:
-                self.compute_and_store_sparse_embeddings(r)
-        self.routes.append(route)
-        self._add_route(route=route)
+            all_utterances = [
+                utterance for r in self.routes for utterance in r.utterances
+            ]
+            self.update_sparse_embeddings_index(all_utterances)
+        else:
+            self.update_sparse_embeddings_index(route.utterances)
 
-    def _add_route(self, route: Route):
-        # create embeddings
-        dense_embeds = np.array(self.dense_encoder(route.utterances))  # * self.alpha
-        self.compute_and_store_sparse_embeddings(route)
         # create route array
         if self.categories is None:
             self.categories = np.array([route.name] * len(route.utterances))
-            self.utterances = np.array(route.utterances)
         else:
             str_arr = np.array([route.name] * len(route.utterances))
             self.categories = np.concatenate([self.categories, str_arr])
-            self.utterances = np.concatenate(
-                [self.utterances, np.array(route.utterances)]
-            )
-        # create utterance array (the dense index)
-        if self.index is None:
-            self.index = dense_embeds
-        else:
-            self.index = np.concatenate([self.index, dense_embeds])
-
-    def compute_and_store_sparse_embeddings(self, route: Route):
-        sparse_embeds = np.array(
-            self.sparse_encoder(route.utterances)
-        )  # * (1 - self.alpha)
-        # create sparse utterance array
-        if self.sparse_index is None:
-            self.sparse_index = sparse_embeds
-        else:
-            self.sparse_index = np.concatenate([self.sparse_index, sparse_embeds])
 
     def _add_routes(self, routes: list[Route]):
         # create embeddings for all routes
@@ -102,8 +90,8 @@ class HybridRouteLayer:
         all_utterances = [
             utterance for route in routes for utterance in route.utterances
         ]
-        dense_embeds = np.array(self.dense_encoder(all_utterances))
-        sparse_embeds = np.array(self.sparse_encoder(all_utterances))
+        self.update_dense_embeddings_index(all_utterances)
+        self.update_sparse_embeddings_index(all_utterances)
 
         # create route array
         route_names = [route.name for route in routes for _ in route.utterances]
@@ -114,6 +102,8 @@ class HybridRouteLayer:
             else route_array
         )
 
+    def update_dense_embeddings_index(self, utterances: list):
+        dense_embeds = np.array(self.dense_encoder(utterances))
         # create utterance array (the dense index)
         self.index = (
             np.concatenate([self.index, dense_embeds])
@@ -121,6 +111,8 @@ class HybridRouteLayer:
             else dense_embeds
         )
 
+    def update_sparse_embeddings_index(self, utterances: list):
+        sparse_embeds = np.array(self.sparse_encoder(utterances))
         # create sparse utterance array
         self.sparse_index = (
             np.concatenate([self.sparse_index, sparse_embeds])
-- 
GitLab