diff --git a/docs/indexes/pinecone-sync-routes.ipynb b/docs/indexes/pinecone-sync-routes.ipynb index 13050b283ceeccdab960022334f79fff96d650ee..25ca12440f8a3b5501f0211027cba8d2b5c78f13 100644 --- a/docs/indexes/pinecone-sync-routes.ipynb +++ b/docs/indexes/pinecone-sync-routes.ipynb @@ -416,7 +416,7 @@ "\n", "* `merge-force-local`: Merge both local and remote keeping remote as the priority. Local utterances are only merged into remote *if* a matching route for the utterance is found in the remote, all other route-utterances are dropped. Where a route exists in both local and remote, but each contains different `function_schema` or `metadata` information, the remote version takes priotity and remote `function_schemas` and `metadata` are propogated to all local routes.\n", "\n", - "* `merge`: Merge both local and remote, merging also local and remote utterances when a route with same route name is present both locally and remotely. If a route exists in both local and remote but contains different `function_schemas` or `metadata` information, the remote version takes priority and remote `function_schemas` and `metadata` are propogated to all local routes.\n", + "* `merge`: Merge both local and remote, merging also local and remote utterances when a route with same route name is present both locally and remotely. If a route exists in both local and remote but contains different `function_schemas` or `metadata` information, the local version takes priority and local `function_schemas` and `metadata` are propagated to all remote routes.\n", "\n", "We can get the synchronization strategy for each of these (with the exception of `error`) using the `diff.get_sync_strategy` method."
] diff --git a/docs/source/route_layer/sync.rst b/docs/source/route_layer/sync.rst index 86875c0e4ea6c6f7d65c0b8c105a721aa30b32ba..c32f3b942d6a4e6c996825dddc21c222f86972cb 100644 --- a/docs/source/route_layer/sync.rst +++ b/docs/source/route_layer/sync.rst @@ -17,16 +17,28 @@ Semantic router supports several synchronization strategies. Those strategies are: * `error`: Raise an error if local and remote are not synchronized. + * `remote`: Take remote as the source of truth and update local to align. + * `local`: Take local as the source of truth and update remote to align. -* `merge-force-remote`: Merge both local and remote taking only remote routes - utterances when a route with same route name is present both locally and - remotely. -* `merge-force-local`: Merge both local and remote taking only local routes - utterances when a route with same route name is present both locally and - remotely. -* `merge`: Merge both local and remote, merging also local and remote utterances - when a route with same route name is present both locally and remotely. + +* `merge-force-local`: Merge both local and remote keeping local as the + priority. Remote utterances are only merged into local *if* a matching route + for the utterance is found in local, all other route-utterances are dropped. + Where a route exists in both local and remote, but each contains different + `function_schema` or `metadata` information, the local version takes priority + and local `function_schemas` and `metadata` are propagated to all remote + utterances belonging to the given route. + +* `merge-force-remote`: Merge both local and remote keeping remote as the + priority. Local utterances are only merged into remote *if* a matching route + for the utterance is found in the remote, all other route-utterances are + dropped.
Where a route exists in both local and remote, but each contains + different `function_schema` or `metadata` information, the remote version takes + priority and remote `function_schemas` and `metadata` are propagated to all + local routes. + +* `merge`: Merge both local and remote, merging also local and remote utterances when a route with same route name is present both locally and remotely. If a route exists in both local and remote but contains different `function_schemas` or `metadata` information, the local version takes priority and local `function_schemas` and `metadata` are propagated to all remote routes. There are two ways to specify the synchronization strategy. The first is to specify the strategy when initializing the `RouteLayer` object via the @@ -132,9 +144,77 @@ and we can return `True`. If the hashes do not match, we need to perform a The slow sync check works by creating a `LayerConfig` object from the remote index and then comparing this to our local `LayerConfig` object. If the two objects match, we know that the local and remote instances are synchronized and -we can return `True`. If the two objects do not match, we need to perform a -diff. +we can return `True`. If the two objects do not match, we must investigate and +decide how to synchronize the two instances. + +Resolving Synchronization Differences +------------------------------------- + +The first step in resolving synchronization differences is to understand the +nature of the differences. We can get a readable diff using the +`RouteLayer.get_utterance_diff` method. + +.. code-block:: python + + diff = rl.get_utterance_diff() + +..
code-block:: python + + ["- politics: don't you just hate the president", + "- politics: don't you just love the president", + "- politics: isn't politics the best thing ever", + '- politics: they will save the country!', + "- politics: they're going to destroy this country!", + "- politics: why don't you tell me about your political opinions", + '+ chitchat: how\'s the weather today?', + '+ chitchat: how are things going?', + '+ chitchat: lovely weather today', + '+ chitchat: the weather is horrendous', + '+ chitchat: let\'s go to the chippy'] The diff works by creating a list of all the routes in the remote index and then comparing these to the routes in our local instance. Any differences -between the remote and local routes are shown in the diff. \ No newline at end of file +between the remote and local routes are shown in the above diff. + +Now, to resolve these differences we will need to initialize an `UtteranceDiff` +object. This object will contain the differences between the remote and local +utterances. We can then use this object to decide how to synchronize the two +instances. To initialize the `UtteranceDiff` object we need to get our local +and remote utterances. + +.. code-block:: python + + local_utterances = rl.to_config().to_utterances() + remote_utterances = rl.index.get_utterances() + +We create an utterance diff object like so: + +.. code-block:: python + + diff = UtteranceDiff.from_utterances( + local_utterances=local_utterances, remote_utterances=remote_utterances + ) + +`UtteranceDiff` objects include all diff information inside the `diff` +attribute (which is a list of `Utterance` objects). Each of our `Utterance` +objects inside `UtteranceDiff.diff` now contain a populated `diff_tag` +attribute, where: + +- `diff_tag='+'` indicates the utterance exists in the remote instance *only*. +- `diff_tag='-'` indicates the utterance exists in the local instance *only*. 
+- `diff_tag=' '` indicates the utterance exists in both the local and remote + instances. + +After initializing an `UtteranceDiff` object we can get all utterances with +each diff tag like so: + +.. code-block:: python + + # all utterances that exist only in remote + diff.get_utterances(diff_tag='+') + + # all utterances that exist only in local + diff.get_utterances(diff_tag='-') + + # all utterances that exist in both local and remote + diff.get_utterances(diff_tag=' ') diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index ef8c4a39335c13054db89f5b13d43b0805aefd00..0a3856d816173ba0a085f28ef9eaca5c768c587d 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -95,15 +95,12 @@ class PineconeIndex(BaseIndex): if self.api_key is None: raise ValueError("Pinecone API key is required.") - logger.debug("Init PineconeIndex sync client") self.client = self._initialize_client(api_key=self.api_key) if init_async_index: - logger.debug("Init PineconeIndex async client") self.async_client = self._initialize_async_client(api_key=self.api_key) else: self.async_client = None # try initializing index - logger.debug("Init PineconeIndex index") self.index = self._init_index() def _initialize_client(self, api_key: Optional[str] = None): @@ -171,7 +168,6 @@ class PineconeIndex(BaseIndex): index = self.client.Index(self.index_name) time.sleep(0.5) elif index_exists: - logger.debug("Index exists, returning index") # if the index exists we just return it index = self.client.Index(self.index_name) # grab the dimensions from the index diff --git a/semantic_router/layer.py b/semantic_router/layer.py index b4cbad80a26e51e7ac98f24f781081466f1c14d3..f34ec9d1f2be9abfd978f9b0e66a29b1ad124496 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -320,16 +320,13 @@ class RouteLayer: route.score_threshold = self.score_threshold # if routes list has been passed, we initialize index now if self.auto_sync: - 
logger.debug(f"Auto sync enabled: {self.auto_sync}") # initialize index now, check if we need dimensions if self.index.dimensions is None: dims = len(self.encoder(["test"])[0]) self.index.dimensions = dims # now init index if isinstance(self.index, PineconeIndex): - logger.debug("Initializing PineconeIndex index") self.index.index = self.index._init_index(force_create=True) - logger.debug("Checking for diffs") local_utterances = self.to_config().to_utterances() remote_utterances = self.index.get_utterances() diff = UtteranceDiff.from_utterances( @@ -337,7 +334,6 @@ class RouteLayer: remote_utterances=remote_utterances, ) sync_strategy = diff.get_sync_strategy(self.auto_sync) - logger.debug(f"Sync strategy: {sync_strategy}") self._execute_sync_strategy(sync_strategy) def check_for_matching_routes(self, top_class: str) -> Optional[Route]: diff --git a/semantic_router/schema.py b/semantic_router/schema.py index 92af384cd6bb8e293e09eb576fe3c2d9b22387e6..ca5e55c4ea9ceb541ad9e93ce7a0939ce04c57a9 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -263,7 +263,7 @@ class UtteranceDiff(BaseModel): "remote": {"upsert": [], "delete": []}, "local": {"upsert": remote_only, "delete": local_only}, } - elif sync_mode == "merge-force-remote": # merge-to-local merge-join-local + elif sync_mode == "merge-force-local": # merge-to-local merge-join-local # PRIORITIZE LOCAL # get set of route names that exist in local (we keep these if # they are in remote) @@ -319,7 +319,7 @@ class UtteranceDiff(BaseModel): }, "local": {"upsert": remote_to_keep, "delete": []}, } - elif sync_mode == "merge-force-local": # merge-to-remote merge-join-remote + elif sync_mode == "merge-force-remote": # merge-to-remote merge-join-remote # get set of route names that exist in remote (we keep these if # they are in local) remote_route_names = set([utt.route for utt in remote_only]) diff --git a/tests/unit/test_route.py b/tests/unit/test_route.py index 
fe202181437fe53dcaccb3dec6f480586e94f855..53d3c00e8b53484607ed324399e38d23dacc85c2 100644 --- a/tests/unit/test_route.py +++ b/tests/unit/test_route.py @@ -89,35 +89,6 @@ class TestRoute: "example_utterance_5", ] - # TODO add async version - # @pytest.mark.asyncio - # @patch("semantic_router.route.allm", new_callable=Mock) - # async def test_generate_dynamic_route_async(self, mock_llm): - # print(f"mock_llm: {mock_llm}") - # mock_llm.return_value = """ - # <config> - # { - # "name": "test_function", - # "utterances": [ - # "example_utterance_1", - # "example_utterance_2", - # "example_utterance_3", - # "example_utterance_4", - # "example_utterance_5"] - # } - # </config> - # """ - # function_schemas = [{"name": "test_function", "type": "function"}] - # route = await Route._generate_dynamic_route(function_schemas) - # assert route.name == "test_function" - # assert route.utterances == [ - # "example_utterance_1", - # "example_utterance_2", - # "example_utterance_3", - # "example_utterance_4", - # "example_utterance_5", - # ] - def test_to_dict(self): route = Route(name="test", utterances=["utterance"]) expected_dict = { diff --git a/tests/unit/test_sync.py b/tests/unit/test_sync.py index e37612d1234d0cc301da2628183eb554593fe7d7..296da0d21d851f8e22aaf157069387fef7a6f44d 100644 --- a/tests/unit/test_sync.py +++ b/tests/unit/test_sync.py @@ -303,11 +303,11 @@ class TestRouteLayer: @pytest.mark.skipif( os.environ.get("PINECONE_API_KEY") is None, reason="Pinecone API key required" ) - def test_auto_sync_merge_force_remote( + def test_auto_sync_merge_force_local( self, openai_encoder, routes, routes_2, index_cls ): if index_cls is PineconeIndex: - # TEST MERGE FORCE REMOTE + # TEST MERGE FORCE LOCAL pinecone_index = init_index(index_cls) route_layer = RouteLayer( encoder=openai_encoder, @@ -320,7 +320,7 @@ class TestRouteLayer: encoder=openai_encoder, routes=routes_2, index=pinecone_index, - auto_sync="merge-force-remote", + auto_sync="merge-force-local", ) 
time.sleep(PINECONE_SLEEP) # allow for index to be populated # confirm local and remote are synced @@ -341,7 +341,7 @@ class TestRouteLayer: @pytest.mark.skipif( os.environ.get("PINECONE_API_KEY") is None, reason="Pinecone API key required" ) - def test_auto_sync_merge_force_local( + def test_auto_sync_merge_force_remote( self, openai_encoder, routes, routes_2, index_cls ): if index_cls is PineconeIndex: @@ -358,7 +358,7 @@ class TestRouteLayer: encoder=openai_encoder, routes=routes_2, index=pinecone_index, - auto_sync="merge-force-local", + auto_sync="merge-force-remote", ) time.sleep(PINECONE_SLEEP) # allow for index to be populated # confirm local and remote are synced