diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index 0d47f939b3c8b84a759edc0ed7bca21f9e150b04..556d0f51278b2ad518faa454ec6a41bcfc13e05c 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -428,8 +428,19 @@ class BaseRouter(BaseModel): vector = self._encode(text=[text]) # convert to numpy array if not already vector = xq_reshape(vector) - # calculate semantics - route, top_class_scores = self._retrieve_top_route(vector, route_filter) + # get scores and routes + scores, routes = self.index.query( + vector=vector[0], top_k=self.top_k, route_filter=route_filter + ) + query_results = [ + {"route": d, "score": s.item()} for d, s in zip(routes, scores) + ] + # decide most relevant routes + top_class, top_class_scores = self._semantic_classify( + query_results=query_results + ) + # TODO do we need this check? + route = self.check_for_matching_routes(top_class) passed = self._check_threshold(top_class_scores, route) if passed and route is not None and not simulate_static: if route.function_schemas and text is None: @@ -473,10 +484,19 @@ class BaseRouter(BaseModel): vector = await self._async_encode(text=[text]) # convert to numpy array if not already vector = xq_reshape(vector) - # calculate semantics - route, top_class_scores = await self._async_retrieve_top_route( - vector, route_filter + # get scores and routes + scores, routes = await self.index.aquery( + vector=vector[0], top_k=self.top_k, route_filter=route_filter + ) + query_results = [ + {"route": d, "score": s.item()} for d, s in zip(routes, scores) + ] + # decide most relevant routes + top_class, top_class_scores = await self._async_semantic_classify( + query_results=query_results ) + # TODO do we need this check? + route = self.check_for_matching_routes(top_class) passed = self._check_threshold(top_class_scores, route) if passed and route is not None and not simulate_static: if route.function_schemas and text is None: @@ -503,66 +523,6 @@ class BaseRouter(BaseModel): # if no route passes threshold, return empty route choice return RouteChoice() - # TODO: add multiple routes return to __call__ and acall - @deprecated("This method is deprecated. Use `__call__` instead.") - def retrieve_multiple_routes( - self, - text: Optional[str] = None, - vector: Optional[List[float] | np.ndarray] = None, - ) -> List[RouteChoice]: - if vector is None: - if text is None: - raise ValueError("Either text or vector must be provided") - vector = self._encode(text=[text]) - # convert to numpy array if not already - vector = xq_reshape(vector) - # get relevant utterances - results = self._retrieve(xq=vector) - # decide most relevant routes - categories_with_scores = self._semantic_classify_multiple_routes(results) - return [ - RouteChoice(name=category, similarity_score=score) - for category, score in categories_with_scores - ] - - # route_choices = [] - # TODO JB: do we need this check? Maybe we should be returning directly - # for category, score in categories_with_scores: - # route = self.check_for_matching_routes(category) - # if route: - # route_choice = RouteChoice(name=route.name, similarity_score=score) - # route_choices.append(route_choice) - - # return route_choices - - def _retrieve_top_route( - self, vector: np.ndarray, route_filter: Optional[List[str]] = None - ) -> Tuple[Optional[Route], List[float]]: - """ - Retrieve the top matching route based on the given vector. - Returns a tuple of the route (if any) and the scores of the top class. - """ - # get relevant results (scores and routes) - results = self._retrieve(xq=vector, top_k=self.top_k, route_filter=route_filter) - # decide most relevant routes - top_class, top_class_scores = self._semantic_classify(results) - # TODO do we need this check? - route = self.check_for_matching_routes(top_class) - return route, top_class_scores - - async def _async_retrieve_top_route( - self, vector: np.ndarray, route_filter: Optional[List[str]] = None - ) -> Tuple[Optional[Route], List[float]]: - # get relevant results (scores and routes) - results = await self._async_retrieve( - xq=vector, top_k=self.top_k, route_filter=route_filter - ) - # decide most relevant routes - top_class, top_class_scores = await self._async_semantic_classify(results) - # TODO do we need this check? - route = self.check_for_matching_routes(top_class) - return route, top_class_scores - def sync(self, sync_mode: str, force: bool = False, wait: int = 0) -> List[str]: """Runs a sync of the local routes with the remote index. @@ -1116,26 +1076,6 @@ class BaseRouter(BaseModel): # TODO: should encode "content" rather than text raise NotImplementedError("This method should be implemented by subclasses.") - def _retrieve( - self, xq: Any, top_k: int = 5, route_filter: Optional[List[str]] = None - ) -> List[Dict]: - """Given a query vector, retrieve the top_k most similar records.""" - # get scores and routes - scores, routes = self.index.query( - vector=xq[0], top_k=top_k, route_filter=route_filter - ) - return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)] - - async def _async_retrieve( - self, xq: Any, top_k: int = 5, route_filter: Optional[List[str]] = None - ) -> List[Dict]: - """Given a query vector, retrieve the top_k most similar records.""" - # get scores and routes - scores, routes = await self.index.aquery( - vector=xq[0], top_k=top_k, route_filter=route_filter - ) - return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)] - def _set_aggregation_method(self, aggregation: str = "sum"): # TODO is this really needed? if aggregation == "sum": @@ -1149,6 +1089,7 @@ class BaseRouter(BaseModel): f"Unsupported aggregation method chosen: {aggregation}. Choose either 'SUM', 'MEAN', or 'MAX'." ) + # TODO JB allow return of multiple routes def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float]]: """Classify the query results into a single class based on the highest total score. If no classification is found, return an empty string and an empty list. @@ -1216,6 +1157,7 @@ class BaseRouter(BaseModel): logger.error(f"Route `{name}` not found") return None + @deprecated("This method is deprecated. Use `semantic_classify` instead.") def _semantic_classify_multiple_routes( self, query_results: List[Dict] ) -> List[Tuple[str, float]]: @@ -1243,6 +1185,7 @@ class BaseRouter(BaseModel): self, query_results: List[Dict] ) -> Dict[str, List[float]]: scores_by_class: Dict[str, List[float]] = {} + logger.warning(f"JBTEMP: {query_results=}") for result in query_results: score = result["score"] route = result["route"] diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index 0bb0574ba15577b24ea70c017fbd66529a7412ee..8ab312853520e45ce8948b59285291183d54ad5a 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -77,6 +77,7 @@ class HybridRouter(BaseRouter): if current_remote_hash.value == "": # if remote hash is empty, the index is to be initialized current_remote_hash = current_local_hash + logger.warning(f"JBTEMP: {routes}") if isinstance(routes, Route): routes = [routes] # create embeddings for all routes @@ -220,16 +221,18 @@ class HybridRouter(BaseRouter): raise ValueError("Sparse vector is required for HybridLocalIndex.") # TODO: add alpha as a parameter scores, route_names = self.index.query( - vector=vector, + vector=vector[0], top_k=self.top_k, route_filter=route_filter, sparse_vector=sparse_vector, ) + query_results = [ + {"route": d, "score": s.item()} for d, s in zip(route_names, scores) + ] + # TODO JB we should probably make _semantic_classify consume arrays rather than + # needing to convert to list here top_class, top_class_scores = self._semantic_classify( - [ - {"score": score, "route": route} - for score, route in zip(scores, route_names) - ] + query_results=query_results ) passed = self._pass_threshold(top_class_scores, self.score_threshold) if passed: diff --git a/tests/unit/test_router.py b/tests/unit/test_router.py index b2b87b9339ec6de935a8ef9fa6ffd03f15e5dc42..bd215242b82575843e035f610e6b586e33627836 100644 --- a/tests/unit/test_router.py +++ b/tests/unit/test_router.py @@ -41,13 +41,16 @@ def init_index( index_cls, dimensions: Optional[int] = None, namespace: Optional[str] = "", + index_name: Optional[str] = None, ): """We use this function to initialize indexes with different names to avoid issues during testing. """ if index_cls is PineconeIndex: + # we specify different index names to avoid dimensionality issues between different encoders + index_name = TEST_ID if not index_name else f"{TEST_ID}-{index_name.lower()}" index = index_cls( - index_name=TEST_ID, dimensions=dimensions, namespace=namespace + index_name=index_name, dimensions=dimensions, namespace=namespace ) else: index = index_cls() @@ -108,12 +111,28 @@ def base_encoder(): @pytest.fixture def cohere_encoder(mocker): mocker.patch.object(CohereEncoder, "__call__", side_effect=mock_encoder_call) + + # Mock async call + async def async_mock_encoder_call(docs=None, utterances=None): + # Handle either docs or utterances parameter + texts = docs if docs is not None else utterances + return mock_encoder_call(texts) + + mocker.patch.object(CohereEncoder, "acall", side_effect=async_mock_encoder_call) return CohereEncoder(name="test-cohere-encoder", cohere_api_key="test_api_key") @pytest.fixture def openai_encoder(mocker): mocker.patch.object(OpenAIEncoder, "__call__", side_effect=mock_encoder_call) + + # Mock async call + async def async_mock_encoder_call(docs=None, utterances=None): + # Handle either docs or utterances parameter + texts = docs if docs is not None else utterances + return mock_encoder_call(texts) + + mocker.patch.object(OpenAIEncoder, "acall", side_effect=async_mock_encoder_call) return OpenAIEncoder(name="text-embedding-3-small", openai_api_key="test_api_key") @@ -218,12 +237,11 @@ def get_test_routers(): ], ) class TestIndexEncoders: - def test_initialization( - self, routes, openai_encoder, index_cls, encoder_cls, router_cls - ): - index = init_index(index_cls) + def test_initialization(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) route_layer = router_cls( - encoder=encoder_cls(), + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -232,8 +250,7 @@ class TestIndexEncoders: if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be populated - assert openai_encoder.score_threshold == 0.3 - assert route_layer.score_threshold == 0.3 + assert route_layer.score_threshold == encoder.score_threshold assert route_layer.top_k == 10 assert len(route_layer.index) == 5 assert ( @@ -245,69 +262,293 @@ class TestIndexEncoders: def test_initialization_different_encoders( self, encoder_cls, index_cls, router_cls ): - index = init_index(index_cls) encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) route_layer = router_cls(encoder=encoder, index=index) assert route_layer.score_threshold == encoder.score_threshold - def test_initialization_no_encoder(self, openai_encoder, index_cls, router_cls): + def test_initialization_no_encoder(self, index_cls, encoder_cls, router_cls): os.environ["OPENAI_API_KEY"] = "test_api_key" route_layer_none = router_cls(encoder=None) - assert route_layer_none.score_threshold == openai_encoder.score_threshold + assert route_layer_none.score_threshold == 0.3 + + +class TestRouterConfig: + def test_from_file_json(self, tmp_path): + # Create a temporary JSON file with layer configuration + config_path = tmp_path / "config.json" + config_path.write_text( + layer_json() + ) # Assuming layer_json() returns a valid JSON string + + # Load the RouterConfig from the temporary file + layer_config = RouterConfig.from_file(str(config_path)) + + # Assertions to verify the loaded configuration + assert layer_config.encoder_type == "cohere" + assert layer_config.encoder_name == "embed-english-v3.0" + assert len(layer_config.routes) == 2 + assert layer_config.routes[0].name == "politics" + + def test_from_file_yaml(self, tmp_path): + # Create a temporary YAML file with layer configuration + config_path = tmp_path / "config.yaml" + config_path.write_text( + layer_yaml() + ) # Assuming layer_yaml() returns a valid YAML string + + # Load the RouterConfig from the temporary file + layer_config = RouterConfig.from_file(str(config_path)) + + # Assertions to verify the loaded configuration + assert layer_config.encoder_type == "cohere" + assert layer_config.encoder_name == "embed-english-v3.0" + assert len(layer_config.routes) == 2 + assert layer_config.routes[0].name == "politics" + + def test_from_file_invalid_path(self): + with pytest.raises(FileNotFoundError) as excinfo: + RouterConfig.from_file("nonexistent_path.json") + assert "[Errno 2] No such file or directory: 'nonexistent_path.json'" in str( + excinfo.value + ) + + def test_from_file_unsupported_type(self, tmp_path): + # Create a temporary unsupported file + config_path = tmp_path / "config.unsupported" + config_path.write_text(layer_json()) + + with pytest.raises(ValueError) as excinfo: + RouterConfig.from_file(str(config_path)) + assert "Unsupported file type" in str(excinfo.value) + + def test_from_file_invalid_config(self, tmp_path): + # Define an invalid configuration JSON + invalid_config_json = """ + { + "encoder_type": "cohere", + "encoder_name": "embed-english-v3.0", + "routes": "This should be a list, not a string" + }""" + + # Write the invalid configuration to a temporary JSON file + config_path = tmp_path / "invalid_config.json" + with open(config_path, "w") as file: + file.write(invalid_config_json) + + # Patch the is_valid function to return False for this test + with patch("semantic_router.routers.base.is_valid", return_value=False): + # Attempt to load the RouterConfig from the temporary file + # and assert that it raises an exception due to invalid configuration + with pytest.raises(Exception) as excinfo: + RouterConfig.from_file(str(config_path)) + assert "Invalid config JSON or YAML" in str( + excinfo.value + ), "Loading an invalid configuration should raise an exception." + + def test_from_file_with_llm(self, tmp_path): + llm_config_json = """ + { + "encoder_type": "cohere", + "encoder_name": "embed-english-v3.0", + "routes": [ + { + "name": "llm_route", + "utterances": ["tell me a joke", "say something funny"], + "llm": { + "module": "semantic_router.llms.base", + "class": "BaseLLM", + "model": "fake-model-v1" + } + } + ] + }""" + + config_path = tmp_path / "config_with_llm.json" + with open(config_path, "w") as file: + file.write(llm_config_json) + + # Load the RouterConfig from the temporary file + layer_config = RouterConfig.from_file(str(config_path)) + + # Using BaseLLM because trying to create a usable Mock LLM is a nightmare. + assert isinstance( + layer_config.routes[0].llm, BaseLLM + ), "LLM should be instantiated and associated with the route based on the " + "config" + assert ( + layer_config.routes[0].llm.name == "fake-model-v1" + ), "LLM instance should have the 'name' attribute set correctly" + + def test_init(self): + layer_config = RouterConfig() + assert layer_config.routes == [] + + def test_to_file_json(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + with patch("builtins.open", mock_open()) as mocked_open: + layer_config.to_file("data/test_output.json") + mocked_open.assert_called_once_with("data/test_output.json", "w") + + def test_to_file_yaml(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + with patch("builtins.open", mock_open()) as mocked_open: + layer_config.to_file("data/test_output.yaml") + mocked_open.assert_called_once_with("data/test_output.yaml", "w") + + def test_to_file_invalid(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + with pytest.raises(ValueError): + layer_config.to_file("test_output.txt") + + def test_from_file_invalid(self): + with open("test.txt", "w") as f: + f.write("dummy content") + with pytest.raises(ValueError): + RouterConfig.from_file("test.txt") + os.remove("test.txt") + + def test_to_dict(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + assert layer_config.to_dict()["routes"] == [route.to_dict()] + + def test_add(self): + route = Route(name="test", utterances=["utterance"]) + route2 = Route(name="test2", utterances=["utterance2"]) + layer_config = RouterConfig() + layer_config.add(route) + # confirm route added + assert layer_config.routes == [route] + # add second route and check updates + layer_config.add(route2) + assert layer_config.routes == [route, route2] + + def test_get(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + assert layer_config.get("test") == route + + def test_get_not_found(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + assert layer_config.get("not_found") is None + + def test_remove(self): + route = Route(name="test", utterances=["utterance"]) + layer_config = RouterConfig(routes=[route]) + layer_config.remove("test") + assert layer_config.routes == [] + + def test_setting_aggregation_methods(self, openai_encoder, routes): + for agg in ["sum", "mean", "max"]: + route_layer = SemanticRouter( + encoder=openai_encoder, + routes=routes, + aggregation=agg, + ) + assert route_layer.aggregation == agg + + def test_semantic_classify_multiple_routes_with_different_aggregation( + self, openai_encoder, routes + ): + route_scores = [ + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 1", "score": 0.5}, + {"route": "Route 2", "score": 0.4}, + {"route": "Route 2", "score": 0.6}, + {"route": "Route 2", "score": 0.8}, + {"route": "Route 3", "score": 0.1}, + {"route": "Route 3", "score": 1.0}, + ] + for agg in ["sum", "mean", "max"]: + route_layer = SemanticRouter( + encoder=openai_encoder, + routes=routes, + aggregation=agg, + ) + classification, score = route_layer._semantic_classify(route_scores) + + if agg == "sum": + assert classification == "Route 1" + assert score == [0.5, 0.5, 0.5, 0.5] + elif agg == "mean": + assert classification == "Route 2" + assert score == [0.4, 0.6, 0.8] + elif agg == "max": + assert classification == "Route 3" + assert score == [0.1, 1.0] -@pytest.mark.parametrize("index_cls", get_test_indexes()) +@pytest.mark.parametrize( + "index_cls,encoder_cls,router_cls", + [ + (index, encoder, router) + for index in get_test_indexes() + for encoder in get_test_encoders() + for router in get_test_routers() + ], +) class TestSemanticRouter: def test_initialization_dynamic_route( - self, dynamic_routes, openai_encoder, index_cls + self, dynamic_routes, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=dynamic_routes, index=index, auto_sync="local", ) - assert route_layer.score_threshold == openai_encoder.score_threshold + assert route_layer.score_threshold == encoder.score_threshold def test_add_single_utterance( - self, routes, route_single_utterance, openai_encoder, index_cls + self, routes, route_single_utterance, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", ) route_layer.add(routes=route_single_utterance) - assert route_layer.score_threshold == openai_encoder.score_threshold + assert route_layer.score_threshold == encoder.score_threshold if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be updated _ = route_layer("Hello") assert len(route_layer.index.get_utterances()) == 6 def test_init_and_add_single_utterance( - self, route_single_utterance, openai_encoder, index_cls + self, route_single_utterance, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, index=index, auto_sync="local", ) if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be updated route_layer.add(routes=route_single_utterance) - assert route_layer.score_threshold == openai_encoder.score_threshold + assert route_layer.score_threshold == encoder.score_threshold _ = route_layer("Hello") assert len(route_layer.index.get_utterances()) == 1 - def test_delete_index(self, openai_encoder, routes, index_cls): + def test_delete_index(self, routes, index_cls, encoder_cls, router_cls): # TODO merge .delete_index() and .delete_all() and get working index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -319,10 +560,11 @@ class TestSemanticRouter: time.sleep(PINECONE_SLEEP) # allow for index to be updated assert route_layer.index.get_utterances() == [] - def test_add_route(self, routes, openai_encoder, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, routes=[], index=index, auto_sync="local" + def test_add_route(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=[], index=index, auto_sync="local" ) if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # allow for index to be updated @@ -347,10 +589,11 @@ class TestSemanticRouter: assert route_layer.routes == [routes[0], routes[1]] assert len(route_layer.index.get_utterances()) == 5 - def test_list_route_names(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_list_route_names(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -362,10 +605,11 @@ class TestSemanticRouter: route.name for route in routes }, "The list of route names should match the names of the routes added." - def test_delete_route(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_delete_route(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -387,9 +631,10 @@ class TestSemanticRouter: utterance not in route_layer.index ), "The route's utterances should be deleted from the index." - def test_remove_route_not_found(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_remove_route_not_found(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) if index_cls is PineconeIndex: time.sleep(PINECONE_SLEEP) # Attempt to remove a route that does not exist @@ -397,10 +642,11 @@ class TestSemanticRouter: route_layer.delete(non_existent_route) # we should see warning in logs only (ie no errors) - def test_add_multiple_routes(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_add_multiple_routes(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, index=index, auto_sync="local", ) @@ -412,10 +658,11 @@ class TestSemanticRouter: assert route_layer.index is not None assert len(route_layer.index.get_utterances()) == 5 - def test_query_and_classification(self, openai_encoder, routes, index_cls): - index = init_index(index_cls, dimensions=3) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_query_and_classification(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -425,11 +672,12 @@ class TestSemanticRouter: query_result = route_layer(text="Hello").name assert query_result in ["Route 1", "Route 2"] - def test_query_filter(self, openai_encoder, routes, index_cls): - index = init_index(index_cls, dimensions=3) - route_layer = SemanticRouter( - encoder=openai_encoder, - routes=routes, + def test_query_filter(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, + routes=routes, index=index, auto_sync="local", ) @@ -447,11 +695,12 @@ class TestSemanticRouter: @pytest.mark.skipif( os.environ.get("PINECONE_API_KEY") is None, reason="Pinecone API key required" ) - def test_query_filter_pinecone(self, openai_encoder, routes, index_cls): + def test_query_filter_pinecone(self, routes, index_cls, encoder_cls, router_cls): if index_cls is PineconeIndex: - pineconeindex = init_index(index_cls, dimensions=3) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + pineconeindex = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=pineconeindex, auto_sync="local", @@ -469,11 +718,14 @@ class TestSemanticRouter: @pytest.mark.skipif( os.environ.get("PINECONE_API_KEY") is None, reason="Pinecone API key required" ) - def test_namespace_pinecone_index(self, openai_encoder, routes, index_cls): + def test_namespace_pinecone_index(self, routes, index_cls, encoder_cls, router_cls): if index_cls is PineconeIndex: - pineconeindex = init_index(index_cls, namespace="test") - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + pineconeindex = init_index( + index_cls, namespace="test", index_name=encoder.__class__.__name__ + ) + route_layer = router_cls( + encoder=encoder, routes=routes, index=pineconeindex, auto_sync="local", @@ -489,15 +741,18 @@ class TestSemanticRouter: assert query_result in ["Route 1"] route_layer.index.index.delete(namespace="test", delete_all=True) - def test_query_with_no_index(self, openai_encoder, index_cls): - route_layer = SemanticRouter(encoder=openai_encoder) + def test_query_with_no_index(self, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + route_layer = router_cls(encoder=encoder) + # TODO: probably should avoid running this with multiple encoders or find a way to set dims with pytest.raises(ValueError): assert route_layer(text="Anything").name is None - def test_query_with_vector(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_query_with_vector(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -508,16 +763,20 @@ class TestSemanticRouter: query_result = route_layer(vector=vector).name assert query_result in ["Route 1", "Route 2"] - def test_query_with_no_text_or_vector(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_query_with_no_text_or_vector( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) with pytest.raises(ValueError): route_layer() - def test_semantic_classify(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_semantic_classify(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -533,10 +792,13 @@ class TestSemanticRouter: assert classification == "Route 1" assert score == [0.9] - def test_semantic_classify_multiple_routes(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_semantic_classify_multiple_routes( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -554,44 +816,46 @@ class TestSemanticRouter: assert score == [0.9, 0.8] def test_query_no_text_dynamic_route( - self, openai_encoder, dynamic_routes, index_cls + self, dynamic_routes, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, routes=dynamic_routes, index=index - ) + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=dynamic_routes, index=index) vector = [0.1, 0.2, 0.3] with pytest.raises(ValueError): route_layer(vector=vector) - def test_pass_threshold(self, openai_encoder, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_pass_threshold(self, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, index=index, auto_sync="local", ) assert not route_layer._pass_threshold([], 0.3) assert route_layer._pass_threshold([0.6, 0.7], 0.3) - def test_failover_score_threshold(self, openai_encoder, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_failover_score_threshold(self, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, index=index, auto_sync="local", ) assert route_layer.score_threshold == 0.3 - def test_json(self, openai_encoder, routes, index_cls): + def test_json(self, routes, index_cls, encoder_cls, router_cls): temp = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) try: temp_path = temp.name # Save the temporary file's path temp.close() # Close the file to ensure it can be opened again on Windows os.environ["OPENAI_API_KEY"] = "test_api_key" - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -608,15 +872,16 @@ class TestSemanticRouter: finally: os.remove(temp_path) # Ensure the file is deleted even if the test fails - def test_yaml(self, openai_encoder, routes, index_cls): + def test_yaml(self, routes, index_cls, encoder_cls, router_cls): temp = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) try: temp_path = temp.name # Save the temporary file's path temp.close() # Close the file to ensure it can be opened again on Windows os.environ["OPENAI_API_KEY"] = "test_api_key" - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -633,116 +898,11 @@ class TestSemanticRouter: finally: os.remove(temp_path) # Ensure the file is deleted even if the test fails - def test_from_file_json(openai_encoder, tmp_path, index_cls): - # Create a temporary JSON file with layer configuration - config_path = tmp_path / "config.json" - config_path.write_text( - layer_json() - ) # Assuming layer_json() returns a valid JSON string - - # Load the RouterConfig from the temporary file - layer_config = RouterConfig.from_file(str(config_path)) - - # Assertions to verify the loaded configuration - assert layer_config.encoder_type == "cohere" - assert layer_config.encoder_name == "embed-english-v3.0" - assert len(layer_config.routes) == 2 - assert layer_config.routes[0].name == "politics" - - def test_from_file_yaml(openai_encoder, tmp_path, index_cls): - # Create a temporary YAML file with layer configuration - config_path = tmp_path / "config.yaml" - config_path.write_text( - layer_yaml() - ) # Assuming layer_yaml() returns a valid YAML string - - # Load the RouterConfig from the temporary file - layer_config = RouterConfig.from_file(str(config_path)) - - # Assertions to verify the loaded configuration - assert layer_config.encoder_type == "cohere" - assert layer_config.encoder_name == "embed-english-v3.0" - assert len(layer_config.routes) == 2 - assert layer_config.routes[0].name == "politics" - - def test_from_file_invalid_path(self, index_cls): - with pytest.raises(FileNotFoundError) as excinfo: - RouterConfig.from_file("nonexistent_path.json") - assert "[Errno 2] No such file or directory: 'nonexistent_path.json'" in str( - excinfo.value - ) - - def test_from_file_unsupported_type(self, tmp_path, index_cls): - # Create a temporary unsupported file - config_path = tmp_path / "config.unsupported" - config_path.write_text(layer_json()) - - with pytest.raises(ValueError) as excinfo: - RouterConfig.from_file(str(config_path)) - assert "Unsupported file type" in str(excinfo.value) - - def test_from_file_invalid_config(self, tmp_path, index_cls): - # Define an invalid configuration JSON - invalid_config_json = """ - { - "encoder_type": "cohere", - "encoder_name": "embed-english-v3.0", - "routes": "This should be a list, not a string" - }""" - - # Write the invalid configuration to a temporary JSON file - config_path = tmp_path / "invalid_config.json" - with open(config_path, "w") as file: - file.write(invalid_config_json) - - # Patch the is_valid function to return False for this test - with patch("semantic_router.routers.base.is_valid", return_value=False): - # Attempt to load the RouterConfig from the temporary file - # and assert that it raises an exception due to invalid configuration - with pytest.raises(Exception) as excinfo: - RouterConfig.from_file(str(config_path)) - assert "Invalid config JSON or YAML" in str( - excinfo.value - ), "Loading an invalid configuration should raise an exception." - - def test_from_file_with_llm(self, tmp_path, index_cls): - llm_config_json = """ - { - "encoder_type": "cohere", - "encoder_name": "embed-english-v3.0", - "routes": [ - { - "name": "llm_route", - "utterances": ["tell me a joke", "say something funny"], - "llm": { - "module": "semantic_router.llms.base", - "class": "BaseLLM", - "model": "fake-model-v1" - } - } - ] - }""" - - config_path = tmp_path / "config_with_llm.json" - with open(config_path, "w") as file: - file.write(llm_config_json) - - # Load the RouterConfig from the temporary file - layer_config = RouterConfig.from_file(str(config_path)) - - # Using BaseLLM because trying to create a usable Mock LLM is a nightmare. - assert isinstance( - layer_config.routes[0].llm, BaseLLM - ), "LLM should be instantiated and associated with the route based on the " - "config" - assert ( - layer_config.routes[0].llm.name == "fake-model-v1" - ), "LLM instance should have the 'name' attribute set correctly" - - def test_config(self, openai_encoder, routes, index_cls): + def test_config(self, routes, index_cls, encoder_cls, router_cls): os.environ["OPENAI_API_KEY"] = "test_api_key" - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) # confirm route creation functions as expected layer_config = route_layer.to_config() assert layer_config.routes == route_layer.routes @@ -755,16 +915,18 @@ class TestSemanticRouter: ) assert route_layer_from_config.score_threshold == route_layer.score_threshold - def test_get_thresholds(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_get_thresholds(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) assert route_layer.get_thresholds() == {"Route 1": 0.3, "Route 2": 0.3} def test_with_multiple_routes_passing_threshold( - self, openai_encoder, routes, index_cls + self, routes, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) route_layer.score_threshold = 0.5 # Set the score_threshold if needed # Assuming route_layer is already set up with routes "Route 1" and "Route 2" query_results = [ @@ -778,9 +940,12 @@ class TestSemanticRouter: expected ), "Should classify and return routes above their thresholds" - def test_with_no_routes_passing_threshold(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_with_no_routes_passing_threshold( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) # set threshold to 1.0 so that no routes pass route_layer.score_threshold = 1.0 query_results = [ @@ -793,9 +958,10 @@ class TestSemanticRouter: results == expected ), "Should return an empty list when no routes pass their thresholds" - def test_with_no_query_results(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_with_no_query_results(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) route_layer.score_threshold = 0.5 query_results = [] expected = [] @@ -804,9 +970,10 @@ class TestSemanticRouter: results == expected ), "Should return an empty list when there are no query results" - def test_with_unrecognized_route(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_with_unrecognized_route(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) route_layer.score_threshold = 0.5 # Test with a route name that does not exist in the route_layer's routes query_results = [{"route": "UnrecognizedRoute", "score": 0.9}] @@ -814,10 +981,11 @@ class TestSemanticRouter: results = route_layer._semantic_classify_multiple_routes(query_results) assert results == expected, "Should ignore and not return unrecognized routes" - def test_retrieve_with_text(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_retrieve_with_text(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -829,10 +997,11 @@ class TestSemanticRouter: result.name in ["Route 1", "Route 2"] for result in results ), "Expected the result to be either 'Route 1' or 'Route 2'" - def test_retrieve_with_vector(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_retrieve_with_vector(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -846,10 +1015,13 @@ class TestSemanticRouter: result.name in ["Route 1", "Route 2"] for result in results ), "Expected the result to be either 'Route 1' or 'Route 2'" - def test_retrieve_without_text_or_vector(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_retrieve_without_text_or_vector( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -857,10 +1029,11 @@ class TestSemanticRouter: with pytest.raises(ValueError, match="Either text or vector must be provided"): route_layer.retrieve_multiple_routes() - def test_retrieve_no_matches(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_retrieve_no_matches(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, index=index, auto_sync="local", @@ -871,10 +1044,11 @@ class TestSemanticRouter: results = route_layer.retrieve_multiple_routes(text=text) assert len(results) == 0, f"Expected no results, but got {len(results)}" - def test_retrieve_one_match(self, openai_encoder, routes_3, index_cls): - index = init_index(index_cls, dimensions=3) - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_retrieve_one_match(self, routes_3, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes_3, index=index, auto_sync="local", @@ -890,11 +1064,12 @@ class TestSemanticRouter: assert "Route 1" in matched_routes, "Expected 'Route 1' to be a match" def test_retrieve_with_text_for_multiple_matches( - self, openai_encoder, routes_2, index_cls + self, routes_2, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter( - encoder=openai_encoder, + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes_2, index=index, auto_sync="local", @@ -910,10 +1085,11 @@ class TestSemanticRouter: assert "Route 2" in matched_routes, "Expected 'Route 2' to be a match" def test_set_aggregation_method_with_unsupported_value( - self, openai_encoder, routes, index_cls + self, routes, index_cls, encoder_cls, router_cls ): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) unsupported_aggregation = "unsupported_aggregation_method" with pytest.raises( ValueError, @@ -921,17 +1097,21 @@ class TestSemanticRouter: ): route_layer._set_aggregation_method(unsupported_aggregation) - def test_refresh_routes_not_implemented(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_refresh_routes_not_implemented( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) with pytest.raises( NotImplementedError, match="This method has not yet been implemented." ): route_layer._refresh_routes() - def test_update_threshold(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_update_threshold(self, routes, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) route_name = "Route 1" new_threshold = 0.8 route_layer.update(name=route_name, threshold=new_threshold) @@ -940,9 +1120,12 @@ class TestSemanticRouter: updated_route.score_threshold == new_threshold ), f"Expected threshold to be updated to {new_threshold}, but got {updated_route.score_threshold}" - def test_update_non_existent_route(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_update_non_existent_route( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) non_existent_route = "Non-existent Route" with pytest.raises( ValueError, @@ -950,18 +1133,24 @@ class TestSemanticRouter: ): route_layer.update(name=non_existent_route, threshold=0.7) - def test_update_without_parameters(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_update_without_parameters( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) with pytest.raises( ValueError, match="At least one of 'threshold' or 'utterances' must be provided.", ): route_layer.update(name="Route 1") - def test_update_utterances_not_implemented(self, openai_encoder, routes, index_cls): - index = init_index(index_cls) - route_layer = SemanticRouter(encoder=openai_encoder, routes=routes, index=index) + def test_update_utterances_not_implemented( + self, routes, index_cls, encoder_cls, router_cls + ): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls(encoder=encoder, routes=routes, index=index) with pytest.raises( NotImplementedError, match="The update method cannot be used for updating utterances yet.", @@ -969,11 +1158,23 @@ class TestSemanticRouter: route_layer.update(name="Route 1", utterances=["New utterance"]) +@pytest.mark.parametrize( + "index_cls,encoder_cls,router_cls", + [ + (index, encoder, router) + for index in get_test_indexes() + for encoder in get_test_encoders() + for router in get_test_routers() + ], +) class TestLayerFit: - def test_eval(self, openai_encoder, routes, test_data): - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_eval(self, routes, test_data, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, + index=index, auto_sync="local", ) # unpack test data @@ -981,135 +1182,15 @@ class TestLayerFit: # evaluate route_layer.evaluate(X=X, y=y, batch_size=int(len(test_data) / 5)) - def test_fit(self, openai_encoder, routes, test_data): - route_layer = SemanticRouter( - encoder=openai_encoder, + def test_fit(self, routes, test_data, index_cls, encoder_cls, router_cls): + encoder = encoder_cls() + index = init_index(index_cls, index_name=encoder.__class__.__name__) + route_layer = router_cls( + encoder=encoder, routes=routes, + index=index, auto_sync="local", ) # unpack test data X, y = zip(*test_data) route_layer.fit(X=X, y=y, batch_size=int(len(test_data) / 5)) - - -# Add more tests for edge cases and error handling as needed. - - -class TestRouterConfig: - def test_init(self): - layer_config = RouterConfig() - assert layer_config.routes == [] - - def test_to_file_json(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - with patch("builtins.open", mock_open()) as mocked_open: - layer_config.to_file("data/test_output.json") - mocked_open.assert_called_once_with("data/test_output.json", "w") - - def test_to_file_yaml(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - with patch("builtins.open", mock_open()) as mocked_open: - layer_config.to_file("data/test_output.yaml") - mocked_open.assert_called_once_with("data/test_output.yaml", "w") - - def test_to_file_invalid(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - with pytest.raises(ValueError): - layer_config.to_file("test_output.txt") - - def test_from_file_json(self): - mock_json_data = layer_json() - with patch("builtins.open", mock_open(read_data=mock_json_data)) as mocked_open: - layer_config = RouterConfig.from_file("data/test.json") - mocked_open.assert_called_once_with("data/test.json", "r") - assert isinstance(layer_config, RouterConfig) - - def test_from_file_yaml(self): - mock_yaml_data = layer_yaml() - with patch("builtins.open", mock_open(read_data=mock_yaml_data)) as mocked_open: - layer_config = RouterConfig.from_file("data/test.yaml") - mocked_open.assert_called_once_with("data/test.yaml", "r") - assert isinstance(layer_config, RouterConfig) - - def test_from_file_invalid(self): - with open("test.txt", "w") as f: - f.write("dummy content") - with pytest.raises(ValueError): - RouterConfig.from_file("test.txt") - os.remove("test.txt") - - def test_to_dict(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - assert layer_config.to_dict()["routes"] == [route.to_dict()] - - def test_add(self): - route = Route(name="test", utterances=["utterance"]) - route2 = Route(name="test2", utterances=["utterance2"]) - layer_config = RouterConfig() - layer_config.add(route) - # confirm route added - assert layer_config.routes == [route] - # add second route and check updates - layer_config.add(route2) - assert layer_config.routes == [route, route2] - - def test_get(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - assert layer_config.get("test") == route - - def test_get_not_found(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - assert layer_config.get("not_found") is None - - def test_remove(self): - route = Route(name="test", utterances=["utterance"]) - layer_config = RouterConfig(routes=[route]) - layer_config.remove("test") - assert layer_config.routes == [] - - def test_setting_aggregation_methods(self, openai_encoder, routes): - for agg in ["sum", "mean", "max"]: - route_layer = SemanticRouter( - encoder=openai_encoder, - routes=routes, - aggregation=agg, - ) - assert route_layer.aggregation == agg - - def test_semantic_classify_multiple_routes_with_different_aggregation( - self, openai_encoder, routes - ): - route_scores = [ - {"route": "Route 1", "score": 0.5}, - {"route": "Route 1", "score": 0.5}, - {"route": "Route 1", "score": 0.5}, - {"route": "Route 1", "score": 0.5}, - {"route": "Route 2", "score": 0.4}, - {"route": "Route 2", "score": 0.6}, - {"route": "Route 2", "score": 0.8}, - {"route": "Route 3", "score": 0.1}, - {"route": "Route 3", "score": 1.0}, - ] - for agg in ["sum", "mean", "max"]: - route_layer = SemanticRouter( - encoder=openai_encoder, - routes=routes, - aggregation=agg, - ) - classification, score = route_layer._semantic_classify(route_scores) - - if agg == "sum": - assert classification == "Route 1" - assert score == [0.5, 0.5, 0.5, 0.5] - elif agg == "mean": - assert classification == "Route 2" - assert score == [0.4, 0.6, 0.8] - elif agg == "max": - assert classification == "Route 3" - assert score == [0.1, 1.0]