diff --git a/pyproject.toml b/pyproject.toml
index 1db92298c89558bdea75590d9b3679c71fbcb149..24c1fa09f7918f8ee0b3c6988086effa1093ca97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.0.23"
+version = "0.0.24"
 description = "Super fast semantic router for AI decision making"
 authors = [
     "James Briggs <james@aurelio.ai>",
diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py
index 7ac0c93ebe77c6c37c9cd8c960db48a399adac11..d810106abbd425f673391ada9a802561532318b3 100644
--- a/semantic_router/__init__.py
+++ b/semantic_router/__init__.py
@@ -4,4 +4,4 @@ from semantic_router.route import Route
 
 __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
 
-__version__ = "0.0.23"
+__version__ = "0.0.24"
diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
index ca9eed95ff1105c7e69591034d66692f40ae4193..0e7c651de4fbe3e113e6ad0313f7b2d2ce1355d3 100644
--- a/semantic_router/splitters/rolling_window.py
+++ b/semantic_router/splitters/rolling_window.py
@@ -14,6 +14,7 @@ class RollingWindowSplitter(BaseSplitter):
         self,
         encoder: BaseEncoder,
         threshold_adjustment=0.01,
+        dynamic_threshold: bool = True,
         window_size=5,
         min_split_tokens=100,
         max_split_tokens=300,
@@ -25,6 +26,7 @@ class RollingWindowSplitter(BaseSplitter):
         self.calculated_threshold: float
         self.encoder = encoder
         self.threshold_adjustment = threshold_adjustment
+        self.dynamic_threshold = dynamic_threshold
         self.window_size = window_size
         self.plot_splits = plot_splits
         self.min_split_tokens = min_split_tokens
@@ -321,7 +323,10 @@ class RollingWindowSplitter(BaseSplitter):
             )
         docs = split_to_sentences(docs[0])
         encoded_docs = self.encode_documents(docs)
-        self.find_optimal_threshold(docs, encoded_docs)
+        if self.dynamic_threshold:
+            self.find_optimal_threshold(docs, encoded_docs)
+        else:
+            self.calculated_threshold = self.encoder.score_threshold
         similarities = self.calculate_similarity_scores(encoded_docs)
         split_indices = self.find_split_indices(similarities=similarities)
         splits = self.split_documents(docs, split_indices, similarities)
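
For reviewers, a minimal usage sketch of the new `dynamic_threshold` flag, not part of the diff itself. The encoder choice (`OpenAIEncoder`) and the input text are assumptions for illustration; the constructor arguments mirror those shown in the diff. With `dynamic_threshold=False`, the splitter skips `find_optimal_threshold()` and falls back to the encoder's `score_threshold`.

```python
# Hypothetical usage sketch for the new dynamic_threshold flag.
# Assumes OPENAI_API_KEY is set; OpenAIEncoder and the sample text are
# illustrative choices, not part of this change.
from semantic_router.encoders import OpenAIEncoder
from semantic_router.splitters.rolling_window import RollingWindowSplitter

encoder = OpenAIEncoder()

# dynamic_threshold=False -> use encoder.score_threshold instead of
# searching for an optimal similarity threshold per document.
splitter = RollingWindowSplitter(
    encoder=encoder,
    dynamic_threshold=False,
    window_size=5,
    min_split_tokens=100,
    max_split_tokens=300,
)

splits = splitter(["A long document that should be split into semantic chunks..."])
```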