Skip to content
Snippets Groups Projects
Unverified Commit 0445421e authored by Simonas Jakubonis, committed by GitHub
Browse files

Merge branch 'main' into simonas/unstructured-splitting-example

parents de02b3a3 4feb4314
No related branches found
No related tags found
No related merge requests found
[tool.poetry] [tool.poetry]
name = "semantic-router" name = "semantic-router"
version = "0.0.23" version = "0.0.24"
description = "Super fast semantic router for AI decision making" description = "Super fast semantic router for AI decision making"
authors = [ authors = [
"James Briggs <james@aurelio.ai>", "James Briggs <james@aurelio.ai>",
......
...@@ -4,4 +4,4 @@ from semantic_router.route import Route ...@@ -4,4 +4,4 @@ from semantic_router.route import Route
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"] __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
__version__ = "0.0.23" __version__ = "0.0.24"
...@@ -14,6 +14,7 @@ class RollingWindowSplitter(BaseSplitter): ...@@ -14,6 +14,7 @@ class RollingWindowSplitter(BaseSplitter):
self, self,
encoder: BaseEncoder, encoder: BaseEncoder,
threshold_adjustment=0.01, threshold_adjustment=0.01,
dynamic_threshold: bool = True,
window_size=5, window_size=5,
min_split_tokens=100, min_split_tokens=100,
max_split_tokens=300, max_split_tokens=300,
...@@ -25,6 +26,7 @@ class RollingWindowSplitter(BaseSplitter): ...@@ -25,6 +26,7 @@ class RollingWindowSplitter(BaseSplitter):
self.calculated_threshold: float self.calculated_threshold: float
self.encoder = encoder self.encoder = encoder
self.threshold_adjustment = threshold_adjustment self.threshold_adjustment = threshold_adjustment
self.dynamic_threshold = dynamic_threshold
self.window_size = window_size self.window_size = window_size
self.plot_splits = plot_splits self.plot_splits = plot_splits
self.min_split_tokens = min_split_tokens self.min_split_tokens = min_split_tokens
...@@ -321,7 +323,10 @@ class RollingWindowSplitter(BaseSplitter): ...@@ -321,7 +323,10 @@ class RollingWindowSplitter(BaseSplitter):
) )
docs = split_to_sentences(docs[0]) docs = split_to_sentences(docs[0])
encoded_docs = self.encode_documents(docs) encoded_docs = self.encode_documents(docs)
self.find_optimal_threshold(docs, encoded_docs) if self.dynamic_threshold:
self.find_optimal_threshold(docs, encoded_docs)
else:
self.calculated_threshold = self.encoder.score_threshold
similarities = self.calculate_similarity_scores(encoded_docs) similarities = self.calculate_similarity_scores(encoded_docs)
split_indices = self.find_split_indices(similarities=similarities) split_indices = self.find_split_indices(similarities=similarities)
splits = self.split_documents(docs, split_indices, similarities) splits = self.split_documents(docs, split_indices, similarities)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment