Skip to content
Snippets Groups Projects
Unverified Commit 0445421e authored by Simonas Jakubonis, committed by GitHub
Browse files

Merge branch 'main' into simonas/unstructured-splitting-example

parents de02b3a3 4feb4314
No related branches found
No related tags found
No related merge requests found
[tool.poetry]
name = "semantic-router"
-version = "0.0.23"
+version = "0.0.24"
description = "Super fast semantic router for AI decision making"
authors = [
"James Briggs <james@aurelio.ai>",
......
......@@ -4,4 +4,4 @@ from semantic_router.route import Route
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
-__version__ = "0.0.23"
+__version__ = "0.0.24"
......@@ -14,6 +14,7 @@ class RollingWindowSplitter(BaseSplitter):
self,
encoder: BaseEncoder,
threshold_adjustment=0.01,
+dynamic_threshold: bool = True,
window_size=5,
min_split_tokens=100,
max_split_tokens=300,
......@@ -25,6 +26,7 @@ class RollingWindowSplitter(BaseSplitter):
self.calculated_threshold: float
self.encoder = encoder
self.threshold_adjustment = threshold_adjustment
+self.dynamic_threshold = dynamic_threshold
self.window_size = window_size
self.plot_splits = plot_splits
self.min_split_tokens = min_split_tokens
......@@ -321,7 +323,10 @@ class RollingWindowSplitter(BaseSplitter):
)
docs = split_to_sentences(docs[0])
encoded_docs = self.encode_documents(docs)
-self.find_optimal_threshold(docs, encoded_docs)
+if self.dynamic_threshold:
+    self.find_optimal_threshold(docs, encoded_docs)
+else:
+    self.calculated_threshold = self.encoder.score_threshold
similarities = self.calculate_similarity_scores(encoded_docs)
split_indices = self.find_split_indices(similarities=similarities)
splits = self.split_documents(docs, split_indices, similarities)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment