From 1ab26d7323ef35e73487972949d00b4818aa77d5 Mon Sep 17 00:00:00 2001
From: Simonas <20096648+simjak@users.noreply.github.com>
Date: Wed, 17 Apr 2024 09:16:52 +0300
Subject: [PATCH] fix: Hard split for max token size

---
 semantic_router/splitters/rolling_window.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
index 2f80ff3b..fcd520e9 100644
--- a/semantic_router/splitters/rolling_window.py
+++ b/semantic_router/splitters/rolling_window.py
@@ -215,7 +215,7 @@ class RollingWindowSplitter(BaseSplitter):
             logger.debug(f"Document token count: {doc_token_count} tokens")
             # Check if current index is a split point based on similarity
             if doc_idx + 1 in split_indices:
-                if current_tokens_count + doc_token_count >= self.min_split_tokens:
+                if self.min_split_tokens <= current_tokens_count + doc_token_count < self.max_split_tokens:
                     # Include the current document before splitting
                     # if it doesn't exceed the max limit
                     current_split.append(doc)
-- 
GitLab
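
For context (not part of the patch): a minimal standalone sketch of the boundary check this diff introduces, using hypothetical min/max values of 100 and 300 tokens rather than the library's actual settings. The pre-patch condition only enforced the lower bound, so appending the current document could push a split past max_split_tokens; the patched condition also requires the combined count to stay below the upper bound.

# Minimal sketch (assumed values; not the library's actual defaults).
MIN_SPLIT_TOKENS = 100
MAX_SPLIT_TOKENS = 300

def within_split_bounds(current_tokens_count: int, doc_token_count: int) -> bool:
    # Patched check: append the document only if the combined token count
    # lands inside [MIN_SPLIT_TOKENS, MAX_SPLIT_TOKENS).
    combined = current_tokens_count + doc_token_count
    return MIN_SPLIT_TOKENS <= combined < MAX_SPLIT_TOKENS

# 250 accumulated + 120 incoming = 370 tokens: the old ">= min" check would
# append the document and overshoot the max; the bounded check returns False,
# leaving the oversized document to the hard split for max token size.
print(within_split_bounds(250, 120))  # False
print(within_split_bounds(150, 100))  # True (250 is within [100, 300))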