From 1ab26d7323ef35e73487972949d00b4818aa77d5 Mon Sep 17 00:00:00 2001
From: Simonas <20096648+simjak@users.noreply.github.com>
Date: Wed, 17 Apr 2024 09:16:52 +0300
Subject: [PATCH] fix: Hard split for max token size

Include the current document in the split only when the combined token
count also stays below max_split_tokens; previously only the
min_split_tokens lower bound was checked, so a similarity-based split
could exceed the configured maximum.
---
 semantic_router/splitters/rolling_window.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
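Note for reviewers: a minimal, self-contained sketch of the intended behaviour.
The surrounding accumulate-and-split loop, the hard_split helper, and the default
token bounds below are illustrative assumptions, not the library's actual code;
only the patched condition mirrors the real guard in RollingWindowSplitter.

from typing import List, Set


def hard_split(
    docs: List[str],
    token_counts: List[int],
    split_indices: Set[int],
    min_split_tokens: int = 100,
    max_split_tokens: int = 300,
) -> List[List[str]]:
    """Accumulate docs into splits, honoring the min/max token bounds."""
    splits: List[List[str]] = []
    current_split: List[str] = []
    current_tokens_count = 0
    for doc_idx, (doc, doc_token_count) in enumerate(zip(docs, token_counts)):
        if doc_idx + 1 in split_indices:
            # The patched guard: close the split at a similarity boundary only
            # if the total falls inside [min_split_tokens, max_split_tokens).
            if min_split_tokens <= current_tokens_count + doc_token_count < max_split_tokens:
                current_split.append(doc)
                splits.append(current_split)
                current_split, current_tokens_count = [], 0
                continue
        if current_tokens_count + doc_token_count > max_split_tokens:
            # Hard split: flush before this doc would push the split past the max.
            if current_split:
                splits.append(current_split)
            current_split, current_tokens_count = [], 0
        current_split.append(doc)
        current_tokens_count += doc_token_count
    if current_split:
        splits.append(current_split)
    return splits
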

diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
index 2f80ff3b..fcd520e9 100644
--- a/semantic_router/splitters/rolling_window.py
+++ b/semantic_router/splitters/rolling_window.py
@@ -215,7 +215,7 @@ class RollingWindowSplitter(BaseSplitter):
             logger.debug(f"Document token count: {doc_token_count} tokens")
             # Check if current index is a split point based on similarity
             if doc_idx + 1 in split_indices:
-                if current_tokens_count + doc_token_count >= self.min_split_tokens:
+                if self.min_split_tokens <= current_tokens_count + doc_token_count < self.max_split_tokens:
                     # Include the current document before splitting
                     # if it doesn't exceed the max limit
                     current_split.append(doc)
-- 
GitLab