From ff2ed5f94090e66b606e516c5bb63cb96ae71178 Mon Sep 17 00:00:00 2001
From: Simonas <20096648+simjak@users.noreply.github.com>
Date: Fri, 23 Feb 2024 11:10:19 +0200
Subject: [PATCH] chore: lint

---
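Note: 88 is black's default line length, so the new "-l 88" flag in the
format and lint targets pins the existing limit explicitly rather than
changing it.
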
 Makefile                                     |  4 ++--
 semantic_router/splitters/base.py            |  3 +--
 semantic_router/splitters/consecutive_sim.py |  3 ++-
 semantic_router/splitters/cumulative_sim.py  | 16 ++++++++++------
 semantic_router/splitters/rolling_window.py  |  3 +--
 semantic_router/splitters/utils.py           |  6 ------
 6 files changed, 16 insertions(+), 19 deletions(-)
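
Why the score_threshold field can be dropped from BaseSplitter without
breaking the splitters: the model config sets extra = Extra.allow, so the
concrete splitters may still assign self.score_threshold dynamically after
super().__init__(). Below is a minimal, self-contained sketch of that
mechanism; names mirror the real classes, but the bodies are trimmed and
the required encoder field is omitted so the snippet runs on its own (it
assumes a pydantic version that provides the pydantic.v1 namespace used in
splitters/base.py):

    from pydantic.v1 import BaseModel, Extra

    class BaseSplitter(BaseModel):
        name: str

        class Config:
            extra = Extra.allow  # permits assigning undeclared attributes

    class ConsecutiveSimSplitter(BaseSplitter):
        def __init__(
            self,
            name: str = "consecutive_similarity_splitter",
            score_threshold: float = 0.45,
        ):
            super().__init__(name=name)
            # Allowed only because of Extra.allow; under Extra.ignore or
            # Extra.forbid, pydantic v1 would raise ValueError here.
            self.score_threshold = score_threshold

    splitter = ConsecutiveSimSplitter()
    print(splitter.score_threshold)  # 0.45

In the real classes each splitter also mirrors the value onto
encoder.score_threshold, so the wrapped encoder is mutated at
construction time.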

diff --git a/Makefile b/Makefile
index 3891e912..a7c69643 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 format:
-	poetry run black --target-version py39 .
+	poetry run black --target-version py39 -l 88 .
 	poetry run ruff --select I --fix .
 
 PYTHON_FILES=.
@@ -7,7 +7,7 @@ lint: PYTHON_FILES=.
 lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$')
 
 lint lint_diff:
-	poetry run black --target-version py39 $(PYTHON_FILES) --check
+	poetry run black --target-version py39 -l 88 $(PYTHON_FILES) --check
 	poetry run ruff .
 	poetry run mypy $(PYTHON_FILES)
 
diff --git a/semantic_router/splitters/base.py b/semantic_router/splitters/base.py
index 8e68cd04..0514e014 100644
--- a/semantic_router/splitters/base.py
+++ b/semantic_router/splitters/base.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import List
 
 from colorama import Fore, Style
 from pydantic.v1 import BaseModel, Extra
@@ -10,7 +10,6 @@ from semantic_router.schema import DocumentSplit
 class BaseSplitter(BaseModel):
     name: str
     encoder: BaseEncoder
-    score_threshold: Optional[float]
 
     class Config:
         extra = Extra.allow
diff --git a/semantic_router/splitters/consecutive_sim.py b/semantic_router/splitters/consecutive_sim.py
index f30bbc75..4a2e1106 100644
--- a/semantic_router/splitters/consecutive_sim.py
+++ b/semantic_router/splitters/consecutive_sim.py
@@ -19,8 +19,9 @@ class ConsecutiveSimSplitter(BaseSplitter):
         name: str = "consecutive_similarity_splitter",
         score_threshold: float = 0.45,
     ):
-        super().__init__(name=name, score_threshold=score_threshold, encoder=encoder)
+        super().__init__(name=name, encoder=encoder)
         encoder.score_threshold = score_threshold
+        self.score_threshold = score_threshold
 
     def __call__(self, docs: List[Any]):
         # Check if there's only a single document
diff --git a/semantic_router/splitters/cumulative_sim.py b/semantic_router/splitters/cumulative_sim.py
index f7a6475a..e9dd8deb 100644
--- a/semantic_router/splitters/cumulative_sim.py
+++ b/semantic_router/splitters/cumulative_sim.py
@@ -8,9 +8,9 @@ from semantic_router.splitters.base import BaseSplitter
 
 
 class CumulativeSimSplitter(BaseSplitter):
-
     """
-    Called "cumulative sim" because we check the similarities of the embeddings of cumulative concatenated documents with the next document.
+    Called "cumulative sim" because we compare the embedding of the
+    cumulatively concatenated documents with that of the next document.
     """
 
     def __init__(
@@ -19,15 +19,17 @@ class CumulativeSimSplitter(BaseSplitter):
         name: str = "cumulative_similarity_splitter",
         score_threshold: float = 0.45,
     ):
-        super().__init__(name=name, score_threshold=score_threshold, encoder=encoder)
+        super().__init__(name=name, encoder=encoder)
         encoder.score_threshold = score_threshold
+        self.score_threshold = score_threshold
 
     def __call__(self, docs: List[str]):
         total_docs = len(docs)
         # Check if there's only a single document
         if total_docs == 1:
             raise ValueError(
-                "There is only one document provided; at least two are required to determine topics based on similarity."
+                "There is only one document provided; at least two are required "
+                "to determine topics based on similarity."
             )
         splits = []
         curr_split_start_idx = 0
@@ -35,10 +37,12 @@ class CumulativeSimSplitter(BaseSplitter):
         for idx in range(0, total_docs):
             if idx + 1 < total_docs:  # Ensure there is a next document to compare with.
                 if idx == 0:
-                    # On the first iteration, compare the first document directly to the second.
+                    # On the first iteration, compare the
+                    # first document directly to the second.
                     curr_split_docs = docs[idx]
                 else:
-                    # For subsequent iterations, compare cumulative documents up to the current one with the next.
+                    # For subsequent iterations, compare cumulative
+                    # documents up to the current one with the next.
                     curr_split_docs = "\n".join(docs[curr_split_start_idx : idx + 1])
                 next_doc = docs[idx + 1]
 
diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py
index 4a746f7c..0369c746 100644
--- a/semantic_router/splitters/rolling_window.py
+++ b/semantic_router/splitters/rolling_window.py
@@ -9,7 +9,6 @@ from semantic_router.splitters.utils import split_to_sentences, tiktoken_length
 from semantic_router.utils.logger import logger
 
 
-
 class RollingWindowSplitter(BaseSplitter):
     def __init__(
         self,
@@ -20,7 +19,7 @@ class RollingWindowSplitter(BaseSplitter):
         max_split_tokens=300,
         split_tokens_tolerance=10,
         plot_splits=False,
-        name = "rolling_window_splitter",
+        name="rolling_window_splitter",
     ):
         super().__init__(name=name, encoder=encoder)
         self.calculated_threshold: float
diff --git a/semantic_router/splitters/utils.py b/semantic_router/splitters/utils.py
index 296df2ad..349c3eaa 100644
--- a/semantic_router/splitters/utils.py
+++ b/semantic_router/splitters/utils.py
@@ -1,10 +1,5 @@
-from typing import List
-
 import regex
 import tiktoken
-from colorama import Fore, Style
-
-from semantic_router.schema import DocumentSplit
 
 
 def split_to_sentences(text: str) -> list[str]:
@@ -66,4 +61,3 @@ def tiktoken_length(text: str) -> int:
     tokenizer = tiktoken.get_encoding("cl100k_base")
     tokens = tokenizer.encode(text, disallowed_special=())
     return len(tokens)
-
-- 
GitLab