From ff1161f2cedd1dd90a6a81c6cc91837990912a27 Mon Sep 17 00:00:00 2001 From: Simonas <20096648+simjak@users.noreply.github.com> Date: Fri, 26 Apr 2024 20:22:56 +0300 Subject: [PATCH] Revert "fix: Added fix to encode documents within rolling window" This reverts commit 7bb47b3efa90c8ea21b7ac756158a7f70f4bc04d. --- semantic_router/splitters/rolling_window.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/semantic_router/splitters/rolling_window.py b/semantic_router/splitters/rolling_window.py index 2e02253d..a2809ff5 100644 --- a/semantic_router/splitters/rolling_window.py +++ b/semantic_router/splitters/rolling_window.py @@ -100,19 +100,12 @@ class RollingWindowSplitter(BaseSplitter): return splits def _encode_documents(self, docs: List[str]) -> np.ndarray: - max_docs_per_batch = 2000 # OpenAI limit is 2048 - embeddings = [] - - for i in range(0, len(docs), max_docs_per_batch): - batch_docs = docs[i : i + max_docs_per_batch] - try: - batch_embeddings = self.encoder(batch_docs) - embeddings.extend(batch_embeddings) - except Exception as e: - logger.error(f"Error encoding documents {batch_docs}: {e}") - raise - - return np.array(embeddings) + try: + embeddings = self.encoder(docs) + return np.array(embeddings) + except Exception as e: + logger.error(f"Error encoding documents {docs}: {e}") + raise def _calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]: raw_similarities = [] -- GitLab