Skip to content
Snippets Groups Projects
Commit ee0d792b authored by Juan Pablo Mesa Lopez's avatar Juan Pablo Mesa Lopez
Browse files

Added fix to _encode_documents within rolling_window.py to batch requests when len(docs) > 2048 (the OpenAI per-request input limit)

parent 302fe173
No related branches found
No related tags found
No related merge requests found
...@@ -100,12 +100,19 @@ class RollingWindowSplitter(BaseSplitter): ...@@ -100,12 +100,19 @@ class RollingWindowSplitter(BaseSplitter):
return splits return splits
def _encode_documents(self, docs: List[str]) -> np.ndarray: def _encode_documents(self, docs: List[str]) -> np.ndarray:
try: max_docs_per_batch = 2000 # OpenAI limit is 2048
embeddings = self.encoder(docs) embeddings = []
return np.array(embeddings)
except Exception as e: for i in range(0, len(docs), max_docs_per_batch):
logger.error(f"Error encoding documents {docs}: {e}") batch_docs = docs[i : i + max_docs_per_batch]
raise try:
batch_embeddings = self.encoder(batch_docs)
embeddings.extend(batch_embeddings)
except Exception as e:
logger.error(f"Error encoding documents {batch_docs}: {e}")
raise
return np.array(embeddings)
def _calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]: def _calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]:
raw_similarities = [] raw_similarities = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment