Commit ee0d792b authored by Juan Pablo Mesa Lopez

Added a fix to _encode_documents in rolling_window.py for when len(docs) exceeds OpenAI's 2048-input limit

parent 302fe173
@@ -100,12 +100,19 @@ class RollingWindowSplitter(BaseSplitter):
         return splits

     def _encode_documents(self, docs: List[str]) -> np.ndarray:
-        try:
-            embeddings = self.encoder(docs)
-            return np.array(embeddings)
-        except Exception as e:
-            logger.error(f"Error encoding documents {docs}: {e}")
-            raise
+        max_docs_per_batch = 2000  # OpenAI limit is 2048
+        embeddings = []
+        for i in range(0, len(docs), max_docs_per_batch):
+            batch_docs = docs[i : i + max_docs_per_batch]
+            try:
+                batch_embeddings = self.encoder(batch_docs)
+                embeddings.extend(batch_embeddings)
+            except Exception as e:
+                logger.error(f"Error encoding documents {batch_docs}: {e}")
+                raise
+        return np.array(embeddings)

     def _calculate_similarity_scores(self, encoded_docs: np.ndarray) -> List[float]:
         raw_similarities = []
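Below is a minimal, self-contained sketch of the batching approach this commit applies, so the chunking logic can be run in isolation. It does not use the project's real encoder or class; fake_encoder, encode_documents, and MAX_DOCS_PER_BATCH are hypothetical stand-ins for self.encoder and the method above.

import logging
from typing import List

import numpy as np

logger = logging.getLogger(__name__)

MAX_DOCS_PER_BATCH = 2000  # stay safely under OpenAI's 2048-input limit


def fake_encoder(docs: List[str]) -> List[List[float]]:
    # Hypothetical stand-in for the real embedding encoder:
    # returns one small vector per document.
    return [[float(len(d)), 0.0, 1.0] for d in docs]


def encode_documents(docs: List[str]) -> np.ndarray:
    # Encode documents in batches so no single request exceeds the API limit.
    embeddings: List[List[float]] = []
    for i in range(0, len(docs), MAX_DOCS_PER_BATCH):
        batch_docs = docs[i : i + MAX_DOCS_PER_BATCH]
        try:
            embeddings.extend(fake_encoder(batch_docs))
        except Exception as e:
            logger.error(f"Error encoding documents {batch_docs}: {e}")
            raise
    return np.array(embeddings)


if __name__ == "__main__":
    docs = [f"doc {n}" for n in range(4500)]  # > 2048, so split into three batches
    vectors = encode_documents(docs)
    print(vectors.shape)  # (4500, 3)

With 4500 documents the loop issues three encoder calls (2000 + 2000 + 500 inputs) instead of one oversized request, which is the same behaviour the new _encode_documents gives when self.encoder is backed by the OpenAI embeddings API.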