Skip to content
Snippets Groups Projects
Unverified Commit 6a45948a authored by James Briggs
Browse files

fix: allow encoding of special tokens

parent eb9ebe3c
No related branches found
No related tags found
No related merge requests found
......@@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder):
return embeddings
def _truncate(self, text: str) -> str:
tokens = self._token_encoder.encode(text)
# we use encode_ordinary as faster equivalent to encode(text, disallowed_special=())
tokens = self._token_encoder.encode_ordinary(text)
if len(tokens) > self.token_limit:
logger.warning(
f"Document exceeds token limit: {len(tokens)} > {self.token_limit}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment