From 6a45948ab0cff1592d173813678b59df996e71f1 Mon Sep 17 00:00:00 2001 From: James Briggs <james.briggs@hotmail.com> Date: Sun, 28 Apr 2024 18:08:01 +0800 Subject: [PATCH] fix: allow encoding of special tokens --- semantic_router/encoders/openai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py index d56a1e71..3cfa2a70 100644 --- a/semantic_router/encoders/openai.py +++ b/semantic_router/encoders/openai.py @@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder): return embeddings def _truncate(self, text: str) -> str: - tokens = self._token_encoder.encode(text) + # we use encode_ordinary as faster equivalent to encode(text, disallowed_special=()) + tokens = self._token_encoder.encode_ordinary(text) if len(tokens) > self.token_limit: logger.warning( f"Document exceeds token limit: {len(tokens)} > {self.token_limit}" -- GitLab