From 6a45948ab0cff1592d173813678b59df996e71f1 Mon Sep 17 00:00:00 2001
From: James Briggs <james.briggs@hotmail.com>
Date: Sun, 28 Apr 2024 18:08:01 +0800
Subject: [PATCH] fix: allow encoding of special tokens

---
 semantic_router/encoders/openai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py
index d56a1e71..3cfa2a70 100644
--- a/semantic_router/encoders/openai.py
+++ b/semantic_router/encoders/openai.py
@@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder):
         return embeddings
 
     def _truncate(self, text: str) -> str:
-        tokens = self._token_encoder.encode(text)
+        # we use encode_ordinary as a faster equivalent to encode(text, disallowed_special=())
+        tokens = self._token_encoder.encode_ordinary(text)
         if len(tokens) > self.token_limit:
             logger.warning(
                 f"Document exceeds token limit: {len(tokens)} > {self.token_limit}"
-- 
GitLab