diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py index d56a1e71ecd0ff520edd69ff0d709524617db964..3cfa2a704189c00709d406073691f7916720e24c 100644 --- a/semantic_router/encoders/openai.py +++ b/semantic_router/encoders/openai.py @@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder): return embeddings def _truncate(self, text: str) -> str: - tokens = self._token_encoder.encode(text) + # we use encode_ordinary as faster equivalent to encode(text, disallowed_special=()) + tokens = self._token_encoder.encode_ordinary(text) if len(tokens) > self.token_limit: logger.warning( f"Document exceeds token limit: {len(tokens)} > {self.token_limit}"