Skip to content
Snippets Groups Projects
Unverified Commit 29762f08 authored by James Briggs's avatar James Briggs Committed by GitHub
Browse files

Merge pull request #263 from aurelio-labs/james/encoder-disallowed-special

fix: James/encoder disallowed special
parents eb9ebe3c cc1430d1
No related branches found
No related tags found
No related merge requests found
[tool.poetry] [tool.poetry]
name = "semantic-router" name = "semantic-router"
version = "0.0.36" version = "0.0.37"
description = "Super fast semantic router for AI decision making" description = "Super fast semantic router for AI decision making"
authors = [ authors = [
"James Briggs <james@aurelio.ai>", "James Briggs <james@aurelio.ai>",
......
...@@ -4,4 +4,4 @@ from semantic_router.route import Route ...@@ -4,4 +4,4 @@ from semantic_router.route import Route
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"] __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
__version__ = "0.0.36" __version__ = "0.0.37"
...@@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder): ...@@ -114,7 +114,8 @@ class OpenAIEncoder(BaseEncoder):
return embeddings return embeddings
def _truncate(self, text: str) -> str: def _truncate(self, text: str) -> str:
tokens = self._token_encoder.encode(text) # we use encode_ordinary as faster equivalent to encode(text, disallowed_special=())
tokens = self._token_encoder.encode_ordinary(text)
if len(tokens) > self.token_limit: if len(tokens) > self.token_limit:
logger.warning( logger.warning(
f"Document exceeds token limit: {len(tokens)} > {self.token_limit}" f"Document exceeds token limit: {len(tokens)} > {self.token_limit}"
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment