diff --git a/docs/00-introduction.ipynb b/docs/00-introduction.ipynb
index 7ce5f4e490aa0f7c5145e5fd3c8030aaf8d18c71..209aa103a441ac2eb5b16ef4e52e42811428142c 100644
--- a/docs/00-introduction.ipynb
+++ b/docs/00-introduction.ipynb
@@ -63,9 +63,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "d:\\Program_Installation\\anaconda\\envs\\rag\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "from semantic_router import Route\n",
     "\n",
@@ -91,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -118,7 +127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -146,14 +155,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2024-04-19 18:34:06 INFO semantic_router.utils.logger local\u001b[0m\n"
+      "\u001b[32m2024-05-02 12:38:34 INFO semantic_router.utils.logger local\u001b[0m\n"
      ]
     }
    ],
@@ -172,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -181,7 +190,7 @@
        "RouteChoice(name='politics', function_call=None, similarity_score=None)"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -192,7 +201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -201,7 +210,7 @@
        "RouteChoice(name='chitchat', function_call=None, similarity_score=None)"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -219,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -228,7 +237,7 @@
        "RouteChoice(name=None, function_call=None, similarity_score=None)"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -246,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -256,7 +265,7 @@
        " RouteChoice(name='chitchat', function_call=None, similarity_score=0.8356239688161808)]"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -267,7 +276,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -276,7 +285,7 @@
        "[]"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -309,7 +318,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.11.8"
   }
  },
  "nbformat": 4,
diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py
index 3cfa2a704189c00709d406073691f7916720e24c..24cac037b9ec93ea30882d151048ac32594ef8af 100644
--- a/semantic_router/encoders/openai.py
+++ b/semantic_router/encoders/openai.py
@@ -80,9 +80,7 @@ class OpenAIEncoder(BaseEncoder):
         if truncate:
             # check if any document exceeds token limit and truncate if so
             for i in range(len(docs)):
-                logger.info(f"Document {i+1} length: {len(docs[i])}")
                 docs[i] = self._truncate(docs[i])
-                logger.info(f"Document {i+1} trunc length: {len(docs[i])}")
 
         # Exponential backoff
         for j in range(1, 7):
diff --git a/semantic_router/schema.py b/semantic_router/schema.py
index 60f61536e08840e2c023fd12867cc960a14d3608..20b6ef825872ca329a90fc5230a49e877987f6d9 100644
--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -1,6 +1,5 @@
 from enum import Enum
-from typing import List, Optional
-
+from typing import List, Optional, Union, Any
 from pydantic.v1 import BaseModel
 
 
@@ -52,7 +51,7 @@ class Message(BaseModel):
 
 
 class DocumentSplit(BaseModel):
-    docs: List[str]
+    docs: List[Union[str, Any]]
     is_triggered: bool = False
     triggered_score: Optional[float] = None
     token_count: Optional[int] = None
@@ -60,7 +59,7 @@
 
     @property
     def content(self) -> str:
-        return " ".join(self.docs)
+        return " ".join([doc if isinstance(doc, str) else "" for doc in self.docs])
 
 
 class Metric(Enum):
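
Note on the schema.py change: DocumentSplit.docs now accepts non-string items, and the content property skips anything that is not a str when joining, instead of letting " ".join raise a TypeError. A minimal sketch of the new behaviour follows; FakeDoc is a hypothetical stand-in for any non-string document object, not part of the library.

# Sketch of the patched DocumentSplit behaviour (assumes semantic_router.schema
# as modified above; FakeDoc is a made-up example class).
from semantic_router.schema import DocumentSplit


class FakeDoc:
    def __init__(self, text: str):
        self.text = text


# `docs` may now mix plain strings with arbitrary objects (List[Union[str, Any]]).
split = DocumentSplit(docs=["chunk one", FakeDoc("chunk two"), "chunk three"])

# `content` joins only the string entries; non-string entries contribute "".
print(split.content)  # chunk one  chunk three

Whether a non-string doc should instead contribute something like its text attribute is left to the caller; the patched content only guarantees that joining no longer fails on mixed-type docs.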