diff --git a/docs/00-introduction.ipynb b/docs/00-introduction.ipynb index 7ce5f4e490aa0f7c5145e5fd3c8030aaf8d18c71..d03a09ffe670d9c35bb6cc67a1b7fbe276ed1d2b 100644 --- a/docs/00-introduction.ipynb +++ b/docs/00-introduction.ipynb @@ -65,7 +65,16 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\Program_Installation\\anaconda\\envs\\rag\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from semantic_router import Route\n", "\n", @@ -146,14 +155,36 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-04-19 18:34:06 INFO semantic_router.utils.logger local\u001b[0m\n" + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger local\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 1 length: 34\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 1 trunc length: 34\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 2 length: 51\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 2 trunc length: 51\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 3 length: 33\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 3 trunc length: 33\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 4 length: 33\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 4 trunc length: 33\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 5 length: 38\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 5 trunc length: 38\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 6 length: 27\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 6 trunc length: 27\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 7 length: 24\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 7 trunc length: 24\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 8 length: 21\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 8 trunc length: 21\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 9 length: 20\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 9 trunc length: 20\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 10 length: 25\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 10 trunc length: 25\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 11 length: 22\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:30 INFO semantic_router.utils.logger Document 11 trunc length: 22\u001b[0m\n" ] } ], @@ -172,16 +203,24 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-02 12:09:35 INFO semantic_router.utils.logger Document 1 length: 24\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:35 INFO semantic_router.utils.logger Document 1 trunc length: 24\u001b[0m\n" + ] + }, { "data": { "text/plain": [ "RouteChoice(name='politics', function_call=None, similarity_score=None)" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -192,16 +231,24 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-02 12:09:37 INFO semantic_router.utils.logger Document 1 length: 24\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:37 INFO semantic_router.utils.logger Document 1 trunc length: 24\u001b[0m\n" + ] + }, { "data": { "text/plain": [ "RouteChoice(name='chitchat', function_call=None, similarity_score=None)" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -219,16 +266,24 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-02 12:09:38 INFO semantic_router.utils.logger Document 1 length: 40\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:38 INFO semantic_router.utils.logger Document 1 trunc length: 40\u001b[0m\n" + ] + }, { "data": { "text/plain": [ "RouteChoice(name=None, function_call=None, similarity_score=None)" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -246,9 +301,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-02 12:09:40 INFO semantic_router.utils.logger Document 1 length: 35\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:40 INFO semantic_router.utils.logger Document 1 trunc length: 35\u001b[0m\n" + ] + }, { "data": { "text/plain": [ @@ -256,7 +319,7 @@ " RouteChoice(name='chitchat', function_call=None, similarity_score=0.8356239688161808)]" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -267,16 +330,24 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-02 12:09:42 INFO semantic_router.utils.logger Document 1 length: 40\u001b[0m\n", + "\u001b[32m2024-05-02 12:09:42 INFO semantic_router.utils.logger Document 1 trunc length: 40\u001b[0m\n" + ] + }, { "data": { "text/plain": [ "[]" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -309,7 +380,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/semantic_router/schema.py b/semantic_router/schema.py index 60f61536e08840e2c023fd12867cc960a14d3608..20b6ef825872ca329a90fc5230a49e877987f6d9 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -1,6 +1,5 @@ from enum import Enum -from typing import List, Optional - +from typing import List, Optional, Union, Any from pydantic.v1 import BaseModel @@ -52,7 +51,7 @@ class Message(BaseModel): class DocumentSplit(BaseModel): - docs: List[str] + docs: List[Union[str, Any]] is_triggered: bool = False triggered_score: Optional[float] = None token_count: Optional[int] = None @@ -60,7 +59,7 @@ class DocumentSplit(BaseModel): @property def content(self) -> str: - return " ".join(self.docs) + return " ".join([doc if isinstance(doc, str) else "" for doc in self.docs]) class Metric(Enum):