Linting.

fa022a57 · Siraj R Aizlewood · 63049814 · fa022a57 · fa022a57 · fa022a57
Unverified Commit fa022a57 authored 1 year ago by Siraj R Aizlewood
--- a/docs/07-route-conversations-by-topic.ipynb
+++ b/docs/07-route-conversations-by-topic.ipynb
@@ -71,7 +71,9 @@
    "    Message(role=\"user\", content=\"Hi, my name is John Doe.\"),\n",
    "    Message(role=\"bot\", content=\"Okay, how can I help you today?\"),\n",
    "    Message(role=\"user\", content=\"My computer keeps crashing\"),\n",
-    "    Message(role=\"bot\", content=\"Okay, is our software running when the computer crashes.\"),\n",
+    "    Message(\n",
+    "        role=\"bot\", content=\"Okay, is our software running when the computer crashes.\"\n",
+    "    ),\n",
    "    Message(role=\"user\", content=\"Yeah, v.3.11.2 is running when it crashes.\"),\n",
    "]\n",
    "\n",
@@ -95,10 +97,10 @@
    "from semantic_router.encoders.cohere import CohereEncoder\n",
    "\n",
    "cohere_encoder = CohereEncoder(\n",
-    "    name=\"embed-english-v3.0\", \n",
-    "    cohere_api_key='',\n",
+    "    name=\"embed-english-v3.0\",\n",
+    "    cohere_api_key=\"\",\n",
    "    input_type=\"search_document\",\n",
-    "    )"
+    ")"
   ]
  },
  {
@@ -134,9 +136,7 @@
   ],
   "source": [
    "conversation.configure_splitter(\n",
-    "    encoder=cohere_encoder, \n",
-    "    threshold=0.5, \n",
-    "    split_method=\"cumulative_similarity\"\n",
+    "    encoder=cohere_encoder, threshold=0.5, split_method=\"cumulative_similarity\"\n",
    ")\n",
    "\n",
    "all_topics, new_topics = conversation.split_by_topic()\n",
@@ -191,7 +191,9 @@
   "source": [
    "# Define the IT support dialogue\n",
    "messages = [\n",
-    "    Message(role=\"user\", content=\"What do the system logs say, right before the crash?\"),\n",
+    "    Message(\n",
+    "        role=\"user\", content=\"What do the system logs say, right before the crash?\"\n",
+    "    ),\n",
    "    Message(role=\"user\", content=\"I'll check soon, but first let's talk refund.\"),\n",
    "    Message(role=\"bot\", content=\"Okay let me sort out a refund.\"),\n",
    "]\n",
@@ -223,9 +225,7 @@
   ],
   "source": [
    "conversation.configure_splitter(\n",
-    "    encoder=cohere_encoder, \n",
-    "    threshold=0.5, \n",
-    "    split_method=\"cumulative_similarity\"\n",
+    "    encoder=cohere_encoder, threshold=0.5, split_method=\"cumulative_similarity\"\n",
    ")\n",
    "\n",
    "all_topics, new_topics = conversation.split_by_topic()\n",

 %% Cell type:markdown id: tags:

 # Split Conversations by Topic

 %% Cell type:markdown id: tags:

 Topic Splitters have been implemented in the code in `semantic-router/splitters`.

 These allow a set of utterances to be automatically grouped/clustered into (un-labelled) topics.

 Additionally, splitters have been integrated with `Conversation` objects, allowing conversations to be progressively split by topic as they evolve. This is beneficial to routing, as earlier messages in a conversation topic might provide useful context when determining routes. By using all utterances in the latest conversation topic, this additional context allows correct routes to be chosen more reliably.

 %% Cell type:markdown id: tags:


 %% Cell type:markdown id: tags:

 ## Example: IT Support Dialogue

 %% Cell type:markdown id: tags:

 ### Setup

 %% Cell type:markdown id: tags:

 First, we import the necessary classes and initialize the conversation with dialogue.

 %% Cell type:code id: tags:

 ``` python
 from semantic_router.text import Conversation
 from semantic_router.schema import Message

 # Initialize the Conversation
 conversation = Conversation()

 # Define the IT support dialogue
 messages = [
    Message(role="user", content="Hi, there, please can you confirm your full name"),
    Message(role="user", content="Hi, my name is John Doe."),
    Message(role="bot", content="Okay, how can I help you today?"),
    Message(role="user", content="My computer keeps crashing"),
-    Message(role="bot", content="Okay, is our software running when the computer crashes."),
+    Message(
+        role="bot", content="Okay, is our software running when the computer crashes."
+    ),
    Message(role="user", content="Yeah, v.3.11.2 is running when it crashes."),
 ]

 # Add messages to the conversation
 conversation.add_new_messages(messages)
 ```

 %% Output

    c:\Users\Siraj\Documents\Personal\Work\Aurelio\20240130 2148 Semantic Topic Splitter (Siraj Local Repo)\venvs\semantic_splitter_1\Lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
      from .autonotebook import tqdm as notebook_tqdm

 %% Cell type:markdown id: tags:

 ### Initialize an Encoder

 %% Cell type:code id: tags:

 ``` python
 from semantic_router.encoders.cohere import CohereEncoder

 cohere_encoder = CohereEncoder(
    name="embed-english-v3.0",
-    cohere_api_key='',
+    cohere_api_key="",
    input_type="search_document",
-    )
+)
 ```

 %% Cell type:markdown id: tags:


 %% Cell type:markdown id: tags:

 ### Split Conversation by Topic

 %% Cell type:code id: tags:

 ``` python
 conversation.configure_splitter(
-    encoder=cohere_encoder,
-    threshold=0.5,
-    split_method="cumulative_similarity"
+    encoder=cohere_encoder, threshold=0.5, split_method="cumulative_similarity"
 )

 all_topics, new_topics = conversation.split_by_topic()

 # Display all topics
 print(conversation)
 ```

 %% Output

    No unclustered messages to process.
    [31muser: Hi, there, please can you confirm your full name[0m
    [31muser: Hi, my name is John Doe.[0m
    [32mbot: Okay, how can I help you today?[0m
    [33muser: My computer keeps crashing[0m
    [33mbot: Okay, is our software running when the computer crashes.[0m
    [33muser: Yeah, v.3.11.2 is running when it crashes.[0m

 %% Cell type:markdown id: tags:

 Notice that the last message says "Yeah, v.3.11.2 is running when it crashes."

 This might be correctly routed by the semantic-router, particularly if the route is quite generic, intended for "software" and/or "crashes".

 However, as an illustrative example, what if the routes were

 Route A: "Software Crashes - v3.11"

 Route B: "Computer Crashes - v3.11"

 If just the last utterance was used, then Route A would likely be chosen. However, if instead every utterance from the last topic (Topic 4), concatenated together, were sent to the semantic-router, then this important additional context (the user said it is their *computer* that keeps crashing) would most likely result in Route B being chosen.

 %% Cell type:markdown id: tags:

 ### Topic Splitting After Topic Continuation

 %% Cell type:markdown id: tags:

 Note that topics can be continued even after `conversation.split_by_topic()` has already been run.

 %% Cell type:markdown id: tags:

 Add some new messages.

 %% Cell type:code id: tags:

 ``` python
 # Define the IT support dialogue
 messages = [
-    Message(role="user", content="What do the system logs say, right before the crash?"),
+    Message(
+        role="user", content="What do the system logs say, right before the crash?"
+    ),
    Message(role="user", content="I'll check soon, but first let's talk refund."),
    Message(role="bot", content="Okay let me sort out a refund."),
 ]

 # Add messages to the conversation
 conversation.add_new_messages(messages)
 ```

 %% Cell type:code id: tags:

 ``` python
 conversation.configure_splitter(
-    encoder=cohere_encoder,
-    threshold=0.5,
-    split_method="cumulative_similarity"
+    encoder=cohere_encoder, threshold=0.5, split_method="cumulative_similarity"
 )

 all_topics, new_topics = conversation.split_by_topic()

 print(conversation)
 ```

 %% Output

    [31muser: Hi, there, please can you confirm your full name[0m
    [31muser: Hi, my name is John Doe.[0m
    [32mbot: Okay, how can I help you today?[0m
    [33muser: My computer keeps crashing[0m
    [33mbot: Okay, is our software running when the computer crashes.[0m
    [33muser: Yeah, v.3.11.2 is running when it crashes.[0m
    [33muser: What do the system logs say, right before the crash?[0m
    [34muser: I'll check soon, but first let's talk refund.[0m
    [34mbot: Okay let me sort out a refund.[0m

 %% Cell type:markdown id: tags:

 As you can see, we:

 1) Added the first six messages, as seen above, to the `Conversation`.
 2) Ran the Topic Splitter.
 3) Added the last three messages to the `Conversation`.
 4) Ran the Topic Splitter again.

 Despite "user: Yeah, v.3.11.2 is running when it crashes." and "user: What do the system logs say, right before the crash?" being added separately, and despite the conversation splitter being run twice (once before "user: What do the system logs say, right before the crash?" was added, and once after), both of these utterances were successfully assigned to the same topic - `Topic 4`.

 %% Cell type:markdown id: tags:


 %% Cell type:markdown id: tags:


--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -64,7 +64,7 @@ class Message(BaseModel):

    def to_llamacpp(self):
        return {"role": self.role, "content": self.content}
-    
+
    def __str__(self):
        return f"{self.role}: {self.content}"


--- a/semantic_router/text.py
+++ b/semantic_router/text.py
@@ -19,7 +19,7 @@ colors = [
    Fore.YELLOW,
    Fore.BLUE,
    Fore.MAGENTA,
-    Fore.CYAN
+    Fore.CYAN,
 ]


@@ -48,10 +48,9 @@ class Conversation(BaseModel):
                return_str_list.append(f"{colors[color_idx]}{message}{Style.RESET_ALL}")
            return "\n".join(return_str_list)

-
    def add_new_messages(self, new_messages: List[Message]):
        """Adds new messages to the conversation.
-        
+
        :param messages: The new messages to be added to the conversation.
        :type messages: List[Message]
        """
@@ -148,7 +147,9 @@ class Conversation(BaseModel):
            for message in topic.docs:
                self.topics.append((i, message))

-    def split_by_topic(self, force: bool = False) -> Tuple[List[Tuple[int, str]], List[DocumentSplit]]:
+    def split_by_topic(
+        self, force: bool = False
+    ) -> Tuple[List[Tuple[int, str]], List[DocumentSplit]]:
        """
        Splits the messages into topics based on their semantic similarity.