From 44d74919959fd1fcefaa1784284fb6aea5b6460e Mon Sep 17 00:00:00 2001 From: zahid-syed <zahid.s2618@gmail.com> Date: Thu, 28 Mar 2024 14:28:51 -0400 Subject: [PATCH] Fixed coverage and added demo jpnyb --- docs/09-route-filter.ipynb | 324 ++++++++++++++++++++++++++++++++++++ docs/indexes/qdrant.ipynb | 325 +++++++++++++++++++++++++++++++++++++ tests/unit/test_layer.py | 22 ++- 3 files changed, 670 insertions(+), 1 deletion(-) create mode 100644 docs/09-route-filter.ipynb create mode 100644 docs/indexes/qdrant.ipynb diff --git a/docs/09-route-filter.ipynb b/docs/09-route-filter.ipynb new file mode 100644 index 00000000..bfddcad4 --- /dev/null +++ b/docs/09-route-filter.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/00-introduction.ipynb) [](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/00-introduction.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Semantic Router Filter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Semantic Router library can be used as a super fast route making layer on top of LLMs. That means rather than waiting on a slow agent to decide what to do, we can use the magic of semantic vector space to make routes. Cutting route making time down from seconds to milliseconds." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by installing the library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU semantic-router==0.0.29\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by defining a dictionary mapping routes to example phrases that should trigger those routes." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/zahidsyed/anaconda3/envs/semantic_router/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from semantic_router import Route\n", + "\n", + "politics = Route(\n", + " name=\"politics\",\n", + " utterances=[\n", + " \"isn't politics the best thing ever\",\n", + " \"why don't you tell me about your political opinions\",\n", + " \"don't you just love the president\",\n", + " \"don't you just hate the president\",\n", + " \"they're going to destroy this country!\",\n", + " \"they will save the country!\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's define another for good measure:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "chitchat = Route(\n", + " name=\"chitchat\",\n", + " utterances=[\n", + " \"how's the weather today?\",\n", + " \"how are things going?\",\n", + " \"lovely weather today\",\n", + " \"the weather is horrendous\",\n", + " \"let's go to the chippy\",\n", + " ],\n", + ")\n", + "\n", + "routes = [politics, chitchat]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we initialize our embedding model:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "from semantic_router.encoders import CohereEncoder, OpenAIEncoder\n", + "os.environ[\"COHERE_API_KEY\"] = os.getenv(\"COHERE_API_KEY\") or getpass(\n", + " \"Enter Cohere API Key: \"\n", + ")\n", + "# os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\") or getpass(\n", + "# \"Enter OpenAI API Key: \"\n", + "# )\n", + "\n", + "encoder = CohereEncoder()\n", + "# encoder = OpenAIEncoder()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we define the `RouteLayer`. When called, the route layer will consume text (a query) and output the category (`Route`) it belongs to — to initialize a `RouteLayer` we need our `encoder` model and a list of `routes`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-03-28 14:24:37 INFO semantic_router.utils.logger local\u001b[0m\n" + ] + } + ], + "source": [ + "from semantic_router.layer import RouteLayer\n", + "\n", + "rl = RouteLayer(encoder=encoder, routes=routes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can test it:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name='politics', function_call=None, similarity_score=None)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"don't you love politics?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rl(\"how's the weather today?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both are classified accurately, what if we send a query that is unrelated to our existing `Route` objects?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name=None, function_call=None, similarity_score=None)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"I'm interested in learning about llama 2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, we return `None` because no matches were identified." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demonstrating the Filter Feature\n", + "\n", + "Now, let's demonstrate the filter feature. We can specify a subset of routes to consider when making a classification. This can be useful if we want to restrict the scope of possible routes based on some context.\n", + "\n", + "For example, let's say we only want to consider the \"chitchat\" route for a particular query:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name='chitchat', function_call=None, similarity_score=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"don't you love politics?\", route_filter=[\"chitchat\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even though the query might be more related to the \"politics\" route, it will be classified as \"chitchat\" because we've restricted the routes to consider.\n", + "\n", + "Similarly, we can restrict it to the \"politics\" route:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name=None, function_call=None, similarity_score=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"how's the weather today?\", route_filter=[\"politics\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, it will return None because the query doesn't match the \"politics\" route well enough to pass the threshold.\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "decision-layer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/indexes/qdrant.ipynb b/docs/indexes/qdrant.ipynb new file mode 100644 index 00000000..56427753 --- /dev/null +++ b/docs/indexes/qdrant.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU \"semantic-router[qdrant]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from semantic_router import Route\n", + "\n", + "# we could use this as a guide for our chatbot to avoid political conversations\n", + "politics = Route(\n", + " name=\"politics\",\n", + " utterances=[\n", + " \"isn't politics the best thing ever\",\n", + " \"why don't you tell me about your political opinions\",\n", + " \"don't you just love the president\" \"don't you just hate the president\",\n", + " \"they're going to destroy this 
country!\",\n", + " \"they will save the country!\",\n", + " ],\n", + ")\n", + "\n", + "# this could be used as an indicator to our chatbot to switch to a more\n", + "# conversational prompt\n", + "chitchat = Route(\n", + " name=\"chitchat\",\n", + " utterances=[\n", + " \"how's the weather today?\",\n", + " \"how are things going?\",\n", + " \"lovely weather today\",\n", + " \"the weather is horrendous\",\n", + " \"let's go to the chippy\",\n", + " ],\n", + ")\n", + "\n", + "# we place both of our decisions together into single list\n", + "routes = [politics, chitchat]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "from semantic_router.encoders import CohereEncoder\n", + "os.environ[\"COHERE_API_KEY\"] = os.environ.get(\"COHERE_API_KEY\") or getpass(\n", + " \"Enter COHERE API key: \"\n", + ")\n", + "encoder = CohereEncoder()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from semantic_router.index.qdrant import QdrantIndex\n", + "\n", + "\n", + "qd_index = QdrantIndex(location=\":memory:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-03-27 18:22:42 INFO semantic_router.utils.logger local\u001b[0m\n" + ] + } + ], + "source": [ + "from semantic_router.layer import RouteLayer\n", + "\n", + "rl = RouteLayer(encoder=encoder, routes=routes, index=qd_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check our route layer and index information." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['politics', 'chitchat']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl.list_route_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(rl.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And query:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'politics'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"don't you love politics?\").name" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'chitchat'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"how's the weather today?\").name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rl(\"I'm interested in learning about llama 2\").name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can delete or update routes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(rl.index)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import time\n", + "\n", + "rl.delete(route_name=\"chitchat\")\n", + "time.sleep(1)\n", + "len(rl.index)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "rl(\"how's the weather today?\").name" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('politics', 'they will save the country!'),\n", + " ('politics', \"isn't politics the best thing ever\"),\n", + " ('politics', \"why don't you tell me about your political opinions\"),\n", + " ('politics', \"they're going to destroy this country!\"),\n", + " ('politics',\n", + " \"don't you just love the presidentdon't you just hate the president\")]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl.index.get_routes()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'type': 'qdrant', 'dimensions': 1024, 'vectors': 5}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl.index.describe()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "semantic_router_1", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/unit/test_layer.py b/tests/unit/test_layer.py index bd3ceaec..8af6e1cc 100644 --- a/tests/unit/test_layer.py +++ b/tests/unit/test_layer.py @@ -7,6 +7,7 @@ import pytest from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder from semantic_router.index.local import LocalIndex +from semantic_router.index.pinecone import PineconeIndex from semantic_router.index.qdrant import QdrantIndex from semantic_router.layer import LayerConfig, RouteLayer from semantic_router.llms.base import BaseLLM @@ -126,7 +127,6 @@ def get_test_indexes(): if importlib.util.find_spec("qdrant_client") is not None: indexes.append(QdrantIndex) - return indexes @@ -249,6 +249,26 @@ class TestRouteLayer: encoder=openai_encoder, routes=routes, index=index_cls() ) query_result = route_layer(text="Hello", route_filter=["Route 1"]).name + + try: + route_layer(text="Hello", route_filter=["Route 8"]).name + except ValueError: + assert True + + assert query_result in ["Route 1"] + + def test_query_filter_pinecone(self, openai_encoder, routes, index_cls): + pineconeindex = PineconeIndex() + route_layer = RouteLayer( + encoder=openai_encoder, routes=routes, index=pineconeindex + ) + query_result = route_layer(text="Hello", route_filter=["Route 1"]).name + + try: + route_layer(text="Hello", route_filter=["Route 8"]).name + except ValueError: + assert True + assert query_result in ["Route 
1"] def test_query_with_no_index(self, openai_encoder, index_cls): -- GitLab