diff --git a/docs/examples/hybrid-layer.ipynb b/docs/examples/hybrid-layer.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..589e13a1e4d3fc1d924c7bb851c19c56f9f30dd5
--- /dev/null
+++ b/docs/examples/hybrid-layer.ipynb
@@ -0,0 +1,420 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Semantic Router: Hybrid Layer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The Hybrid Layer in the Semantic Router library can improve decision-making performance, particularly for niche use cases that contain specific terminology, such as finance or medicine. It lets us give more weight to the keywords contained in our utterances and user queries when making decisions."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Getting Started"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We start by installing the library:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -qU semantic-router==0.0.5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we set our Cohere API key and define `Decision` objects, each mapping a decision name to example phrases that should trigger that decision."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"COHERE_API_KEY\"] = \"<<APIKEY>>\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/jamesbriggs/opt/anaconda3/envs/decision-layer/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from semantic_router.schema import Decision\n",
+    "\n",
+    "politics = Decision(\n",
+    "    name=\"politics\",\n",
+    "    utterances=[\n",
+    "        \"isn't politics the best thing ever\",\n",
+    "        \"why don't you tell me about your political opinions\",\n",
+    "        \"don't you just love the president\",\n",
+    "        \"don't you just hate the president\",\n",
+    "        \"they're going to destroy this country!\",\n",
+    "        \"they will save the country!\",\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's define another for good measure:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chitchat = Decision(\n",
+    "    name=\"chitchat\",\n",
+    "    utterances=[\n",
+    "        \"how's the weather today?\",\n",
+    "        \"how are things going?\",\n",
+    "        \"lovely weather today\",\n",
+    "        \"the weather is horrendous\",\n",
+    "        \"let's go to the chippy\",\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "decisions = [politics, chitchat]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we initialize our embedding model:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from semantic_router.encoders import CohereEncoder\n",
+    "from getpass import getpass\n",
+    "\n",
+    "os.environ[\"COHERE_API_KEY\"] = os.getenv(\"COHERE_API_KEY\") or getpass(\n",
+    "    \"Enter Cohere API Key: \"\n",
+    ")\n",
+    "\n",
+    "encoder = CohereEncoder()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we define the `HybridDecisionLayer`. When called, the decision layer will consume text (a query) and output the category (`Decision`) it belongs to. To initialize a `HybridDecisionLayer` we need our `encoder` model and a list of `decisions`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2067848296 1405\n",
+      "2212344012 2520\n",
+      "3313717465 206\n",
+      "3076736765 769\n",
+      "1778150425 4131\n",
+      "2067848296 1405\n",
+      "202708381 770\n",
+      "2212344012 2520\n",
+      "3374841595 2375\n",
+      "2067848296 1405\n",
+      "3508911095 2067\n",
+      "3454774732 not in encoder.idx_mapping\n",
+      "2379717389 3565\n",
+      "298452803 4356\n",
+      "1063320047 3369\n",
+      "4186256544 713\n",
+      "1846246980 858\n",
+      "3897916792 643\n",
+      "575623047 1476\n",
+      "3897916792 643\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[1;32m/Users/jamesbriggs/Documents/projects/aurelio-labs/semantic-router/docs/examples/hybrid-layer.ipynb Cell 14\u001b[0m line \u001b[0;36m3\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/jamesbriggs/Documents/projects/aurelio-labs/semantic-router/docs/examples/hybrid-layer.ipynb#X16sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msemantic_router\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mlayer\u001b[39;00m \u001b[39mimport\u001b[39;00m HybridDecisionLayer\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/jamesbriggs/Documents/projects/aurelio-labs/semantic-router/docs/examples/hybrid-layer.ipynb#X16sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m dl \u001b[39m=\u001b[39m HybridDecisionLayer(encoder\u001b[39m=\u001b[39;49mencoder, decisions\u001b[39m=\u001b[39;49mdecisions)\n",
+      "File \u001b[0;32m~/Documents/projects/aurelio-labs/semantic-router/semantic_router/layer.py:137\u001b[0m, in \u001b[0;36mHybridDecisionLayer.__init__\u001b[0;34m(self, encoder, decisions, alpha)\u001b[0m\n\u001b[1;32m    134\u001b[0m \u001b[39mif\u001b[39;00m decisions:\n\u001b[1;32m    135\u001b[0m     \u001b[39m# initialize index now\u001b[39;00m\n\u001b[1;32m    136\u001b[0m     \u001b[39mfor\u001b[39;00m decision \u001b[39min\u001b[39;00m decisions:\n\u001b[0;32m--> 137\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_add_decision(decision\u001b[39m=\u001b[39;49mdecision)\n",
+      "File \u001b[0;32m~/Documents/projects/aurelio-labs/semantic-router/semantic_router/layer.py:156\u001b[0m, in \u001b[0;36mHybridDecisionLayer._add_decision\u001b[0;34m(self, decision)\u001b[0m\n\u001b[1;32m    154\u001b[0m sparse_embeds \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39msparse_encoder(decision\u001b[39m.\u001b[39mutterances)\n\u001b[1;32m    155\u001b[0m \u001b[39m# concatenate vectors to create hybrid vecs\u001b[39;00m\n\u001b[0;32m--> 156\u001b[0m embeds \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49mconcatenate([\n\u001b[1;32m    157\u001b[0m     dense_embeds, sparse_embeds\n\u001b[1;32m    158\u001b[0m ], axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m)\n\u001b[1;32m    160\u001b[0m \u001b[39m# create decision array\u001b[39;00m\n\u001b[1;32m    161\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcategories \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
+      "\u001b[0;31mValueError\u001b[0m: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)"
+     ]
+    }
+   ],
+   "source": [
+    "from semantic_router.layer import HybridDecisionLayer\n",
+    "\n",
+    "dl = HybridDecisionLayer(encoder=encoder, decisions=decisions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl(\"don't you love politics?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if 3454774732 in encoder.idx_mapping:\n",
+    "    print(\"yes\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from semantic_router.encoders import BM25Encoder\n",
+    "\n",
+    "encoder = BM25Encoder()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tests = [\"hello this is some text\", \"and more stuff\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "idx_list = encoder.model.get_params()['doc_freq']['indices']\n",
+    "idx_list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sparse_dicts = encoder.model.encode_documents(tests)\n",
+    "sparse_dicts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeds = [0.0] * len(encoder.idx_mapping)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for output in sparse_dicts:\n",
+    "    indices = output[\"indices\"]\n",
+    "    values = output[\"values\"]\n",
+    "    for idx, val in zip(indices, values):\n",
+    "        position = encoder.idx_mapping[idx]\n",
+    "        embeds[position] = val"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "encoder.idx_mapping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "encoded_output = encoder(tests)\n",
+    "encoded_output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "sparse_vec = np.zeros(len(idx_list))\n",
+    "idx_position_dict = {idx: i for i, idx in enumerate(idx_list)}\n",
+    "\n",
+    "for output in encoded_output:\n",
+    "    indices = output['indices']\n",
+    "    values = output['values']\n",
+    "    for idx, value in zip(indices, values):\n",
+    "        if idx in idx_position_dict:\n",
+    "            position = idx_position_dict[idx]\n",
+    "            sparse_vec[position] = value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sparse_vec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sparse_vec.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can test it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl(\"don't you love politics?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl(\"how's the weather today?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Both are classified accurately. What if we send a query that is unrelated to our existing `Decision` objects?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl(\"I'm interested in learning about llama 2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this case, we return `None` because no matches were identified."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "decision-layer",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/poetry.lock b/poetry.lock
index a2617c1c1dd5aaacf48d1efc23c5412f7af5f947..f47ac40ca97c59a3e5e3d18ea89f13bd816f3729 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -763,6 +763,17 @@ docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alab
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]
 
+[[package]]
+name = "joblib"
+version = "1.3.2"
+description = "Lightweight pipelining with Python functions"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"},
+    {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"},
+]
+
 [[package]]
 name = "jupyter-client"
 version = "8.6.0"
@@ -819,6 +830,50 @@ files = [
 [package.dependencies]
 traitlets = "*"
 
+[[package]]
+name = "mmh3"
+version = "3.1.0"
+description = "Python wrapper for MurmurHash (MurmurHash3), a set of fast and robust hash functions."
+optional = false
+python-versions = "*"
+files = [
+    {file = "mmh3-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:16ee043b1bac040b4324b8baee39df9fdca480a560a6d74f2eef66a5009a234e"},
+    {file = "mmh3-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04ac865319e5b36148a4b6cdf27f8bda091c47c4ab7b355d7f353dfc2b8a3cce"},
+    {file = "mmh3-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e751f5433417a21c2060b0efa1afc67cfbe29977c867336148c8edb086fae70"},
+    {file = "mmh3-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdb863b89c1b34e3681d4a3b15d424734940eb8036f3457cb35ef34fb87a503c"},
+    {file = "mmh3-3.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1230930fbf2faec4ddf5b76d0768ae73c102de173c301962bdd468177275adf9"},
+    {file = "mmh3-3.1.0-cp310-cp310-win32.whl", hash = "sha256:b8ed7a2361718795a1b519a08d05f44947a20b27e202b53946561a00dde669c1"},
+    {file = "mmh3-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:29e878e7467a000f34ab68c218ad7ad81312c0a94bc10df3c50a48bcad39dd83"},
+    {file = "mmh3-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c271472325b70d64a4fbb1f2e964ca5b093ac10258e1390f8408890b065868fe"},
+    {file = "mmh3-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0109320f7e0e262123ff4f1acd06acfbc8b3bf19cc13d98c0bc369264430aaeb"},
+    {file = "mmh3-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:524e29dfe66499695f9496edcfc96782d130aabd6ba12c50c72372163cc6f3ea"},
+    {file = "mmh3-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66bdb06a03074e65e614da1aa199b1d16c90608bec9d8fc3faa81d887ffe93cc"},
+    {file = "mmh3-3.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a4d471eb75df8320061ab3b8cbe11c970be9f116b01bc2222ebda9c0a777520"},
+    {file = "mmh3-3.1.0-cp311-cp311-win32.whl", hash = "sha256:a886d9ce995a4bdfd7a600ddf61b9015cccbc73c50b898f8ff3c78af24384710"},
+    {file = "mmh3-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:5edb5ac882c04aff8a2a18ae8b74a0c339ac9b83db9820d8456f518bb558e0d8"},
+    {file = "mmh3-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:190fd10981fbd6c67e10ce3b56bcc021562c0df0fee2e2864347d64e65b1783a"},
+    {file = "mmh3-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd781b115cf649811cfde76368c33d2e553b6f88bb41131c314f30d8e65e9d24"},
+    {file = "mmh3-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48bb0a867077acc1f548591ad49506389f36d18f36dccd10becf071e5cbdda4"},
+    {file = "mmh3-3.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d0936a82438e340636a11b9a938378870fc1c7a139632dac09a9a9277351704"},
+    {file = "mmh3-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:d196cc035c2238493248522ae4e54c3cb790549b1564f6dea4d88dfe4b326313"},
+    {file = "mmh3-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:731d37f089b6c212fab1beea24e673161146eb6c76baf9ac074a3424d1172d41"},
+    {file = "mmh3-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9977fb81f8c66f4eee8439734a18dba7826fe78723d15ab53f42db977005be0f"},
+    {file = "mmh3-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bf4f3f20a8b8405c08b13bc9e4ac33bf55129b50b535cd07ce1891b7f96326ac"},
+    {file = "mmh3-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87cdbc6e70099ad92f17a28b4054ffb1938657e8fb7c1e4e03b194a1b4683fd6"},
+    {file = "mmh3-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6dd81321d14f62aa3711f30533c85a74dc7596e0fee63c8eddd375bc92ab846c"},
+    {file = "mmh3-3.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e6eba88e5c1a2778f3de00a9502e3c214ebb757337ece2a7d71e060d188ddfa"},
+    {file = "mmh3-3.1.0-cp38-cp38-win32.whl", hash = "sha256:d91e696925f208d28f3bb7bdf29815524ce955248276af256519bd3538c411ce"},
+    {file = "mmh3-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:cbc2917df568aeb86ec5aa863bfb20fa14e01039cbdce7650efbabc30960df49"},
+    {file = "mmh3-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b22832d565128be83d69f5d49243bb567840a954df377c9f5b26646a6eec39b"},
+    {file = "mmh3-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ced92a0e285a9111413541c197b0c17d280cee96f7c564b258caf5de5ab8ee01"},
+    {file = "mmh3-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f906833753b4ddcb690c2c1b74e77725868bc3a8b762b7a77737d08be89ae41d"},
+    {file = "mmh3-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72b5685832a7a87a55ebff481794bc410484d7bd4c5e80dae4d8ac50739138ef"},
+    {file = "mmh3-3.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d2aa4d422c7c088bbc5d367b45431268ebe6742a0a64eade93fab708e25757c"},
+    {file = "mmh3-3.1.0-cp39-cp39-win32.whl", hash = "sha256:4459bec818f534dc8378568ad89ab310ff47cda3e00ab322edce48dd899bba32"},
+    {file = "mmh3-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:03e04b3480e71828f48d17653451a3286555f0534942cb6ba93065b10ad5f9dc"},
+    {file = "mmh3-3.1.0.tar.gz", hash = "sha256:9b0f2b2ab4a915333c9d1089572e290a021ebb5b900bb7f7114dccc03995d732"},
+]
+
 [[package]]
 name = "multidict"
 version = "6.0.4"
@@ -924,6 +979,65 @@ files = [
     {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"},
 ]
 
+[[package]]
+name = "nltk"
+version = "3.8.1"
+description = "Natural Language Toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"},
+    {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"},
+]
+
+[package.dependencies]
+click = "*"
+joblib = "*"
+regex = ">=2021.8.3"
+tqdm = "*"
+
+[package.extras]
+all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"]
+corenlp = ["requests"]
+machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
+plot = ["matplotlib"]
+tgrep = ["pyparsing"]
+twitter = ["twython"]
+
+[[package]]
+name = "numpy"
+version = "1.25.2"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
+    {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
+    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
+    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
+    {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
+    {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
+    {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
+    {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
+    {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
+    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
+    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
+    {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
+    {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
+    {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
+    {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
+    {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
+    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
+    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
+    {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
+    {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
+    {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
+    {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
+]
+
 [[package]]
 name = "openai"
 version = "0.28.1"
@@ -997,6 +1111,28 @@ files = [
 [package.dependencies]
 ptyprocess = ">=0.5"
 
+[[package]]
+name = "pinecone-text"
+version = "0.7.0"
+description = "Text utilities library by Pinecone.io"
+optional = false
+python-versions = ">=3.8,<4.0"
+files = [
+    {file = "pinecone_text-0.7.0-py3-none-any.whl", hash = "sha256:d20c7adc2259965a30fcbcf93a5eeb3f8d12babc9ea65ba858f1a6a5973d0737"},
+    {file = "pinecone_text-0.7.0.tar.gz", hash = "sha256:8bda3c7337511dfb61da541299024ee73dbbed5d94e2af558a12357591b46174"},
+]
+
+[package.dependencies]
+mmh3 = ">=3.1.0,<4.0.0"
+nltk = ">=3.6.5,<4.0.0"
+numpy = ">=1.21.5,<=1.25.2"
+wget = ">=3.2,<4.0"
+
+[package.extras]
+dense = ["openai (>=1.2.3,<2.0.0)", "sentence-transformers (>=2.0.0)", "torch (>=1.13.1)", "transformers (>=4.26.1)"]
+openai = ["openai (>=1.2.3,<2.0.0)"]
+splade = ["sentence-transformers (>=2.0.0)", "torch (>=1.13.1)", "transformers (>=4.26.1)"]
+
 [[package]]
 name = "platformdirs"
 version = "4.0.0"
@@ -1298,6 +1434,103 @@ files = [
 [package.dependencies]
 cffi = {version = "*", markers = "implementation_name == \"pypy\""}
 
+[[package]]
+name = "regex"
+version = "2023.10.3"
+description = "Alternative regular expression module, to replace re."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
+    {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"},
+    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"},
+    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"},
+    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"},
+    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"},
+    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"},
+    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"},
+    {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"},
+    {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"},
+    {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"},
+    {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"},
+    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"},
+    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"},
+    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"},
+    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"},
+    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"},
+    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"},
+    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"},
+    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"},
+    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"},
+    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"},
+    {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"},
+    {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"},
+    {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"},
+    {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"},
+    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"},
+    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"},
+    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"},
+    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"},
+    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"},
+    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"},
+    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"},
+    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"},
+    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"},
+    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"},
+    {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"},
+    {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"},
+    {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"},
+    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"},
+    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"},
+    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"},
+    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"},
+    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"},
+    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"},
+    {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"},
+    {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"},
+    {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"},
+    {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"},
+    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"},
+    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"},
+    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"},
+    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"},
+    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"},
+    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"},
+    {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"},
+    {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"},
+    {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"},
+    {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"},
+    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"},
+    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"},
+    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"},
+    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"},
+    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"},
+    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"},
+    {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"},
+    {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"},
+    {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"},
+]
+
 [[package]]
 name = "requests"
 version = "2.31.0"
@@ -1479,6 +1712,16 @@ files = [
     {file = "wcwidth-0.2.10.tar.gz", hash = "sha256:390c7454101092a6a5e43baad8f83de615463af459201709556b6e4b1c861f97"},
 ]
 
+[[package]]
+name = "wget"
+version = "3.2"
+description = "pure python download utility"
+optional = false
+python-versions = "*"
+files = [
+    {file = "wget-3.2.zip", hash = "sha256:35e630eca2aa50ce998b9b1a127bb26b30dfee573702782aa982f875e3f16061"},
+]
+
 [[package]]
 name = "yarl"
 version = "1.9.2"
@@ -1584,4 +1827,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "7955e07ea098c2e8b29421733eb5ec6c06cbbc5bf64bd88451baa1a42c71e6b2"
+content-hash = "4953e126f4e42186a812ca444ae887f723a5b76943234b7897df3bd8563944a3"
diff --git a/pyproject.toml b/pyproject.toml
index f45792aa7ef63d8fb74d1b32ce0c9b0aeb7da0c8..f2b4ba2e2e621f42e4d82909c623a86d0013024e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ python = "^3.10"
 pydantic = "^1.8.2"
 openai = "^0.28.1"
 cohere = "^4.32"
+pinecone-text = "^0.7.0"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ac1e314ef1d854ad9e240c22354bf7b980428d01 100644
--- a/semantic_router/__init__.py
+++ b/semantic_router/__init__.py
@@ -0,0 +1,3 @@
+from .layer import DecisionLayer, HybridDecisionLayer
+
+__all__ = ["DecisionLayer", "HybridDecisionLayer"]
\ No newline at end of file
diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c86ce7c47290d1490565837bb8a98a5631941f2
--- /dev/null
+++ b/semantic_router/encoders/__init__.py
@@ -0,0 +1,6 @@
+from .base import BaseEncoder
+from .cohere import CohereEncoder
+from .openai import OpenAIEncoder
+from .bm25 import BM25Encoder
+
+__all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder"]
diff --git a/semantic_router/retrievers/base.py b/semantic_router/encoders/base.py
similarity index 88%
rename from semantic_router/retrievers/base.py
rename to semantic_router/encoders/base.py
index 4274e074cd26c52318ba287d12e4ca19fe36aac1..b6de1f89b80f42ea42d3abe76bbf61441936aa6e 100644
--- a/semantic_router/retrievers/base.py
+++ b/semantic_router/encoders/base.py
@@ -1,7 +1,7 @@
 from pydantic import BaseModel
 
 
-class BaseRetriever(BaseModel):
+class BaseEncoder(BaseModel):
     name: str
 
     class Config:
diff --git a/semantic_router/encoders/bm25.py b/semantic_router/encoders/bm25.py
new file mode 100644
index 0000000000000000000000000000000000000000..344f0820ae354645331dc6512456ffe65c60113b
--- /dev/null
+++ b/semantic_router/encoders/bm25.py
@@ -0,0 +1,40 @@
+from pinecone_text.sparse import BM25Encoder as encoder
+
+from semantic_router.encoders import BaseEncoder
+
+
+class BM25Encoder(BaseEncoder):
+    """Sparse BM25 encoder (pinecone-text) emitting fixed-length vectors."""
+    model: encoder | None = None
+    idx_mapping: dict[int, int] | None = None  # token hash -> vector position
+
+    def __init__(self, name: str = "bm25"):
+        super().__init__(name=name)
+        # initialize BM25 encoder with default params (trained on MSMarco)
+        self.model = encoder.default()
+        self._refresh_idx_mapping()
+
+    def _refresh_idx_mapping(self):
+        """Rebuild `idx_mapping` from the model's current doc-freq indices."""
+        known = self.model.get_params()["doc_freq"]["indices"]
+        self.idx_mapping = {idx: i for i, idx in enumerate(known)}
+
+    def __call__(self, docs: list[str]) -> list[list[float]]:
+        """Return one vector per doc; raises ValueError if `docs` is empty."""
+        if not docs:
+            raise ValueError("No documents to encode.")
+        # a lone doc is encoded as a query, several as documents (as before)
+        single = len(docs) == 1
+        encode = self.model.encode_queries if single else self.model.encode_documents
+        embeds = [[0.0] * len(self.idx_mapping) for _ in docs]
+        for row, output in zip(embeds, encode(docs)):
+            for idx, val in zip(output["indices"], output["values"]):
+                # token hashes missing from the mapping have no slot: skip
+                if idx in self.idx_mapping:
+                    row[self.idx_mapping[idx]] = val
+        return embeds
+
+    def fit(self, docs: list[str]):
+        """Fit BM25 on `docs` and rebuild the index mapping to match."""
+        self.model.fit(docs)
+        self._refresh_idx_mapping()
diff --git a/semantic_router/retrievers/cohere.py b/semantic_router/encoders/cohere.py
similarity index 88%
rename from semantic_router/retrievers/cohere.py
rename to semantic_router/encoders/cohere.py
index 187cb7442df0a14db2e64e087016614c9439cf00..fd20fa7586da1a4370d541e8659b826b93db9fac 100644
--- a/semantic_router/retrievers/cohere.py
+++ b/semantic_router/encoders/cohere.py
@@ -2,10 +2,10 @@ import os
 
 import cohere
 
-from semantic_router.retrievers import BaseRetriever
+from semantic_router.encoders import BaseEncoder
 
 
-class CohereRetriever(BaseRetriever):
+class CohereEncoder(BaseEncoder):
     client: cohere.Client | None
 
     def __init__(
diff --git a/semantic_router/retrievers/huggingface.py b/semantic_router/encoders/huggingface.py
similarity index 61%
rename from semantic_router/retrievers/huggingface.py
rename to semantic_router/encoders/huggingface.py
index 9c8f2f05b403c4023446dc520c4816333edc59cf..52ddecd2ed02ea5d6cd187aa6f52bf1277785585 100644
--- a/semantic_router/retrievers/huggingface.py
+++ b/semantic_router/encoders/huggingface.py
@@ -1,7 +1,7 @@
-from semantic_router.retrievers import BaseRetriever
+from semantic_router.encoders import BaseEncoder
 
 
-class HuggingFaceRetriever(BaseRetriever):
+class HuggingFaceEncoder(BaseEncoder):
     def __init__(self, name: str):
         self.name = name
 
diff --git a/semantic_router/retrievers/openai.py b/semantic_router/encoders/openai.py
similarity index 92%
rename from semantic_router/retrievers/openai.py
rename to semantic_router/encoders/openai.py
index 2dbfd880904a1b196c2ab6e45fd44fdfd405feb3..5700c8003b05bcb7a3be237b7ac655d9b13d56d4 100644
--- a/semantic_router/retrievers/openai.py
+++ b/semantic_router/encoders/openai.py
@@ -4,10 +4,10 @@ from time import sleep
 import openai
 from openai.error import RateLimitError
 
-from semantic_router.retrievers import BaseRetriever
+from semantic_router.encoders import BaseEncoder
 
 
-class OpenAIRetriever(BaseRetriever):
+class OpenAIEncoder(BaseEncoder):
     def __init__(self, name: str, openai_api_key: str | None = None):
         super().__init__(name=name)
         openai.api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dc7f34d95b0a7a7f418801000330b3159e0692e
--- /dev/null
+++ b/semantic_router/encoders/tfidf.py
@@ -0,0 +1,37 @@
+from functools import partial
+
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from semantic_router.encoders import BaseEncoder
+
+
+class TfidfEncoder(BaseEncoder):
+    """Sparse encoder producing TF-IDF vectors with scikit-learn."""
+    model: TfidfVectorizer | None = None
+
+    def __init__(self, name: str = "tfidf"):
+        super().__init__(name=name)
+        # the vectorizer has no vocabulary until `fit` has been called
+        self.model = TfidfVectorizer()
+
+    def __call__(self, docs: list[str]) -> list[list[float]]:
+        """Encode `docs` into TF-IDF vectors, one row per document.
+
+        Args:
+            docs: texts to encode; `fit` must have been called beforehand.
+
+        Returns:
+            One list of floats per document, each as long as the vocabulary.
+
+        Raises:
+            ValueError: if `docs` is empty (scikit-learn also raises its
+                NotFittedError when `fit` was never called).
+        """
+        if not docs:
+            raise ValueError("No documents to encode.")
+        # transform yields a scipy sparse matrix; densify to plain lists
+        return self.model.transform(docs).toarray().tolist()
+
+    def fit(self, docs: list[str]):
+        """Learn the vocabulary and IDF weights from `docs`."""
+        self.model.fit(docs)
diff --git a/semantic_router/layer.py b/semantic_router/layer.py
index ad27a4c168f5dc4fe8869343b6707b8eaa3cdfcd..e8b71576c4accd5936621f6da2889411caf2fe97 100644
--- a/semantic_router/layer.py
+++ b/semantic_router/layer.py
@@ -1,11 +1,11 @@
 import numpy as np
 from numpy.linalg import norm
 
-from semantic_router.retrievers import (
-    BaseRetriever,
-    CohereRetriever,
-    OpenAIRetriever,
-    BM25Retriever
+from semantic_router.encoders import (
+    BaseEncoder,
+    CohereEncoder,
+    OpenAIEncoder,
+    BM25Encoder
 )
 from semantic_router.schema import Decision
 
@@ -15,12 +15,12 @@ class DecisionLayer:
     categories = None
     score_threshold = 0.82
 
-    def __init__(self, encoder: BaseRetriever, decisions: list[Decision] = []):
+    def __init__(self, encoder: BaseEncoder, decisions: list[Decision] = []):
         self.encoder = encoder
         # decide on default threshold based on encoder
-        if isinstance(encoder, OpenAIRetriever):
+        if isinstance(encoder, OpenAIEncoder):
             self.score_threshold = 0.82
-        elif isinstance(encoder, CohereRetriever):
+        elif isinstance(encoder, CohereEncoder):
             self.score_threshold = 0.3
         else:
             self.score_threshold = 0.82
@@ -116,17 +116,17 @@ class HybridDecisionLayer:
 
     def __init__(
         self,
-        encoder: BaseRetriever,
+        encoder: BaseEncoder,
         decisions: list[Decision] = [],
         alpha: float = 0.3
     ):
         self.encoder = encoder
-        self.sparse_encoder = BM25Retriever()
+        self.sparse_encoder = BM25Encoder()
         self.alpha = alpha
         # decide on default threshold based on encoder
-        if isinstance(encoder, OpenAIRetriever):
+        if isinstance(encoder, OpenAIEncoder):
             self.score_threshold = 0.82
-        elif isinstance(encoder, CohereRetriever):
+        elif isinstance(encoder, CohereEncoder):
             self.score_threshold = 0.3
         else:
             self.score_threshold = 0.82
@@ -150,8 +150,8 @@ class HybridDecisionLayer:
 
     def _add_decision(self, decision: Decision):
         # create embeddings
-        dense_embeds = self.encoder(decision.utterances)
-        sparse_embeds = self.sparse_encoder(decision.utterances)
+        dense_embeds = self.encoder(decision.utterances) * self.alpha
+        sparse_embeds = self.sparse_encoder(decision.utterances) * (1 - self.alpha)
         # concatenate vectors to create hybrid vecs
         embeds = np.concatenate([
             dense_embeds, sparse_embeds
@@ -168,12 +168,20 @@ class HybridDecisionLayer:
                 self.utterances,
                 np.array(decision.utterances)
             ])
-        # create utterance array (the index)
+        # create utterance array (the dense index)
         if self.index is None:
-            self.index = np.array(embeds)
+            self.index = np.array(dense_embeds)
         else:
-            embed_arr = np.array(embeds)
+            embed_arr = np.array(dense_embeds)
             self.index = np.concatenate([self.index, embed_arr])
+        # create sparse utterance array
+        if self.sparse_index is None:
+            self.sparse_index = np.array(sparse_embeds)
+        else:
+            sparse_embed_arr = np.array(sparse_embeds)
+            self.sparse_index = np.concatenate([
+                self.sparse_index, sparse_embed_arr
+            ])
 
     def _query(self, text: str, top_k: int = 5):
         """Given some text, encodes and searches the index vector space to
diff --git a/semantic_router/rankers/cohere.py b/semantic_router/rankers/cohere.py
index b703a960509208fd65df62d21867b5b3b1ee0ece..e79608b817708a324fe925fed4b3bbb5972e93cb 100644
--- a/semantic_router/rankers/cohere.py
+++ b/semantic_router/rankers/cohere.py
@@ -2,10 +2,10 @@ import os
 
 import cohere
 
-from semantic_router.rankers import BaseReranker
+from semantic_router.rankers import BaseRanker
 
 
-class CohereRanker(BaseReranker):
+class CohereRanker(BaseRanker):
     client: cohere.Client | None
 
     def __init__(
diff --git a/semantic_router/retrievers/__init__.py b/semantic_router/retrievers/__init__.py
deleted file mode 100644
index 0fcaa6d2e568ebbc5413726b3a638a75462b781d..0000000000000000000000000000000000000000
--- a/semantic_router/retrievers/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .base import BaseRetriever
-from .cohere import CohereRetriever
-from .openai import OpenAIRetriever
-
-__all__ = ["BaseRetriever", "CohereRetriever", "OpenAIRetriever"]
diff --git a/semantic_router/retrievers/bm25.py b/semantic_router/retrievers/bm25.py
deleted file mode 100644
index 2a68a3ff79430cd7f570b17463bfc103a6dad26e..0000000000000000000000000000000000000000
--- a/semantic_router/retrievers/bm25.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import os
-
-from pinecone_text import BM25Encoder
-
-from semantic_router.retrievers import BaseRetriever
-
-
-class BM25Retriever(BaseRetriever):
-    def __init__(self, name: str = "bm25"):
-        super().__init__(name=name)
-        self.model = BM25Encoder()
-
-    def __call__(self, docs: list[str]) -> list[list[float]]:
-        if self.params is None:
-            raise ValueError("BM25 model not trained, must call `.fit` first.")
-        embeds = self.model.encode_doocuments(docs)
-        return embeds.embeddings
-
-    def fit(self, docs: list[str]):
-        params = self.model.fit(docs)
-        self.model.set_params(**params)
\ No newline at end of file
diff --git a/semantic_router/schema.py b/semantic_router/schema.py
index cb1288fb1171501ea475f39dd6c812101532b736..37a43dd41007634706caedc1b5c5dadd6d734bba 100644
--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -3,10 +3,10 @@ from enum import Enum
 from pydantic import BaseModel
 from pydantic.dataclasses import dataclass
 
-from semantic_router.retrievers import (
-    BaseRetriever,
-    CohereRetriever,
-    OpenAIRetriever,
+from semantic_router.encoders import (
+    BaseEncoder,
+    CohereEncoder,
+    OpenAIEncoder,
 )
 
 
@@ -16,27 +16,27 @@ class Decision(BaseModel):
     description: str | None = None
 
 
-class RetrieverType(Enum):
+class EncoderType(Enum):
     HUGGINGFACE = "huggingface"
     OPENAI = "openai"
     COHERE = "cohere"
 
 
 @dataclass
-class Retriever:
-    type: RetrieverType
+class Encoder:
+    type: EncoderType
     name: str
-    model: BaseRetriever
+    model: BaseEncoder
 
     def __init__(self, type: str, name: str):
-        self.type = RetrieverType(type)
+        self.type = EncoderType(type)
         self.name = name
-        if self.type == RetrieverType.HUGGINGFACE:
+        if self.type == EncoderType.HUGGINGFACE:
             raise NotImplementedError
-        elif self.type == RetrieverType.OPENAI:
-            self.model = OpenAIRetriever(name)
-        elif self.type == RetrieverType.COHERE:
-            self.model = CohereRetriever(name)
+        elif self.type == EncoderType.OPENAI:
+            self.model = OpenAIEncoder(name)
+        elif self.type == EncoderType.COHERE:
+            self.model = CohereEncoder(name)
 
     def __call__(self, texts: list[str]) -> list[float]:
         return self.model(texts)