diff --git a/docs/05-local-execution.ipynb b/docs/05-local-execution.ipynb
index c818b9cc7a1e6ab278d5d7d4d84be68d1c105c4a..f03f77ae8696e3b99511a02c752c03a6152f01fa 100644
--- a/docs/05-local-execution.ipynb
+++ b/docs/05-local-execution.ipynb
@@ -31,8 +31,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install -qU semantic-router[local]==0.0.15\n",
-    "# ! CMAKE_ARGS=\"-DLLAMA_METAL=on\" pip install -qU semantic-router[local]==0.0.15 <-- With Metal hardware acceleration"
+    "!pip install -qU \"semantic-router[local]==0.0.16\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0029cc6d",
+   "metadata": {},
+   "source": [
+    "If you're on Apple silicon, you can use the following to enable Metal hardware acceleration:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f9b5729",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!CMAKE_ARGS=\"-DLLAMA_METAL=on\" pip install -qU \"semantic-router[local]==0.0.16\""
    ]
   },
   {
@@ -47,18 +64,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "1d6ddf61-c189-4b3b-99df-9508f830ae1f",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mistral-7b-instruct-v0.2.Q4_0.gguf\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "! curl -L \"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true\" -o ./mistral-7b-instruct-v0.2.Q4_0.gguf\n",
     "! ls mistral-7b-instruct-v0.2.Q4_0.gguf"
@@ -76,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "id": "e26db664-9dff-476a-84ef-edd7a8cdf1ba",
    "metadata": {},
    "outputs": [],
@@ -140,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 2,
    "id": "fac95b0c-c61f-4158-b7d9-0221f7d0b65e",
    "metadata": {},
    "outputs": [
@@ -153,7 +162,7 @@
        " 'output': \"<class 'str'>\"}"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -174,10 +183,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "5253c141-141b-4fda-b07c-a313393902ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/jamesbriggs/opt/anaconda3/envs/decision-layer/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "from semantic_router.encoders import HuggingFaceEncoder\n",
     "\n",
@@ -204,7 +222,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "id": "772cec0d-7a0c-4c7e-9b7a-4a1864b0a8ec",
    "metadata": {
     "scrolled": true
@@ -283,30 +301,30 @@
       "llm_load_print_meta: PAD token        = 0 '<unk>'\n",
       "llm_load_print_meta: LF token         = 13 '<0x0A>'\n",
       "llm_load_tensors: ggml ctx size       =    0.11 MiB\n",
-      "ggml_backend_metal_buffer_from_ptr: allocated buffer, size =  3918.58 MiB, ( 3918.64 / 10922.67)\n",
+      "ggml_backend_metal_buffer_from_ptr: allocated buffer, size =  3918.58 MiB, ( 3918.64 / 21845.34)\n",
       "llm_load_tensors: system memory used  = 3917.98 MiB\n",
       "..................................................................................................\n",
       "llama_new_context_with_model: n_ctx      = 2048\n",
       "llama_new_context_with_model: freq_base  = 1000000.0\n",
       "llama_new_context_with_model: freq_scale = 1\n",
       "ggml_metal_init: allocating\n",
-      "ggml_metal_init: found device: Apple M1 Pro\n",
-      "ggml_metal_init: picking default device: Apple M1 Pro\n",
+      "ggml_metal_init: found device: Apple M1 Max\n",
+      "ggml_metal_init: picking default device: Apple M1 Max\n",
       "ggml_metal_init: default.metallib not found, loading from source\n",
       "ggml_metal_init: GGML_METAL_PATH_RESOURCES = nil\n",
-      "ggml_metal_init: loading '/Users/bogdanbuduroiu/development/aurelio-labs/semantic-router/.venv/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n",
-      "ggml_metal_init: GPU name:   Apple M1 Pro\n",
+      "ggml_metal_init: loading '/Users/jamesbriggs/opt/anaconda3/envs/decision-layer/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n",
+      "ggml_metal_init: GPU name:   Apple M1 Max\n",
       "ggml_metal_init: GPU family: MTLGPUFamilyApple7 (1007)\n",
       "ggml_metal_init: hasUnifiedMemory              = true\n",
-      "ggml_metal_init: recommendedMaxWorkingSetSize  = 11453.25 MB\n",
+      "ggml_metal_init: recommendedMaxWorkingSetSize  = 22906.50 MB\n",
       "ggml_metal_init: maxTransferRate               = built-in GPU\n",
-      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =   256.00 MiB, ( 4176.20 / 10922.67)\n",
+      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =   256.00 MiB, ( 4176.20 / 21845.34)\n",
       "llama_new_context_with_model: KV self size  =  256.00 MiB, K (f16):  128.00 MiB, V (f16):  128.00 MiB\n",
-      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =     0.02 MiB, ( 4176.22 / 10922.67)\n",
+      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =     0.02 MiB, ( 4176.22 / 21845.34)\n",
       "llama_build_graph: non-view tensors processed: 676/676\n",
       "llama_new_context_with_model: compute buffer total size = 159.19 MiB\n",
-      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =   156.02 MiB, ( 4332.22 / 10922.67)\n",
-      "\u001b[32m2024-01-13 13:02:53 INFO semantic_router.utils.logger Initializing RouteLayer\u001b[0m\n"
+      "ggml_backend_metal_buffer_type_alloc_buffer: allocated buffer, size =   156.02 MiB, ( 4332.22 / 21845.34)\n",
+      "\u001b[32m2024-01-13 14:48:23 INFO semantic_router.utils.logger Initializing RouteLayer\u001b[0m\n"
      ]
     }
    ],
@@ -331,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "id": "a8bd1da4-8ff7-4cd3-a5e3-fd79a938cc67",
    "metadata": {},
    "outputs": [
@@ -341,7 +359,7 @@
        "RouteChoice(name='chitchat', function_call=None, similarity_score=None, trigger=None)"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -352,7 +370,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "id": "c6ccbea2-376b-4b28-9b79-d2e9c71e99f4",
    "metadata": {
     "scrolled": true
@@ -396,7 +414,7 @@
       "ws_30 ::= [ <U+0009><U+000A>] ws \n",
       "ws_31 ::= ws_30 | \n",
       "\n",
-      "\u001b[32m2024-01-13 13:03:36 INFO semantic_router.utils.logger Extracting function input...\u001b[0m\n"
+      "\u001b[32m2024-01-13 14:51:39 INFO semantic_router.utils.logger Extracting function input...\u001b[0m\n"
      ]
     },
     {
@@ -409,10 +427,10 @@
     {
      "data": {
       "text/plain": [
-       "'00:03'"
+       "'09:51'"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -423,6 +441,219 @@
     "get_time(**out.function_call)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "720f976a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "from_string grammar:\n",
+      "root ::= object \n",
+      "object ::= [{] ws object_11 [}] ws \n",
+      "value ::= object | array | string | number | value_6 ws \n",
+      "array ::= [[] ws array_15 []] ws \n",
+      "string ::= [\"] string_18 [\"] ws \n",
+      "number ::= number_19 number_25 number_29 ws \n",
+      "value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] \n",
+      "ws ::= ws_31 \n",
+      "object_8 ::= string [:] ws value object_10 \n",
+      "object_9 ::= [,] ws string [:] ws value \n",
+      "object_10 ::= object_9 object_10 | \n",
+      "object_11 ::= object_8 | \n",
+      "array_12 ::= value array_14 \n",
+      "array_13 ::= [,] ws value \n",
+      "array_14 ::= array_13 array_14 | \n",
+      "array_15 ::= array_12 | \n",
+      "string_16 ::= [^\"\\] | [\\] string_17 \n",
+      "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n",
+      "string_18 ::= string_16 string_18 | \n",
+      "number_19 ::= number_20 number_21 \n",
+      "number_20 ::= [-] | \n",
+      "number_21 ::= [0-9] | [1-9] number_22 \n",
+      "number_22 ::= [0-9] number_22 | \n",
+      "number_23 ::= [.] number_24 \n",
+      "number_24 ::= [0-9] number_24 | [0-9] \n",
+      "number_25 ::= number_23 | \n",
+      "number_26 ::= [eE] number_27 number_28 \n",
+      "number_27 ::= [-+] | \n",
+      "number_28 ::= [0-9] number_28 | [0-9] \n",
+      "number_29 ::= number_26 | \n",
+      "ws_30 ::= [ <U+0009><U+000A>] ws \n",
+      "ws_31 ::= ws_30 | \n",
+      "\n",
+      "\u001b[32m2024-01-13 15:00:56 INFO semantic_router.utils.logger Extracting function input...\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name='get_time' function_call={'timezone': 'Europe/Rome'} similarity_score=None trigger=None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'16:00'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "out = rl(\"what's the time in Rome right now?\")\n",
+    "print(out)\n",
+    "get_time(**out.function_call)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c9d9dbbb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "from_string grammar:\n",
+      "root ::= object \n",
+      "object ::= [{] ws object_11 [}] ws \n",
+      "value ::= object | array | string | number | value_6 ws \n",
+      "array ::= [[] ws array_15 []] ws \n",
+      "string ::= [\"] string_18 [\"] ws \n",
+      "number ::= number_19 number_25 number_29 ws \n",
+      "value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] \n",
+      "ws ::= ws_31 \n",
+      "object_8 ::= string [:] ws value object_10 \n",
+      "object_9 ::= [,] ws string [:] ws value \n",
+      "object_10 ::= object_9 object_10 | \n",
+      "object_11 ::= object_8 | \n",
+      "array_12 ::= value array_14 \n",
+      "array_13 ::= [,] ws value \n",
+      "array_14 ::= array_13 array_14 | \n",
+      "array_15 ::= array_12 | \n",
+      "string_16 ::= [^\"\\] | [\\] string_17 \n",
+      "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n",
+      "string_18 ::= string_16 string_18 | \n",
+      "number_19 ::= number_20 number_21 \n",
+      "number_20 ::= [-] | \n",
+      "number_21 ::= [0-9] | [1-9] number_22 \n",
+      "number_22 ::= [0-9] number_22 | \n",
+      "number_23 ::= [.] number_24 \n",
+      "number_24 ::= [0-9] number_24 | [0-9] \n",
+      "number_25 ::= number_23 | \n",
+      "number_26 ::= [eE] number_27 number_28 \n",
+      "number_27 ::= [-+] | \n",
+      "number_28 ::= [0-9] number_28 | [0-9] \n",
+      "number_29 ::= number_26 | \n",
+      "ws_30 ::= [ <U+0009><U+000A>] ws \n",
+      "ws_31 ::= ws_30 | \n",
+      "\n",
+      "\u001b[32m2024-01-13 15:01:59 INFO semantic_router.utils.logger Extracting function input...\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name='get_time' function_call={'timezone': 'Asia/Bangkok'} similarity_score=None trigger=None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'22:02'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "out = rl(\"what's the time in Bangkok right now?\")\n",
+    "print(out)\n",
+    "get_time(**out.function_call)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "675d12fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "from_string grammar:\n",
+      "root ::= object \n",
+      "object ::= [{] ws object_11 [}] ws \n",
+      "value ::= object | array | string | number | value_6 ws \n",
+      "array ::= [[] ws array_15 []] ws \n",
+      "string ::= [\"] string_18 [\"] ws \n",
+      "number ::= number_19 number_25 number_29 ws \n",
+      "value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] \n",
+      "ws ::= ws_31 \n",
+      "object_8 ::= string [:] ws value object_10 \n",
+      "object_9 ::= [,] ws string [:] ws value \n",
+      "object_10 ::= object_9 object_10 | \n",
+      "object_11 ::= object_8 | \n",
+      "array_12 ::= value array_14 \n",
+      "array_13 ::= [,] ws value \n",
+      "array_14 ::= array_13 array_14 | \n",
+      "array_15 ::= array_12 | \n",
+      "string_16 ::= [^\"\\] | [\\] string_17 \n",
+      "string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n",
+      "string_18 ::= string_16 string_18 | \n",
+      "number_19 ::= number_20 number_21 \n",
+      "number_20 ::= [-] | \n",
+      "number_21 ::= [0-9] | [1-9] number_22 \n",
+      "number_22 ::= [0-9] number_22 | \n",
+      "number_23 ::= [.] number_24 \n",
+      "number_24 ::= [0-9] number_24 | [0-9] \n",
+      "number_25 ::= number_23 | \n",
+      "number_26 ::= [eE] number_27 number_28 \n",
+      "number_27 ::= [-+] | \n",
+      "number_28 ::= [0-9] number_28 | [0-9] \n",
+      "number_29 ::= number_26 | \n",
+      "ws_30 ::= [ <U+0009><U+000A>] ws \n",
+      "ws_31 ::= ws_30 | \n",
+      "\n",
+      "\u001b[32m2024-01-13 15:02:49 INFO semantic_router.utils.logger Extracting function input...\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name='get_time' function_call={'timezone': 'Asia/Bangkok'} similarity_score=None trigger=None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'22:03'"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "out = rl(\"what's the time in Phuket right now?\")\n",
+    "print(out)\n",
+    "get_time(**out.function_call)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "5200f550-f3be-43d7-9b76-6390360f07c8",
@@ -433,10 +664,14 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d8430bf1-890d-4129-956a-0486ad215aeb",
+   "id": "76df5f53",
    "metadata": {},
    "source": [
-    "! rm ./mistral-7b-instruct-v0.2.Q4_0.gguf"
+    "Once done, if you'd like to delete the downloaded model you can do so with the following:\n",
+    "\n",
+    "```\n",
+    "! rm ./mistral-7b-instruct-v0.2.Q4_0.gguf\n",
+    "```"
    ]
   }
  ],
@@ -456,7 +691,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.11.5"
   }
  },
  "nbformat": 4,
diff --git a/poetry.lock b/poetry.lock
index 74b4dd3f02e9cd36d6079a89e3066bf10786b8e1..c57d0a3de83b9cf3583cc72503c0f69c10cebf8e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -628,7 +628,7 @@ files = [
 name = "diskcache"
 version = "5.6.3"
 description = "Disk Cache -- Disk and file backed persistent cache."
-optional = false
+optional = true
 python-versions = ">=3"
 files = [
     {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
@@ -1209,7 +1209,7 @@ test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"]
 name = "llama-cpp-python"
 version = "0.2.28"
 description = "Python bindings for the llama.cpp library"
-optional = false
+optional = true
 python-versions = ">=3.8"
 files = [
     {file = "llama_cpp_python-0.2.28.tar.gz", hash = "sha256:669885d9654fe27ed084061e23b0c2af5fcf5593aa3d5a159864e249f91e6d84"},
@@ -1253,6 +1253,16 @@ files = [
     {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -2241,6 +2251,7 @@ files = [
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
     {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
     {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
     {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2248,8 +2259,15 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2266,6 +2284,7 @@ files = [
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
     {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
     {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
     {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2273,6 +2292,7 @@ files = [
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
     {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
     {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3229,4 +3249,4 @@ local = ["llama-cpp-python", "torch", "transformers"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "08cbfde28f9021460deb0fefacac6983d3f48b5a040f2c0e665e76b0924d4122"
+content-hash = "1de69e2e5050507790405e09d4cd79fe114b4200a56c87cc609a104366696989"
diff --git a/pyproject.toml b/pyproject.toml
index 5471837c17b95de29e9ff3a40eb1d166a9b70e1d..07536a512b342f58d2a0dc77933cdaa3c0f321f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ pinecone-text = {version = "^0.7.1", optional = true}
 fastembed = {version = "^0.1.3", optional = true, python = "<3.12"}
 torch = {version = "^2.1.2", optional = true}
 transformers = {version = "^4.36.2", optional = true}
-llama-cpp-python = "^0.2.28"
+llama-cpp-python = {version = "^0.2.28", optional = true}
 
 [tool.poetry.extras]
 hybrid = ["pinecone-text"]