diff --git a/poetry.lock b/poetry.lock
index 96a21bbc54879c58239b7afab825ba958c34d213..74b4dd3f02e9cd36d6079a89e3066bf10786b8e1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -440,13 +440,13 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
 
 [[package]]
 name = "cohere"
-version = "4.41"
+version = "4.42"
 description = "Python SDK for the Cohere API"
 optional = false
 python-versions = ">=3.8,<4.0"
 files = [
-    {file = "cohere-4.41-py3-none-any.whl", hash = "sha256:39470cc412fa96a1c612f522d48d7d86b34b3163a04030cff83ec48ebbaff32f"},
-    {file = "cohere-4.41.tar.gz", hash = "sha256:8509ca196dc038eca81e474d3cd5896da2ea168a4d3c578b4cb6969994be34ef"},
+    {file = "cohere-4.42-py3-none-any.whl", hash = "sha256:47f9355de0b7628314f461ca009fa3460c7edd9fd42d07cb5439321c05ae5ff9"},
+    {file = "cohere-4.42.tar.gz", hash = "sha256:8b1b93be118c5fb236d008df64abc0687cf88b77d1b589ac8cc8cd0d5dadb04b"},
 ]
 
 [package.dependencies]
@@ -624,6 +624,17 @@ files = [
     {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
 ]
 
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+description = "Disk Cache -- Disk and file backed persistent cache."
+optional = false
+python-versions = ">=3"
+files = [
+    {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
+    {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -1125,13 +1136,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]
 
 [[package]]
 name = "jinja2"
-version = "3.1.2"
+version = "3.1.3"
 description = "A very fast and expressive template engine."
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
-    {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
 ]
 
 [package.dependencies]
@@ -1194,6 +1205,27 @@ traitlets = ">=5.3"
 docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"]
 test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"]
 
+[[package]]
+name = "llama-cpp-python"
+version = "0.2.28"
+description = "Python bindings for the llama.cpp library"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "llama_cpp_python-0.2.28.tar.gz", hash = "sha256:669885d9654fe27ed084061e23b0c2af5fcf5593aa3d5a159864e249f91e6d84"},
+]
+
+[package.dependencies]
+diskcache = ">=5.6.1"
+numpy = ">=1.20.0"
+typing-extensions = ">=4.5.0"
+
+[package.extras]
+all = ["llama_cpp_python[dev,server,test]"]
+dev = ["black (>=23.3.0)", "httpx (>=0.24.1)", "mkdocs (>=1.4.3)", "mkdocs-material (>=9.1.18)", "mkdocstrings[python] (>=0.22.0)", "pytest (>=7.4.0)", "twine (>=4.0.2)"]
+server = ["fastapi (>=0.100.0)", "pydantic-settings (>=2.0.1)", "sse-starlette (>=1.6.1)", "starlette-context (>=0.3.6,<0.4)", "uvicorn (>=0.22.0)"]
+test = ["httpx (>=0.24.1)", "pytest (>=7.4.0)", "scipy (>=1.10)"]
+
 [[package]]
 name = "markupsafe"
 version = "2.1.3"
@@ -1221,16 +1253,6 @@ files = [
     {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -1792,13 +1814,13 @@ sympy = "*"
 
 [[package]]
 name = "openai"
-version = "1.7.0"
+version = "1.7.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.7.0-py3-none-any.whl", hash = "sha256:2282e8e15acb05df79cccba330c025b8e84284c7ec1f3fa31f167a8479066333"},
-    {file = "openai-1.7.0.tar.gz", hash = "sha256:f2a8dcb739e8620c9318a2c6304ea72aebb572ba02fa1d586344405e80d567d3"},
+    {file = "openai-1.7.2-py3-none-any.whl", hash = "sha256:8f41b90a762f5fd9d182b45851041386fed94c8ad240a70abefee61a68e0ef53"},
+    {file = "openai-1.7.2.tar.gz", hash = "sha256:c73c78878258b07f1b468b0602c6591f25a1478f49ecb90b9bd44b7cc80bce73"},
 ]
 
 [package.dependencies]
@@ -1933,22 +1955,22 @@ wcwidth = "*"
 
 [[package]]
 name = "protobuf"
-version = "4.25.1"
+version = "4.25.2"
 description = ""
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"},
-    {file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"},
-    {file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"},
-    {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"},
-    {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"},
-    {file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"},
-    {file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"},
-    {file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"},
-    {file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"},
-    {file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"},
-    {file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"},
+    {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"},
+    {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"},
+    {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"},
+    {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"},
+    {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"},
+    {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"},
+    {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"},
+    {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"},
+    {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"},
+    {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"},
+    {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"},
 ]
 
 [[package]]
@@ -2219,7 +2241,6 @@ files = [
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
-    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
     {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
     {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
     {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2227,15 +2248,8 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
-    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2252,7 +2266,6 @@ files = [
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
-    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
     {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
     {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
     {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2260,7 +2273,6 @@ files = [
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
     {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
     {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -2496,28 +2508,28 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
 [[package]]
 name = "ruff"
-version = "0.1.11"
+version = "0.1.13"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:a7f772696b4cdc0a3b2e527fc3c7ccc41cdcb98f5c80fdd4f2b8c50eb1458196"},
-    {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:934832f6ed9b34a7d5feea58972635c2039c7a3b434fe5ba2ce015064cb6e955"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea0d3e950e394c4b332bcdd112aa566010a9f9c95814844a7468325290aabfd9"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9bd4025b9c5b429a48280785a2b71d479798a69f5c2919e7d274c5f4b32c3607"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1ad00662305dcb1e987f5ec214d31f7d6a062cae3e74c1cbccef15afd96611d"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4b077ce83f47dd6bea1991af08b140e8b8339f0ba8cb9b7a484c30ebab18a23f"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4a88efecec23c37b11076fe676e15c6cdb1271a38f2b415e381e87fe4517f18"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b25093dad3b055667730a9b491129c42d45e11cdb7043b702e97125bcec48a1"},
-    {file = "ruff-0.1.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:231d8fb11b2cc7c0366a326a66dafc6ad449d7fcdbc268497ee47e1334f66f77"},
-    {file = "ruff-0.1.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:09c415716884950080921dd6237767e52e227e397e2008e2bed410117679975b"},
-    {file = "ruff-0.1.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0f58948c6d212a6b8d41cd59e349751018797ce1727f961c2fa755ad6208ba45"},
-    {file = "ruff-0.1.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:190a566c8f766c37074d99640cd9ca3da11d8deae2deae7c9505e68a4a30f740"},
-    {file = "ruff-0.1.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6464289bd67b2344d2a5d9158d5eb81025258f169e69a46b741b396ffb0cda95"},
-    {file = "ruff-0.1.11-py3-none-win32.whl", hash = "sha256:9b8f397902f92bc2e70fb6bebfa2139008dc72ae5177e66c383fa5426cb0bf2c"},
-    {file = "ruff-0.1.11-py3-none-win_amd64.whl", hash = "sha256:eb85ee287b11f901037a6683b2374bb0ec82928c5cbc984f575d0437979c521a"},
-    {file = "ruff-0.1.11-py3-none-win_arm64.whl", hash = "sha256:97ce4d752f964ba559c7023a86e5f8e97f026d511e48013987623915431c7ea9"},
-    {file = "ruff-0.1.11.tar.gz", hash = "sha256:f9d4d88cb6eeb4dfe20f9f0519bd2eaba8119bde87c3d5065c541dbae2b5a2cb"},
+    {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e3fd36e0d48aeac672aa850045e784673449ce619afc12823ea7868fcc41d8ba"},
+    {file = "ruff-0.1.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9fb6b3b86450d4ec6a6732f9f60c4406061b6851c4b29f944f8c9d91c3611c7a"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b13ba5d7156daaf3fd08b6b993360a96060500aca7e307d95ecbc5bb47a69296"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9ebb40442f7b531e136d334ef0851412410061e65d61ca8ce90d894a094feb22"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226b517f42d59a543d6383cfe03cccf0091e3e0ed1b856c6824be03d2a75d3b6"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5f0312ba1061e9b8c724e9a702d3c8621e3c6e6c2c9bd862550ab2951ac75c16"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2f59bcf5217c661254bd6bc42d65a6fd1a8b80c48763cb5c2293295babd945dd"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6894b00495e00c27b6ba61af1fc666f17de6140345e5ef27dd6e08fb987259d"},
+    {file = "ruff-0.1.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1600942485c6e66119da294c6294856b5c86fd6df591ce293e4a4cc8e72989"},
+    {file = "ruff-0.1.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ee3febce7863e231a467f90e681d3d89210b900d49ce88723ce052c8761be8c7"},
+    {file = "ruff-0.1.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dcaab50e278ff497ee4d1fe69b29ca0a9a47cd954bb17963628fa417933c6eb1"},
+    {file = "ruff-0.1.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f57de973de4edef3ad3044d6a50c02ad9fc2dff0d88587f25f1a48e3f72edf5e"},
+    {file = "ruff-0.1.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7a36fa90eb12208272a858475ec43ac811ac37e91ef868759770b71bdabe27b6"},
+    {file = "ruff-0.1.13-py3-none-win32.whl", hash = "sha256:a623349a505ff768dad6bd57087e2461be8db58305ebd5577bd0e98631f9ae69"},
+    {file = "ruff-0.1.13-py3-none-win_amd64.whl", hash = "sha256:f988746e3c3982bea7f824c8fa318ce7f538c4dfefec99cd09c8770bd33e6539"},
+    {file = "ruff-0.1.13-py3-none-win_arm64.whl", hash = "sha256:6bbbc3042075871ec17f28864808540a26f0f79a4478c357d3e3d2284e832998"},
+    {file = "ruff-0.1.13.tar.gz", hash = "sha256:e261f1baed6291f434ffb1d5c6bd8051d1c2a26958072d38dfbec39b3dda7352"},
 ]
 
 [[package]]
@@ -3212,9 +3224,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [extras]
 fastembed = ["fastembed"]
 hybrid = ["pinecone-text"]
-local = ["torch", "transformers"]
+local = ["llama-cpp-python", "torch", "transformers"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "5b459c6820bcf5c2b73daf0ecfcbbac95019311c74d88634bd7188650e48b749"
+content-hash = "08cbfde28f9021460deb0fefacac6983d3f48b5a040f2c0e665e76b0924d4122"
diff --git a/pyproject.toml b/pyproject.toml
index 45f105fd9ac63138f7b5d4d82b6300d1052f2ccb..5471837c17b95de29e9ff3a40eb1d166a9b70e1d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,11 +25,12 @@ pinecone-text = {version = "^0.7.1", optional = true}
 fastembed = {version = "^0.1.3", optional = true, python = "<3.12"}
 torch = {version = "^2.1.2", optional = true}
 transformers = {version = "^4.36.2", optional = true}
+llama-cpp-python = {version = "^0.2.28", optional = true}
 
 [tool.poetry.extras]
 hybrid = ["pinecone-text"]
 fastembed = ["fastembed"]
-local = ["torch", "transformers"]
+local = ["torch", "transformers", "llama-cpp-python"]
 
 [tool.poetry.group.dev.dependencies]
 ipykernel = "^6.25.0"
diff --git a/semantic_router/llms/__init__.py b/semantic_router/llms/__init__.py
index e5aedc85fd30cc0b576fc2170c1b7ca694bdf200..02b3fd5b2422e718fcdf9fd4b34e4ace7fb3d957 100644
--- a/semantic_router/llms/__init__.py
+++ b/semantic_router/llms/__init__.py
@@ -1,6 +1,7 @@
 from semantic_router.llms.base import BaseLLM
 from semantic_router.llms.cohere import CohereLLM
+from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.llms.openai import OpenAILLM
 from semantic_router.llms.openrouter import OpenRouterLLM
 
-__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM"]
+__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM", "LlamaCppLLM"]
diff --git a/semantic_router/llms/base.py b/semantic_router/llms/base.py
index 2560261173e61bac8f7769b12028261e812b1327..40a7f13aa9a42ac5518f136be06dbe8d1c8eecfe 100644
--- a/semantic_router/llms/base.py
+++ b/semantic_router/llms/base.py
@@ -1,8 +1,10 @@
-from typing import List, Optional
+import json
+from typing import Any, List, Optional
 
 from pydantic import BaseModel
 
 from semantic_router.schema import Message
+from semantic_router.utils.logger import logger
 
 
 class BaseLLM(BaseModel):
@@ -13,3 +15,66 @@ class BaseLLM(BaseModel):
 
     def __call__(self, messages: List[Message]) -> Optional[str]:
         raise NotImplementedError("Subclasses must implement this method")
+
+    def _is_valid_inputs(self, inputs: dict[str, Any], function_schema: dict[str, Any]) -> bool:
+        """Validate the extracted inputs against the function schema"""
+        try:
+            # Extract parameter names and types from the signature string
+            signature = function_schema["signature"]
+            param_info = [param.strip() for param in signature[1:-1].split(",")]
+            param_names = [info.split(":")[0].strip() for info in param_info]
+            param_types = [info.split(":")[1].strip().split("=")[0].strip() for info in param_info]
+
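+            # note: only the presence of each parameter is checked here;
+            # the declared types are parsed but not enforced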
+            for name, type_str in zip(param_names, param_types):
+                if name not in inputs:
+                    logger.error(f"Input {name} missing from query")
+                    return False
+            return True
+        except Exception as e:
+            logger.error(f"Input validation error: {str(e)}")
+            return False
+
+    def extract_function_inputs(self, query: str, function_schema: dict[str, Any]) -> dict:
+        logger.info("Extracting function input...")
+
+        prompt = f"""
+        You are a helpful assistant designed to output JSON.
+        Given the following function schema
+        << {function_schema} >>
+        and query
+        << {query} >>
+        extract the parameter values from the query, in a valid JSON format.
+        Example:
+        Input:
+        query: "How is the weather in Hawaii right now in International units?"
+        schema:
+        {{
+            "name": "get_weather",
+            "description": "Useful to get the weather in a specific location",
+            "signature": "(location: str, degree: str) -> str",
+            "output": "<class 'str'>",
+        }}
+
+        Result: {{
+            "location": "Hawaii",
+            "degree": "Kelvin"
+        }}
+
+        Input:
+        query: {query}
+        schema: {function_schema}
+        Result:
+        """
+        llm_input = [Message(role="user", content=prompt)]
+        output = self(llm_input)
+        if not output:
+            raise Exception("No output generated for extract function input")
+
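+        # models often emit single quotes or a trailing comma; normalise
+        # the output before attempting to parse it as JSON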
+        output = output.replace("'", '"').strip().rstrip(",")
+
+        function_inputs = json.loads(output)
+        if not self._is_valid_inputs(function_inputs, function_schema):
+            raise ValueError("Invalid inputs")
+        return function_inputs
diff --git a/semantic_router/llms/grammars/json.gbnf b/semantic_router/llms/grammars/json.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..a9537cdf9fbe49c79967090eab759973c52f2136
--- /dev/null
+++ b/semantic_router/llms/grammars/json.gbnf
@@ -0,0 +1,28 @@
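+# JSON grammar taken from the llama.cpp repository (grammars/json.gbnf);
+# it constrains token sampling so the model can only emit valid JSON
+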
+root   ::= object
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
diff --git a/semantic_router/llms/llamacpp.py b/semantic_router/llms/llamacpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..58e4dc3e096a362f304abc4a80c3ad1f2a4b5525
--- /dev/null
+++ b/semantic_router/llms/llamacpp.py
@@ -0,0 +1,70 @@
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Optional
+
+from llama_cpp import Llama, LlamaGrammar
+
+from semantic_router.llms.base import BaseLLM
+from semantic_router.schema import Message
+from semantic_router.utils.logger import logger
+
+
+class LlamaCppLLM(BaseLLM):
+    llm: Optional[Llama] = None
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    grammar: Optional[LlamaGrammar] = None
+
+    class Config:
+        # Llama and LlamaGrammar are plain classes rather than pydantic
+        # models, so pydantic must be told to accept them as field types
+        arbitrary_types_allowed = True
+
+    def __init__(
+        self,
+        llm: Llama,
+        name: str = "llama.cpp",
+        temperature: float = 0.2,
+        max_tokens: int = 200,
+    ):
+        super().__init__(name=name)
+        self.llm = llm
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+
+    def __call__(
+        self,
+        messages: list[Message],
+    ) -> str:
+        try:
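+            # self.grammar is None for plain chat; _grammar() attaches the
+            # JSON grammar during function-input extraction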
+            completion = self.llm.create_chat_completion(
+                messages=[m.to_llamacpp() for m in messages],
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                grammar=self.grammar,
+            )
+
+            output = completion["choices"][0]["message"]["content"]
+
+            if not output:
+                raise Exception("No output generated")
+            return output
+        except Exception as e:
+            logger.error(f"LLM error: {e}")
+            raise
+
+    @contextmanager
+    def _grammar(self):
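+        # temporarily attach the bundled JSON grammar so llama.cpp
+        # constrains sampling to valid JSON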
+        grammar_path = Path(__file__).parent.joinpath("grammars", "json.gbnf")
+        assert grammar_path.exists(), f"{grammar_path} does not exist"
+        try:
+            self.grammar = LlamaGrammar.from_file(grammar_path)
+            yield
+        finally:
+            self.grammar = None
+
+    def extract_function_inputs(self, query: str, function_schema: dict[str, Any]) -> dict:
+        with self._grammar():
+            return super().extract_function_inputs(query=query, function_schema=function_schema)
diff --git a/semantic_router/route.py b/semantic_router/route.py
index 3934d64fb700c3b61606fd7929fcbe85e9ba56e5..8c797ccec7e3460669ff4e5830f399cd156df5c3 100644
--- a/semantic_router/route.py
+++ b/semantic_router/route.py
@@ -19,17 +19,13 @@ def is_valid(route_config: str) -> bool:
             for item in output_json:
                 missing_keys = [key for key in required_keys if key not in item]
                 if missing_keys:
-                    logger.warning(
-                        f"Missing keys in route config: {', '.join(missing_keys)}"
-                    )
+                    logger.warning(f"Missing keys in route config: {', '.join(missing_keys)}")
                     return False
             return True
         else:
             missing_keys = [key for key in required_keys if key not in output_json]
             if missing_keys:
-                logger.warning(
-                    f"Missing keys in route config: {', '.join(missing_keys)}"
-                )
+                logger.warning(f"Missing keys in route config: {', '.join(missing_keys)}")
                 return False
             else:
                 return True
@@ -48,14 +44,9 @@ class Route(BaseModel):
     def __call__(self, query: str) -> RouteChoice:
         if self.function_schema:
             if not self.llm:
-                raise ValueError(
-                    "LLM is required for dynamic routes. Please ensure the `llm` "
-                    "attribute is set."
-                )
+                raise ValueError("LLM is required for dynamic routes. Please ensure the `llm` attribute is set.")
             # if a function schema is provided we generate the inputs
-            extracted_inputs = function_call.extract_function_inputs(
-                query=query, llm=self.llm, function_schema=self.function_schema
-            )
+            extracted_inputs = self.llm.extract_function_inputs(query=query, function_schema=self.function_schema)
             func_call = extracted_inputs
         else:
             # otherwise we just pass None for the call
diff --git a/semantic_router/schema.py b/semantic_router/schema.py
index 7529750df20999e767b50b21517a297994ee75ca..e4825999f37136f1c99ef7e89c8e69dd633d3ce2 100644
--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -63,6 +63,9 @@ class Message(BaseModel):
     def to_cohere(self):
         return {"role": self.role, "message": self.content}
 
+    def to_llamacpp(self):
+        return {"role": self.role, "content": self.content}
+
 
 class Conversation(BaseModel):
     messages: List[Message]
diff --git a/semantic_router/utils/function_call.py b/semantic_router/utils/function_call.py
index fd009c40f1ca96ecd1c709f4af9b1bb4f13e68c9..2c0be647a095b15e28048a5972a37124619dacf7 100644
--- a/semantic_router/utils/function_call.py
+++ b/semantic_router/utils/function_call.py
@@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Union
 from pydantic import BaseModel
 
 from semantic_router.llms import BaseLLM
+from semantic_router.llms.llamacpp import LlamaCppLLM
 from semantic_router.schema import Message, RouteChoice
 from semantic_router.utils.logger import logger
 
@@ -18,9 +19,7 @@ def get_schema(item: Union[BaseModel, Callable]) -> Dict[str, Any]:
 
             if default_value:
                 default_repr = repr(default_value)
-                signature_part = (
-                    f"{field_name}: {field_model.__name__} = {default_repr}"
-                )
+                signature_part = f"{field_name}: {field_model.__name__} = {default_repr}"
             else:
                 signature_part = f"{field_name}: {field_model.__name__}"
 
@@ -41,80 +40,8 @@ def get_schema(item: Union[BaseModel, Callable]) -> Dict[str, Any]:
     return schema
 
 
-def extract_function_inputs(
-    query: str, llm: BaseLLM, function_schema: Dict[str, Any]
-) -> Dict[str, Any]:
-    logger.info("Extracting function input...")
-
-    prompt = f"""
-You are a helpful assistant designed to output JSON.
-Given the following function schema
-<< {function_schema} >>
-and query
-<< {query} >>
-extract the parameters values from the query, in a valid JSON format.
-Example:
-Input:
-query: "How is the weather in Hawaii right now in International units?"
-schema:
-{{
-    "name": "get_weather",
-    "description": "Useful to get the weather in a specific location",
-    "signature": "(location: str, degree: str) -> float",
-    "output": "<class 'float'>",
-}}
-
-Result:
-{{
-    "location": "Hawaii",
-    "degree": "Kelvin",
-}}
-
-Input:
-query: \"{query}\"
-schema:
-{json.dumps(function_schema, indent=4)}
-
-Result:
-"""
-    llm_input = [Message(role="user", content=prompt)]
-    output = llm(llm_input)
-    if not output:
-        raise Exception("No output generated for extract function input")
-
-    output = output.replace("'", '"').strip().rstrip(",")
-
-    function_inputs = json.loads(output)
-    if not is_valid_inputs(function_inputs, function_schema):
-        raise ValueError("Invalid inputs")
-    return function_inputs
-
-
-def is_valid_inputs(inputs: Dict[str, Any], function_schema: Dict[str, Any]) -> bool:
-    """Validate the extracted inputs against the function schema"""
-    try:
-        # Extract parameter names and types from the signature string
-        signature = function_schema["signature"]
-        param_info = [param.strip() for param in signature[1:-1].split(",")]
-        param_names = [info.split(":")[0].strip() for info in param_info]
-        param_types = [
-            info.split(":")[1].strip().split("=")[0].strip() for info in param_info
-        ]
-
-        for name, type_str in zip(param_names, param_types):
-            if name not in inputs:
-                logger.error(f"Input {name} missing from query")
-                return False
-        return True
-    except Exception as e:
-        logger.error(f"Input validation error: {str(e)}")
-        return False
-
-
 # TODO: Add route layer object to the input, solve circular import issue
-async def route_and_execute(
-    query: str, llm: BaseLLM, functions: List[Callable], layer
-) -> Any:
+async def route_and_execute(query: str, llm: BaseLLM, functions: List[Callable], layer) -> Any:
     route_choice: RouteChoice = layer(query)
 
     for function in functions:
diff --git a/tests/unit/llms/test_llm_llamacpp.py b/tests/unit/llms/test_llm_llamacpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..223fdaf1eeefed21015f1c3dd94adc1fa5e1ec71
--- /dev/null
+++ b/tests/unit/llms/test_llm_llamacpp.py
@@ -0,0 +1,35 @@
+import pytest
+from llama_cpp import Llama
+
+from semantic_router.llms import LlamaCppLLM
+from semantic_router.schema import Message
+
+
+@pytest.fixture
+def llamacpp_llm(mocker):
+    llm = mocker.Mock(spec=Llama)
+    return LlamaCppLLM(llm=llm)
+
+
+class TestLlamaCppLLM:
+    def test_llamacpp_llm_init_success(self, llamacpp_llm):
+        assert llamacpp_llm.name == "llama.cpp"
+        assert llamacpp_llm.temperature == 0.2
+        assert llamacpp_llm.max_tokens == 200
+        assert llamacpp_llm.llm is not None
+
+    def test_llamacpp_llm_call_success(self, llamacpp_llm, mocker):
+        llamacpp_llm.llm.create_chat_completion = mocker.Mock(
+            return_value={"choices": [{"message": {"content": "test"}}]}
+        )
+
+        llm_input = [Message(role="user", content="test")]
+        output = llamacpp_llm(llm_input)
+        assert output == "test"
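+
+    def test_llamacpp_llm_grammar(self, llamacpp_llm):
+        # a minimal sketch rather than a full test: it assumes llama_cpp can
+        # parse the bundled json.gbnf grammar without a model being loaded
+        with llamacpp_llm._grammar():
+            assert llamacpp_llm.grammar is not None
+        assert llamacpp_llm.grammar is None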