diff --git a/semantic_router/llms/llamacpp.py b/semantic_router/llms/llamacpp.py
index e6787953c089224200b66daba954f051751d56f6..d344636ffe41c24541227a4ad92c469ec43d87de 100644
--- a/semantic_router/llms/llamacpp.py
+++ b/semantic_router/llms/llamacpp.py
@@ -2,7 +2,7 @@ from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Optional
 
-from llama_cpp import Llama, LlamaGrammar, CreateChatCompletionResponse
+from llama_cpp import Llama, LlamaGrammar
 
 from semantic_router.llms.base import BaseLLM
 from semantic_router.schema import Message
@@ -57,7 +57,7 @@ class LlamaCppLLM(BaseLLM):
                 grammar=self.grammar,
                 stream=False,
             )
-            assert type(completion) is CreateChatCompletionResponse
+            assert isinstance(completion, dict)  # keep mypy happy
             output = completion["choices"][0]["message"]["content"]
 
             if not output: