diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
index 61d28b47d2787c2990174727c5c7dde653921331..80b32393c160d49f5ac42e360f684dbebc6559eb 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
@@ -10,7 +10,6 @@ from llama_index.core.base.llms.generic_utils import (
     get_from_param_or_env,
 )
 
-from llama_index.llms.nvidia.utils import API_CATALOG_MODELS
 
 from llama_index.llms.openai_like import OpenAILike
 
@@ -54,10 +53,19 @@ class NVIDIA(OpenAILike):
 
     @property
     def available_models(self) -> List[Model]:
-        ids = API_CATALOG_MODELS.keys()
+        exclude = {
+            "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
+        }
+        # Do not exclude models in NIM mode: the NIM administrator controls
+        # the model name and may deploy an excluded name on the NIM's
+        # /chat/completions endpoint.
         if self._mode == "nim":
-            ids = [model.id for model in self._get_client().models.list()]
-        return [Model(id=name) for name in ids]
+            exclude = set()
+        return [
+            model
+            for model in self._get_client().models.list().data
+            if model.id not in exclude
+        ]
 
     @classmethod
     def class_name(cls) -> str:
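
For context, a minimal usage sketch of the new behavior (assumptions: an
NVIDIA_API_KEY is set in the environment, the mode() helper accepts the same
keyword arguments the tests below pass, and the exact ids returned depend on
the live service):

    from llama_index.llms.nvidia import NVIDIA

    # Catalog mode: ids come from the service's model listing with the
    # exclusion set applied, so the non-chat mixtral base model is filtered.
    ids = [model.id for model in NVIDIA().available_models]
    assert "mistralai/mixtral-8x22b-v0.1" not in ids

    # NIM mode: nothing is excluded, since the administrator decides which
    # names are served on the NIM's /chat/completions endpoint. The base_url
    # below is illustrative only.
    nim = NVIDIA().mode("nim", base_url="http://localhost:8000/v1")
    nim_ids = [model.id for model in nim.available_models]
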
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
index e6a493855f714712b867e4f8bcf2f5a140c15235..9321b6536d96a970a53beb400c0e3e1e9438816d 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
@@ -30,7 +30,7 @@ license = "MIT"
 name = "llama-index-llms-nvidia"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.1.1"
+version = "0.1.2"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
index 6915a796816051dd6e6194cb0add74230b3020d7..f26fb1e569dc9cbc4ab37a94eb443656dfa9af1a 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
@@ -73,3 +73,16 @@ async def test_astream_complete(chat_model: str, mode: dict) -> None:
     responses = [response async for response in gen]
     assert all(isinstance(response, CompletionResponse) for response in responses)
     assert all(isinstance(response.delta, str) for response in responses)
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize(
+    "excluded",
+    [
+        "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
+    ],
+)
+def test_exclude_models(mode: dict, excluded: str) -> None:
+    assert excluded not in [
+        model.id for model in NVIDIA().mode(**mode).available_models
+    ]
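
The new test runs with the rest of the integration suite; a plausible
invocation (assuming the integration marker is registered for this package,
as its use above suggests):

    pytest -m integration tests/test_integration.py -k test_exclude_models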