diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
index 61d28b47d2787c2990174727c5c7dde653921331..80b32393c160d49f5ac42e360f684dbebc6559eb 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
@@ -10,7 +10,6 @@ from llama_index.core.base.llms.generic_utils import (
     get_from_param_or_env,
 )
 
-from llama_index.llms.nvidia.utils import API_CATALOG_MODELS
 from llama_index.llms.openai_like import OpenAILike
 
 
@@ -54,10 +53,19 @@ class NVIDIA(OpenAILike):
 
     @property
     def available_models(self) -> List[Model]:
-        ids = API_CATALOG_MODELS.keys()
+        exclude = {
+            "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
+        }
+        # do not exclude models in nim mode. the nim administrator has control
+        # over the model name and may deploy an excluded name on the nim's
+        # /chat/completion endpoint.
         if self._mode == "nim":
-            ids = [model.id for model in self._get_client().models.list()]
-        return [Model(id=name) for name in ids]
+            exclude = set()
+        return [
+            model
+            for model in self._get_client().models.list().data
+            if model.id not in exclude
+        ]
 
     @classmethod
     def class_name(cls) -> str:
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
index e6a493855f714712b867e4f8bcf2f5a140c15235..9321b6536d96a970a53beb400c0e3e1e9438816d 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
@@ -30,7 +30,7 @@ license = "MIT"
 name = "llama-index-llms-nvidia"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.1.1"
+version = "0.1.2"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
index 6915a796816051dd6e6194cb0add74230b3020d7..f26fb1e569dc9cbc4ab37a94eb443656dfa9af1a 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
@@ -73,3 +73,16 @@ async def test_astream_complete(chat_model: str, mode: dict) -> None:
     responses = [response async for response in gen]
     assert all(isinstance(response, CompletionResponse) for response in responses)
     assert all(isinstance(response.delta, str) for response in responses)
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize(
+    "excluded",
+    [
+        "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
+    ],
+)
+def test_exclude_models(mode: dict, excluded: str) -> None:
+    assert excluded not in [
+        model.id for model in NVIDIA().mode(**mode).available_models
+    ]
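
For reviewers, below is a minimal standalone sketch of the filtering behavior that the new `available_models` property introduces. The excluded model name and the mode-based bypass come from the diff above; the `FakeModel` dataclass, the `filter_available_models` helper, and the example model id `meta/llama3-8b-instruct` are hypothetical stand-ins used only to make the snippet runnable outside the package.

```python
from dataclasses import dataclass
from typing import List, Set


@dataclass
class FakeModel:
    """Hypothetical stand-in for the objects returned by models.list().data."""

    id: str


def filter_available_models(models: List[FakeModel], mode: str) -> List[FakeModel]:
    # Mirrors the logic in the patched available_models property:
    # drop models known not to serve /chat/completion, except in "nim"
    # mode, where the NIM administrator controls which names are deployed.
    exclude: Set[str] = {
        "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
    }
    if mode == "nim":
        exclude = set()
    return [model for model in models if model.id not in exclude]


# Example usage with an arbitrary catalog:
catalog = [
    FakeModel("meta/llama3-8b-instruct"),  # arbitrary example id
    FakeModel("mistralai/mixtral-8x22b-v0.1"),
]
print([m.id for m in filter_available_models(catalog, mode="nvidia")])  # excluded model dropped
print([m.id for m in filter_available_models(catalog, mode="nim")])  # nothing excluded
```

Note that the real property returns the model objects from the OpenAI-style client directly rather than re-wrapping the ids, which is why the new integration test checks `model.id` against the excluded name.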