diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
index cdf2cc7dc9dca9f1e09eb3fe42db038b730d4e40..7d0c99072b0d70b0c90d344b3c8e71c2cf581203 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
@@ -1,6 +1,8 @@
 """NVIDIA embeddings file."""
 
 from typing import Any, List, Literal, Optional
+import warnings
+from deprecated import deprecated
 
 from llama_index.core.base.embeddings.base import (
     DEFAULT_EMBED_BATCH_SIZE,
@@ -12,14 +14,16 @@ from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 from openai import OpenAI, AsyncOpenAI
 
-BASE_RETRIEVAL_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"
+BASE_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia/"
 DEFAULT_MODEL = "NV-Embed-QA"
 
 MODEL_ENDPOINT_MAP = {
-    DEFAULT_MODEL: BASE_RETRIEVAL_URL,
+    DEFAULT_MODEL: BASE_URL,
     "snowflake/arctic-embed-l": "https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l",
 }
 
+KNOWN_URLS = list(MODEL_ENDPOINT_MAP.values())
+
 
 class Model(BaseModel):
     id: str
@@ -55,6 +59,7 @@ class NVIDIAEmbedding(BaseEmbedding):
     _client: Any = PrivateAttr()
     _aclient: Any = PrivateAttr()
     _mode: str = PrivateAttr("nvidia")
+    _is_hosted: bool = PrivateAttr(True)
 
     def __init__(
         self,
@@ -63,22 +68,62 @@ class NVIDIAEmbedding(BaseEmbedding):
         max_retries: Optional[int] = 5,
         nvidia_api_key: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,  # This could default to 50
         callback_manager: Optional[CallbackManager] = None,
         **kwargs: Any,
     ):
+        """
+        Construct an Embedding interface for NVIDIA NIM.
+
+        This constructor initializes an instance of the NVIDIAEmbedding class, which provides
+        an interface for embedding text using NVIDIA's NIM service.
+
+        Parameters:
+        - model (str, optional): The name of the model to use for embeddings.
+        - timeout (float, optional): The timeout for requests to the NIM service, in seconds. Defaults to 120.
+        - max_retries (int, optional): The maximum number of retries for requests to the NIM service. Defaults to 5.
+        - nvidia_api_key (str, optional): The API key for the NIM service. This is required if using a hosted NIM.
+        - api_key (str, optional): An alternative parameter for providing the API key.
+        - base_url (str, optional): The base URL for the NIM service. If not provided, the service will default to a hosted NIM.
+        - **kwargs: Additional keyword arguments.
+
+        API Keys:
+        - The recommended way to provide the API key is through the `NVIDIA_API_KEY` environment variable.
+
+        Note:
+        - Switch from a hosted NIM (default) to an on-premises NIM using the `base_url` parameter. An API key is required for the hosted NIM.
+        """
+        super().__init__(
+            model=model,
+            embed_batch_size=embed_batch_size,
+            callback_manager=callback_manager,
+            **kwargs,
+        )
+
         if embed_batch_size > 259:
             raise ValueError("The batch size should not be larger than 259.")
 
+        if base_url is None:
+            # TODO: we should not assume unknown models are at the base url, but
+            # we cannot error out here because
+            # NVIDIAEmbedding(model="special").mode("nim", base_url=...)
+            # is valid usage
+            base_url = MODEL_ENDPOINT_MAP.get(model, BASE_URL)
+
         api_key = get_from_param_or_env(
-            "api_key", nvidia_api_key or api_key, "NVIDIA_API_KEY", "none"
+            "api_key",
+            nvidia_api_key or api_key,
+            "NVIDIA_API_KEY",
+            "NO_API_KEY_PROVIDED",
         )
 
-        # TODO: we should not assume unknown models are at the base url, but
-        # we cannot error out here because
-        # NVIDIAEmbedding(model="special").mode("nim", base_url=...)
-        # is valid usage
-        base_url = MODEL_ENDPOINT_MAP.get(model, BASE_RETRIEVAL_URL)
+        self._is_hosted = base_url in KNOWN_URLS
+
+        if self._is_hosted and api_key == "NO_API_KEY_PROVIDED":
+            warnings.warn(
+                "An API key is required for the hosted NIM. This will become an error in 0.2.0."
+            )
 
         self._client = OpenAI(
             api_key=api_key,
@@ -96,18 +141,11 @@ class NVIDIAEmbedding(BaseEmbedding):
         )
         self._aclient._custom_headers = {"User-Agent": "llama-index-embeddings-nvidia"}
 
-        super().__init__(
-            model=model,
-            embed_batch_size=embed_batch_size,
-            callback_manager=callback_manager,
-            **kwargs,
-        )
-
     @property
     def available_models(self) -> List[Model]:
         """Get available models."""
         ids = MODEL_ENDPOINT_MAP.keys()
-        if self._mode == "nim":
+        if not self._is_hosted:
             ids = [model.id for model in self._client.models.list()]
         return [Model(id=id) for id in ids]
 
@@ -115,6 +153,10 @@ def class_name(cls) -> str:
         return "NVIDIAEmbedding"
 
+    @deprecated(
+        version="0.1.2",
+        reason="Will be removed in 0.2. Construct with `base_url` instead.",
+    )
     def mode(
         self,
         mode: Optional[Literal["nvidia", "nim"]] = "nvidia",
@@ -123,14 +165,20 @@ class NVIDIAEmbedding(BaseEmbedding):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
     ) -> "NVIDIAEmbedding":
+        """
+        Deprecated: use NVIDIAEmbedding(base_url="...") instead.
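+
+        For example, instead of
+        NVIDIAEmbedding().mode("nim", base_url="http://localhost:8080/v1"),
+        construct NVIDIAEmbedding(base_url="http://localhost:8080/v1")
+        (the URL here is a placeholder for a local NIM endpoint).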
+        """
         if mode == "nim":
             if not base_url:
                 raise ValueError("base_url is required for nim mode")
+        if mode == "nvidia":
+            api_key = get_from_param_or_env("api_key", api_key, "NVIDIA_API_KEY")
         if not base_url:
             # TODO: we should not assume unknown models are at the base url
-            base_url = MODEL_ENDPOINT_MAP.get(model or self.model, BASE_RETRIEVAL_URL)
+            base_url = MODEL_ENDPOINT_MAP.get(model or self.model, BASE_URL)
 
         self._mode = mode
+        self._is_hosted = base_url in KNOWN_URLS
         if base_url:
             self._client.base_url = base_url
             self._aclient.base_url = base_url
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
index 7256f262c5b28c270658cdcefdca0a76be932922..18eb13a9eb386f37aa4d326c71ffa2b4a82a0d53 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-embeddings-nvidia"
 readme = "README.md"
-version = "0.1.2"
+version = "0.1.3"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/conftest.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/conftest.py
index 6a5fb78d6aeb06cf23c13a400bccbc4966bf620e..fdbc0ab0aa0319d5797a5ac618324628299fb2fa 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/conftest.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/conftest.py
@@ -1,12 +1,25 @@
 import pytest
 import os
 
-from llama_index.embeddings.nvidia import NVIDIAEmbedding
+from llama_index.embeddings.nvidia import NVIDIAEmbedding as Interface
 from llama_index.embeddings.nvidia.base import DEFAULT_MODEL
 from typing import Generator
-from contextlib import contextmanager
+
+
+# This fixture is used to mask the NVIDIA_API_KEY environment variable and restore it
+# after the test. It also returns the value of the NVIDIA_API_KEY environment variable
+# before it was masked so that it can be used in the test.
+@pytest.fixture()
+def masked_env_var() -> Generator[str, None, None]:
+    var = "NVIDIA_API_KEY"
+    try:
+        if val := os.environ.get(var, None):
+            del os.environ[var]
+        yield val
+    finally:
+        if val:
+            os.environ[var] = val
 
 
 def pytest_collection_modifyitems(config, items):
@@ -42,7 +55,7 @@ def pytest_addoption(parser: pytest.Parser) -> None:
 def get_mode(config: pytest.Config) -> dict:
     nim_endpoint = config.getoption("--nim-endpoint")
     if nim_endpoint:
-        return {"mode": "nim", "base_url": nim_endpoint}
+        return {"base_url": nim_endpoint}
     return {}
 
 
@@ -54,23 +67,10 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
     if model := metafunc.config.getoption("--model-id"):
         models = [model]
     elif metafunc.config.getoption("--all-models"):
-        models = [
-            model.id for model in NVIDIAEmbedding().mode(**mode).available_models
-        ]
+        models = [model.id for model in Interface(**mode).available_models]
     metafunc.parametrize("model", models, ids=models)
 
 
 @pytest.fixture()
 def mode(request: pytest.FixtureRequest) -> dict:
     return get_mode(request.config)
-
-
-@contextmanager
-def no_env_var(var: str) -> Generator[None, None, None]:
-    try:
-        if val := os.environ.get(var, None):
-            del os.environ[var]
-        yield
-    finally:
-        if val:
-            os.environ[var] = val
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_api_key.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_api_key.py
index 86278a7368ed4c236a2c34f1e91841cc86d19967..3b5b8ae3dbb169da6c863252a50ced1c4c2adef2 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_api_key.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_api_key.py
@@ -2,63 +2,63 @@ import os
 
 import pytest
 
-from llama_index.embeddings.nvidia import NVIDIAEmbedding
+from llama_index.embeddings.nvidia import NVIDIAEmbedding as Interface
 from typing import Any
 
-from .conftest import no_env_var
-
 
 def get_api_key(instance: Any) -> str:
     return instance._client.api_key
 
 
-def test_create_without_api_key() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        NVIDIAEmbedding()
+def test_create_default_url_without_api_key(masked_env_var: str) -> None:
+    with pytest.warns(UserWarning):
+        Interface()
+
+
+def test_create_unknown_url_without_api_key(masked_env_var: str) -> None:
+    Interface(base_url="https://test_url/v1")
 
 
 @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
-def test_create_with_api_key(param: str) -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        instance = NVIDIAEmbedding(**{param: "just testing no failure"})
-        assert get_api_key(instance) == "just testing no failure"
+def test_create_with_api_key(param: str, masked_env_var: str) -> None:
+    instance = Interface(**{param: "just testing no failure"})
+    assert get_api_key(instance) == "just testing no failure"
 
 
-def test_api_key_priority() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
+def test_api_key_priority(masked_env_var: str) -> None:
+    try:
         os.environ["NVIDIA_API_KEY"] = "ENV"
-        assert get_api_key(NVIDIAEmbedding()) == "ENV"
-        assert get_api_key(NVIDIAEmbedding(nvidia_api_key="PARAM")) == "PARAM"
-        assert get_api_key(NVIDIAEmbedding(api_key="PARAM")) == "PARAM"
-        assert (
-            get_api_key(NVIDIAEmbedding(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH"
-        )
+        assert get_api_key(Interface()) == "ENV"
+        assert get_api_key(Interface(nvidia_api_key="PARAM")) == "PARAM"
+        assert get_api_key(Interface(api_key="PARAM")) == "PARAM"
+        assert get_api_key(Interface(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH"
== "HIGH" + finally: + # we must clean up environ or it may impact other tests + del os.environ["NVIDIA_API_KEY"] @pytest.mark.integration() -def test_missing_api_key_error() -> None: - with no_env_var("NVIDIA_API_KEY"): - client = NVIDIAEmbedding() - with pytest.raises(Exception) as exc_info: - client.get_query_embedding("Hello, world!") - message = str(exc_info.value) - assert "401" in message +def test_missing_api_key_error(masked_env_var: str) -> None: + with pytest.warns(UserWarning): + client = Interface() + with pytest.raises(Exception) as exc_info: + client.get_query_embedding("Hello, world!") + message = str(exc_info.value) + assert "401" in message @pytest.mark.integration() -def test_bogus_api_key_error() -> None: - with no_env_var("NVIDIA_API_KEY"): - client = NVIDIAEmbedding(nvidia_api_key="BOGUS") - with pytest.raises(Exception) as exc_info: - client.get_query_embedding("Hello, world!") - message = str(exc_info.value) - assert "401" in message +def test_bogus_api_key_error(masked_env_var: str) -> None: + client = Interface(nvidia_api_key="BOGUS") + with pytest.raises(Exception) as exc_info: + client.get_query_embedding("Hello, world!") + message = str(exc_info.value) + assert "401" in message @pytest.mark.integration() @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) -def test_api_key(param: str, model: str, mode: dict) -> None: - api_key = os.environ.get("NVIDIA_API_KEY") - with no_env_var("NVIDIA_API_KEY"): - client = NVIDIAEmbedding(**{"model": model, param: api_key}).mode(**mode) - assert client.get_query_embedding("Hello, world!") +def test_api_key(model: str, mode: dict, param: str, masked_env_var: str) -> None: + client = Interface(model=model, **{**mode, **{param: masked_env_var}}) + client.get_query_embedding("Hello, world!") diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_available_models.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_available_models.py index 3b5412b9e6835e4d36b503d4400e91cac0e30936..bdd95fe04185ea148459cb23567d658f9b41b08a 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_available_models.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_available_models.py @@ -1,11 +1,11 @@ import pytest -from llama_index.embeddings.nvidia import NVIDIAEmbedding +from llama_index.embeddings.nvidia import NVIDIAEmbedding as Interface @pytest.mark.integration() def test_available_models(mode: dict) -> None: - models = NVIDIAEmbedding().mode(**mode).available_models + models = Interface(**mode).available_models assert models assert isinstance(models, list) assert all(isinstance(model.id, str) for model in models) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py index 146a017bf8f22c69e5bee978c60b7a47156620d1..30873c1baee59f7ebb50ef1c9e29acbc3ee7601b 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py @@ -5,7 +5,7 @@ from llama_index.embeddings.nvidia import NVIDIAEmbedding @pytest.mark.integration() def test_basic(model: str, mode: dict) -> None: - client = NVIDIAEmbedding(model=model).mode(**mode) + client = NVIDIAEmbedding(model=model, **mode) response = client.get_query_embedding("Hello, world!") assert 
     assert len(response) > 0
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_mode_switch.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_mode_switch.py
index 14d2b0ed09e97e0c248b948f9be72f681e679e10..128f8e7a4e22122f52658ec97462e36607a09555 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_mode_switch.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_mode_switch.py
@@ -1,49 +1,80 @@
 import pytest
 
-from llama_index.embeddings.nvidia import NVIDIAEmbedding
+from llama_index.embeddings.nvidia import NVIDIAEmbedding as Interface
+from llama_index.embeddings.nvidia.base import BASE_URL, KNOWN_URLS
 
-from .conftest import no_env_var
 
-# we don't test this because we do not want to force users to have an API key
-# NVIDIAEmbedding().mode("nim", base_url=...) must work without an API key
-# def test_mode_switch_nvidia_throws_without_key():
-#     emb = NVIDIAEmbedding()
-#     with pytest.raises(ValueError):
-#         emb.mode("nvidia")
+def test_mode_switch_throws_without_key_deprecated(masked_env_var: str):
+    x = Interface()
+    with pytest.raises(ValueError):
+        with pytest.warns(DeprecationWarning):
+            x.mode("nvidia")
 
 
-def test_mode_switch_nvidia_with_key():
-    with no_env_var("NVIDIA_API_KEY"):
-        NVIDIAEmbedding().mode("nvidia", api_key="test")
+def test_mode_switch_with_key_deprecated(masked_env_var: str):
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nvidia", api_key="test")
 
 
-def test_mode_switch_nim_throws_without_url():
-    emb = NVIDIAEmbedding()
+def test_mode_switch_nim_throws_without_url_deprecated():
+    instance = Interface()
     with pytest.raises(ValueError):
-        emb.mode("nim")
+        with pytest.warns(DeprecationWarning):
+            instance.mode("nim")
+
+
+def test_mode_switch_nim_with_url_deprecated():
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nim", base_url="test")
+
+
+def test_mode_switch_param_setting_deprecated():
+    instance = Interface(model="dummy")
+
+    with pytest.warns(DeprecationWarning):
+        instance1 = instance.mode("nim", base_url="https://test_url/v1/")
+    assert instance1.model == "dummy"
+    assert str(instance1._client.base_url) == "https://test_url/v1/"
+
+    with pytest.warns(DeprecationWarning):
+        instance2 = instance1.mode("nvidia", api_key="test", model="dummy-2")
+    assert instance2.model == "dummy-2"
+    assert str(instance2._client.base_url) == BASE_URL
+    assert instance2._client.api_key == "test"
+
+
+UNKNOWN_URLS = [
+    "https://test_url/v1",
+    "https://test_url/v1/",
+    "https://test_url/.../v1",
+    "http://test_url/v1",
+    "http://test_url/v1/",
+    "http://test_url/.../v1/",
+]
+
+
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+def test_mode_switch_unknown_base_url_without_key(masked_env_var: str, base_url: str):
+    Interface(base_url=base_url)
 
 
-def test_mode_switch_nim_with_url():
-    NVIDIAEmbedding().mode("nim", base_url="test")
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
+def test_mode_switch_unknown_base_url_with_key(
+    masked_env_var: str, param: str, base_url: str
+):
+    Interface(base_url=base_url, **{param: "test"})
 
 
-def test_mode_switch_param_setting():
-    emb = NVIDIAEmbedding(model="dummy")
+@pytest.mark.parametrize("base_url", KNOWN_URLS)
+def test_mode_switch_known_base_url_without_key(masked_env_var: str, base_url: str):
+    with pytest.warns(UserWarning):
+        Interface(base_url=base_url)
 
-    nim_emb = emb.mode("nim", base_url="https://test_url/v1/")
base_url="https://test_url/v1/") - assert nim_emb.model == "dummy" - assert str(nim_emb._client.base_url) == "https://test_url/v1/" - assert str(nim_emb._aclient.base_url) == "https://test_url/v1/" - cat_emb = nim_emb.mode("nvidia", api_key="test", model="dummy-2") - assert cat_emb.model == "dummy-2" - assert ( - str(cat_emb._client.base_url) - == "https://ai.api.nvidia.com/v1/retrieval/nvidia/" - ) - assert ( - str(cat_emb._aclient.base_url) - == "https://ai.api.nvidia.com/v1/retrieval/nvidia/" - ) - assert cat_emb._client.api_key == "test" - assert cat_emb._aclient.api_key == "test" +@pytest.mark.parametrize("base_url", KNOWN_URLS) +@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) +def test_mode_switch_known_base_url_with_key( + masked_env_var: str, base_url: str, param: str +): + Interface(base_url=base_url, **{param: "test"}) diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py index 80b32393c160d49f5ac42e360f684dbebc6559eb..2d761043ea4a3f200911fec4b9da91019cd24b0b 100644 --- a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py +++ b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py @@ -4,6 +4,8 @@ from typing import ( List, Literal, ) +from deprecated import deprecated +import warnings from llama_index.core.bridge.pydantic import PrivateAttr, BaseModel from llama_index.core.base.llms.generic_utils import ( @@ -16,6 +18,11 @@ from llama_index.llms.openai_like import OpenAILike DEFAULT_MODEL = "meta/llama3-8b-instruct" BASE_URL = "https://integrate.api.nvidia.com/v1/" +KNOWN_URLS = [ + BASE_URL, + "https://integrate.api.nvidia.com/v1", +] + class Model(BaseModel): id: str @@ -24,6 +31,7 @@ class Model(BaseModel): class NVIDIA(OpenAILike): """NVIDIA's API Catalog Connector.""" + _is_hosted: bool = PrivateAttr(True) _mode: str = PrivateAttr("nvidia") def __init__( @@ -31,9 +39,30 @@ class NVIDIA(OpenAILike): model: str = DEFAULT_MODEL, nvidia_api_key: Optional[str] = None, api_key: Optional[str] = None, + base_url: Optional[str] = BASE_URL, max_tokens: Optional[int] = 1024, **kwargs: Any, ) -> None: + """ + Initialize an instance of the NVIDIA class. + + This class provides an interface to the NVIDIA NIM. By default, it connects to a hosted NIM, + but you can switch to an on-premises NIM by providing a `base_url`. + + Args: + model (str, optional): The model to use for the NIM. + nvidia_api_key (str, optional): The API key for the NVIDIA NIM. Defaults to None. + api_key (str, optional): An alternative parameter for providing the API key. Defaults to None. + base_url (str, optional): The base URL for the NIM. Use this to switch to an on-premises NIM. + max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 1024. + **kwargs: Additional keyword arguments. + + API Keys: + - The recommended way to provide the API key is through the `NVIDIA_API_KEY` environment variable. + + Raises: + DeprecationWarning: If an API key is not provided for a hosted NIM, a warning is issued. This will become an error in version 0.2.0. + """ api_key = get_from_param_or_env( "api_key", nvidia_api_key or api_key, @@ -41,10 +70,17 @@ class NVIDIA(OpenAILike): "NO_API_KEY_PROVIDED", ) + self._is_hosted = base_url in KNOWN_URLS + + if self._is_hosted and api_key == "NO_API_KEY_PROVIDED": + warnings.warn( + "An API key is required for the hosted NIM. 
+            )
+
         super().__init__(
             model=model,
             api_key=api_key,
-            api_base=BASE_URL,
+            api_base=base_url,
             max_tokens=max_tokens,
             is_chat_model=True,
             default_headers={"User-Agent": "llama-index-llms-nvidia"},
@@ -53,24 +89,24 @@ class NVIDIA(OpenAILike):
 
     @property
     def available_models(self) -> List[Model]:
-        exclude = {
-            "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
-        }
-        # do not exclude models in nim mode. the nim administrator has control
-        # over the model name and may deploy an excluded name on the nim's
-        # /chat/completion endpoint.
-        if self._mode == "nim":
-            exclude = set()
-        return [
-            model
-            for model in self._get_client().models.list().data
-            if model.id not in exclude
-        ]
+        models = self._get_client().models.list().data
+        # only exclude models in hosted mode. in non-hosted mode, the administrator has control
+        # over the model name and may deploy an excluded name that will work.
+        if self._is_hosted:
+            exclude = {
+                "mistralai/mixtral-8x22b-v0.1",  # not a /chat/completion endpoint
+            }
+            models = [model for model in models if model.id not in exclude]
+        return models
 
     @classmethod
     def class_name(cls) -> str:
         return "NVIDIA"
 
+    @deprecated(
+        version="0.1.3",
+        reason="Will be removed in 0.2. Construct with `base_url` instead.",
+    )
     def mode(
         self,
         mode: Optional[Literal["nvidia", "nim"]] = "nvidia",
@@ -80,20 +116,7 @@ class NVIDIA(OpenAILike):
         api_key: Optional[str] = None,
     ) -> "NVIDIA":
         """
-        Change the mode.
-
-        There are two modes, "nvidia" and "nim". The "nvidia" mode is the default
-        mode and is used to interact with hosted NIMs. The "nim" mode is used to
-        interact with NVIDIA NIM endpoints, which are typically hosted on-premises.
-
-        For the "nvidia" mode, the "api_key" parameter is available to specify
-        your API key. If not specified, the NVIDIA_API_KEY environment variable
-        will be used.
-
-        For the "nim" mode, the "base_url" parameter is required and the "model"
-        parameter may be necessary. Set base_url to the url of your local NIM
-        endpoint. For instance, "https://localhost:9999/v1". Additionally, the
-        "model" parameter must be set to the name of the model inside the NIM.
+        Deprecated: use NVIDIA(base_url="...") instead.
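+
+        For example, instead of
+        NVIDIA().mode("nim", base_url="https://localhost:9999/v1"),
+        construct NVIDIA(base_url="https://localhost:9999/v1")
+        (the URL here is a placeholder for a local NIM endpoint).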
""" if mode == "nim": if not base_url: diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml index 9321b6536d96a970a53beb400c0e3e1e9438816d..edee1b459c7d796bb8017b855497df90a468cd2a 100644 --- a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-llms-nvidia" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/conftest.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/conftest.py index 2378be82a59c88c5fbe8afe8a4b77b197172f2b1..20f315ff33aa14c1cea55910d5f5d25487d62731 100644 --- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/conftest.py +++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/conftest.py @@ -55,7 +55,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: def get_mode(config: pytest.Config) -> dict: nim_endpoint = config.getoption("--nim-endpoint") if nim_endpoint: - return {"mode": "nim", "base_url": nim_endpoint} + return {"base_url": nim_endpoint} return {} @@ -67,7 +67,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: if model := metafunc.config.getoption("--model-id"): models = [model] elif metafunc.config.getoption("--all-models"): - models = [model.id for model in NVIDIA().mode(**mode).available_models] + models = [model.id for model in NVIDIA(**mode).available_models] metafunc.parametrize("chat_model", models, ids=models) diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_additional_kwargs.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_additional_kwargs.py index d6b82deefdae7c3174a20365595e060316a177de..2536f7cf730cbe69977257b9b73becffeb61c430 100644 --- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_additional_kwargs.py +++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_additional_kwargs.py @@ -5,7 +5,7 @@ from llama_index.llms.nvidia import NVIDIA @pytest.mark.integration() def test_additional_kwargs_success(chat_model: str, mode: dict) -> None: - client = NVIDIA(chat_model).mode(**mode) + client = NVIDIA(chat_model, **mode) assert client.complete( "Hello, world!", stop=["cat", "Cats"], @@ -17,7 +17,7 @@ def test_additional_kwargs_success(chat_model: str, mode: dict) -> None: @pytest.mark.integration() def test_additional_kwargs_wrong_dtype(chat_model: str, mode: dict) -> None: - client = NVIDIA(chat_model).mode(**mode) + client = NVIDIA(chat_model, **mode) with pytest.raises(Exception) as exc_info: client.complete( "Hello, world!", @@ -29,7 +29,7 @@ def test_additional_kwargs_wrong_dtype(chat_model: str, mode: dict) -> None: @pytest.mark.integration() def test_additional_kwargs_wrong_dtype(chat_model: str, mode: dict) -> None: - client = NVIDIA(chat_model).mode(**mode) + client = NVIDIA(chat_model, **mode) with pytest.raises(Exception) as exc_info: client.complete( "Hello, world!", diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_api_key.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_api_key.py index b96a7f66c39d4c6f4e81ffe928dcc71814400234..e040b2c4975c33c4274f5842bc412cb44dac43e7 100644 --- 
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_api_key.py
@@ -11,8 +11,13 @@ def get_api_key(instance: Any) -> str:
     return instance.api_key
 
 
-def test_create_without_api_key(masked_env_var: str) -> None:
-    NVIDIA()
+def test_create_default_url_without_api_key(masked_env_var: str) -> None:
+    with pytest.warns(UserWarning):
+        NVIDIA()
+
+
+def test_create_unknown_url_without_api_key(masked_env_var: str) -> None:
+    NVIDIA(base_url="https://test_url/v1")
 
 
 @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
@@ -35,7 +40,8 @@ def test_api_key_priority(masked_env_var: str) -> None:
 
 @pytest.mark.integration()
 def test_missing_api_key_error(masked_env_var: str) -> None:
-    client = NVIDIA()
+    with pytest.warns(UserWarning):
+        client = NVIDIA()
     with pytest.raises(Exception) as exc_info:
         client.complete("Hello, world!").text
     message = str(exc_info.value)
@@ -53,6 +59,6 @@ def test_bogus_api_key_error(masked_env_var: str) -> None:
 
 @pytest.mark.integration()
 @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
-def test_api_key(param: str, masked_env_var: str) -> None:
-    client = NVIDIA(**{param: masked_env_var})
+def test_api_key(chat_model: str, mode: dict, param: str, masked_env_var: str) -> None:
+    client = NVIDIA(model=chat_model, **{**mode, **{param: masked_env_var}})
     assert client.complete("Hello, world!").text
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_available_models.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_available_models.py
index 01cdd7f7458ce7126728170687858e2735aea6c2..dd32989604e606c459f383ccabf18727b90a708c 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_available_models.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_available_models.py
@@ -5,7 +5,7 @@ from llama_index.llms.nvidia import NVIDIA
 
 @pytest.mark.integration()
 def test_available_models(mode: dict) -> None:
-    models = NVIDIA().mode(**mode).available_models
+    models = NVIDIA(**mode).available_models
     assert models
     assert isinstance(models, list)
     assert all(isinstance(model.id, str) for model in models)
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
index f26fb1e569dc9cbc4ab37a94eb443656dfa9af1a..2fb7d88c2625cba731dba3f59b24ab5d09ced664 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_integration.py
@@ -10,7 +10,7 @@ from llama_index.llms.nvidia import NVIDIA
 @pytest.mark.integration()
 def test_chat(chat_model: str, mode: dict) -> None:
     message = ChatMessage(content="Hello")
-    response = NVIDIA(model=chat_model).mode(**mode).chat([message])
+    response = NVIDIA(model=chat_model, **mode).chat([message])
     assert isinstance(response, ChatResponse)
     assert isinstance(response.message, ChatMessage)
     assert isinstance(response.message.content, str)
@@ -18,7 +18,7 @@ def test_chat(chat_model: str, mode: dict) -> None:
 
 @pytest.mark.integration()
 def test_complete(chat_model: str, mode: dict) -> None:
-    response = NVIDIA(model=chat_model).mode(**mode).complete("Hello")
+    response = NVIDIA(model=chat_model, **mode).complete("Hello")
     assert isinstance(response, CompletionResponse)
     assert isinstance(response.text, str)
 
@@ -26,14 +26,14 @@ def test_complete(chat_model: str, mode: dict) -> None:
 @pytest.mark.integration()
 def test_stream_chat(chat_model: str, mode: dict) -> None:
     message = ChatMessage(content="Hello")
-    gen = NVIDIA(model=chat_model).mode(**mode).stream_chat([message])
+    gen = NVIDIA(model=chat_model, **mode).stream_chat([message])
     assert all(isinstance(response, ChatResponse) for response in gen)
     assert all(isinstance(response.delta, str) for response in gen)
 
 
 @pytest.mark.integration()
 def test_stream_complete(chat_model: str, mode: dict) -> None:
-    gen = NVIDIA(model=chat_model).mode(**mode).stream_complete("Hello")
+    gen = NVIDIA(model=chat_model, **mode).stream_complete("Hello")
     assert all(isinstance(response, CompletionResponse) for response in gen)
     assert all(isinstance(response.delta, str) for response in gen)
 
@@ -42,7 +42,7 @@ def test_stream_complete(chat_model: str, mode: dict) -> None:
 @pytest.mark.asyncio()
 async def test_achat(chat_model: str, mode: dict) -> None:
     message = ChatMessage(content="Hello")
-    response = await NVIDIA(model=chat_model).mode(**mode).achat([message])
+    response = await NVIDIA(model=chat_model, **mode).achat([message])
     assert isinstance(response, ChatResponse)
     assert isinstance(response.message, ChatMessage)
     assert isinstance(response.message.content, str)
@@ -51,7 +51,7 @@ async def test_achat(chat_model: str, mode: dict) -> None:
 @pytest.mark.integration()
 @pytest.mark.asyncio()
 async def test_acomplete(chat_model: str, mode: dict) -> None:
-    response = await NVIDIA(model=chat_model).mode(**mode).acomplete("Hello")
+    response = await NVIDIA(model=chat_model, **mode).acomplete("Hello")
     assert isinstance(response, CompletionResponse)
     assert isinstance(response.text, str)
 
@@ -60,7 +60,7 @@ async def test_acomplete(chat_model: str, mode: dict) -> None:
 @pytest.mark.asyncio()
 async def test_astream_chat(chat_model: str, mode: dict) -> None:
     message = ChatMessage(content="Hello")
-    gen = await NVIDIA(model=chat_model).mode(**mode).astream_chat([message])
+    gen = await NVIDIA(model=chat_model, **mode).astream_chat([message])
     responses = [response async for response in gen]
     assert all(isinstance(response, ChatResponse) for response in responses)
     assert all(isinstance(response.delta, str) for response in responses)
@@ -69,7 +69,7 @@ async def test_astream_chat(chat_model: str, mode: dict) -> None:
 @pytest.mark.integration()
 @pytest.mark.asyncio()
 async def test_astream_complete(chat_model: str, mode: dict) -> None:
-    gen = await NVIDIA(model=chat_model).mode(**mode).astream_complete("Hello")
+    gen = await NVIDIA(model=chat_model, **mode).astream_complete("Hello")
     responses = [response async for response in gen]
     assert all(isinstance(response, CompletionResponse) for response in responses)
     assert all(isinstance(response.delta, str) for response in responses)
@@ -83,6 +83,4 @@ async def test_astream_complete(chat_model: str, mode: dict) -> None:
     ],
 )
 def test_exclude_models(mode: dict, excluded: str) -> None:
-    assert excluded not in [
-        model.id for model in NVIDIA().mode(**mode).available_models
-    ]
+    assert excluded not in [model.id for model in NVIDIA(**mode).available_models]
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_mode_switch.py b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_mode_switch.py
index 887a12994293732cc9afebad7997a80d567a0d8e..96bda87eed6154bf992defcde82f1bd2da1172c7 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_mode_switch.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/tests/test_mode_switch.py
@@ -1,37 +1,80 @@
 import pytest
 
 from llama_index.llms.nvidia import NVIDIA as Interface
-from llama_index.llms.nvidia.base import BASE_URL
+from llama_index.llms.nvidia.base import BASE_URL, KNOWN_URLS
 
 
-def test_mode_switch_nvidia_throws_without_key(masked_env_var: str):
+def test_mode_switch_nvidia_throws_without_key_deprecated(masked_env_var: str):
     x = Interface()
     with pytest.raises(ValueError):
-        x.mode("nvidia")
+        with pytest.warns(DeprecationWarning):
+            x.mode("nvidia")
 
 
-def test_mode_switch_nvidia_with_key(masked_env_var: str):
-    Interface().mode("nvidia", api_key="test")
+def test_mode_switch_nvidia_with_key_deprecated(masked_env_var: str):
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nvidia", api_key="test")
 
 
-def test_mode_switch_nim_throws_without_url():
+def test_mode_switch_nim_throws_without_url_deprecated():
     instance = Interface()
     with pytest.raises(ValueError):
-        instance.mode("nim")
+        with pytest.warns(DeprecationWarning):
+            instance.mode("nim")
 
 
-def test_mode_switch_nim_with_url():
-    Interface().mode("nim", base_url="test")
+def test_mode_switch_nim_with_url_deprecated():
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nim", base_url="test")
 
 
-def test_mode_switch_param_setting():
+def test_mode_switch_param_setting_deprecated():
     instance = Interface(model="dummy")
 
-    instance1 = instance.mode("nim", base_url="https://test_url/v1/")
+    with pytest.warns(DeprecationWarning):
+        instance1 = instance.mode("nim", base_url="https://test_url/v1/")
     assert instance1.model == "dummy"
     assert str(instance1.api_base) == "https://test_url/v1/"
 
-    instance2 = instance1.mode("nvidia", api_key="test", model="dummy-2")
+    with pytest.warns(DeprecationWarning):
+        instance2 = instance1.mode("nvidia", api_key="test", model="dummy-2")
     assert instance2.model == "dummy-2"
     assert str(instance2.api_base) == BASE_URL
     assert instance2.api_key == "test"
+
+
+UNKNOWN_URLS = [
+    "https://test_url/v1",
+    "https://test_url/v1/",
+    "https://test_url/.../v1",
+    "http://test_url/v1",
+    "http://test_url/v1/",
+    "http://test_url/.../v1/",
+]
+
+
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+def test_mode_switch_unknown_base_url_without_key(masked_env_var: str, base_url: str):
+    Interface(base_url=base_url)
+
+
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
+def test_mode_switch_unknown_base_url_with_key(
+    masked_env_var: str, param: str, base_url: str
+):
+    Interface(base_url=base_url, **{param: "test"})
+
+
+@pytest.mark.parametrize("base_url", KNOWN_URLS)
+def test_mode_switch_known_base_url_without_key(masked_env_var: str, base_url: str):
+    with pytest.warns(UserWarning):
+        Interface(base_url=base_url)
+
+
+@pytest.mark.parametrize("base_url", KNOWN_URLS)
+@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
+def test_mode_switch_known_base_url_with_key(
+    masked_env_var: str, base_url: str, param: str
+):
+    Interface(base_url=base_url, **{param: "test"})
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
index 91420d68cf3439938b310ab7acd2d8f590e65f09..bed8c6c0718aa9c8ef9452e3886bf0cde0f06d2d 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
@@ -11,12 +11,19 @@ from llama_index.core.instrumentation.events.rerank import (
 from llama_index.core.postprocessor.types import BaseNodePostprocessor
 from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle
 import requests
-
+import warnings
+from deprecated import deprecated
 from llama_index.core.base.llms.generic_utils import get_from_param_or_env
 
 DEFAULT_MODEL = "nv-rerank-qa-mistral-4b:1"
-DEFAULT_BASE_URL = "https://ai.api.nvidia.com/v1"
+BASE_URL = "https://ai.api.nvidia.com/v1"
+
+MODEL_ENDPOINT_MAP = {
+    DEFAULT_MODEL: BASE_URL,
+}
+
+KNOWN_URLS = list(MODEL_ENDPOINT_MAP.values())
 
 dispatcher = get_dispatcher(__name__)
 
@@ -45,35 +52,71 @@ class NVIDIARerank(BaseNodePostprocessor):
         ge=1,
         description="The maximum batch size supported by the inference server.",
     )
-    _api_key: str = PrivateAttr("API_KEY_NOT_PROVIDED")  # TODO: should be SecretStr
+    _api_key: str = PrivateAttr("NO_API_KEY_PROVIDED")  # TODO: should be SecretStr
     _mode: str = PrivateAttr("nvidia")
-    _base_url: str = PrivateAttr(DEFAULT_BASE_URL)
+    _is_hosted: bool = PrivateAttr(True)
+    _base_url: str = PrivateAttr(BASE_URL)
 
     def _set_api_key(self, nvidia_api_key: str = None, api_key: str = None) -> None:
         self._api_key = get_from_param_or_env(
             "api_key",
             nvidia_api_key or api_key,
             "NVIDIA_API_KEY",
-            "API_KEY_NOT_PROVIDED",
+            "NO_API_KEY_PROVIDED",
         )
 
     def __init__(
         self,
+        model: str = DEFAULT_MODEL,
         nvidia_api_key: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         **kwargs: Any,
     ):
-        super().__init__(**kwargs)
+        """
+        Initialize an NVIDIARerank instance.
+
+        This class provides access to an NVIDIA NIM for reranking. By default, it connects to a hosted NIM, but can be configured to connect to an on-premises NIM using the `base_url` parameter. An API key is required for the hosted NIM.
 
-        self._set_api_key(nvidia_api_key, api_key)
+        Args:
+            model (str): The model to use for reranking.
+            nvidia_api_key (str, optional): The NVIDIA API key. Defaults to None.
+            api_key (str, optional): The API key. Defaults to None.
+            base_url (str, optional): The base URL of the on-premises NIM. Defaults to None.
+            **kwargs: Additional keyword arguments.
+
+        API Key:
+        - The recommended way to provide the API key is through the `NVIDIA_API_KEY` environment variable.
+        """
+        super().__init__(model=model, **kwargs)
+
+        self._base_url = base_url or MODEL_ENDPOINT_MAP.get(model, BASE_URL)
+
+        self._api_key = get_from_param_or_env(
+            "api_key",
+            nvidia_api_key or api_key,
+            "NVIDIA_API_KEY",
+            "NO_API_KEY_PROVIDED",
+        )
+
+        self._is_hosted = self._base_url in KNOWN_URLS
+
+        if self._is_hosted and self._api_key == "NO_API_KEY_PROVIDED":
+            warnings.warn(
+                "An API key is required for the hosted NIM. This will become an error in 0.2.0."
+            )
 
     @property
     def available_models(self) -> List[Model]:
         """Get available models."""
-        # there is one model on ai.nvidia.com and available as a local NIM
-        ids = [DEFAULT_MODEL]
+        # all available models are in the map
+        ids = MODEL_ENDPOINT_MAP.keys()
         return [Model(id=id) for id in ids]
 
+    @deprecated(
+        version="0.1.2",
+        reason="Will be removed in 0.2. Construct with `base_url` instead.",
+    )
     def mode(
         self,
         mode: Literal["nvidia", "nim"] = "nvidia",
@@ -83,29 +126,20 @@ class NVIDIARerank(BaseNodePostprocessor):
         api_key: Optional[str] = None,
     ) -> "NVIDIARerank":
         """
-        Change the mode.
-
-        There are two modes, "nvidia" and "nim". The "nvidia" mode is the default mode
-        and is used to interact with hosted NVIDIA NIMs. The "nim" mode is
-        used to interact with local NVIDIA NIM endpoints, which are typically hosted
-        on-premises.
-
-        For the "nvidia" mode, the "api_key" parameter is available to specify your
-        API key. If not specified, the NVIDIA_API_KEY environment variable will be used.
-
-        For the "nim" mode, the "base_url" is required and "model" is recommended. Set
-        base_url to the url of your NVIDIA NIM endpoint. For instance,
-        "https://localhost:1976/v1", it should end in "/v1". Additionally, the "model"
-        parameter must be set to the name of the model inside the NIM.
+        Deprecated: use NVIDIARerank(base_url=...) instead.
         """
         if isinstance(self, str):
             raise ValueError("Please construct the model before calling mode()")
 
-        if mode == "nim":
+        self._is_hosted = mode == "nvidia"
+
+        if not self._is_hosted:
             if not base_url:
                 raise ValueError("base_url is required for nim mode")
+        else:
+            api_key = get_from_param_or_env("api_key", api_key, "NVIDIA_API_KEY")
 
         if not base_url:
-            base_url = DEFAULT_BASE_URL
+            base_url = BASE_URL
 
         self._mode = mode
         if base_url:
@@ -128,7 +162,7 @@ class NVIDIARerank(BaseNodePostprocessor):
         if model:
             self.model = model
         if api_key:
-            self._set_api_key(api_key)
+            self._api_key = api_key
 
         return self
 
@@ -191,7 +225,7 @@ class NVIDIARerank(BaseNodePostprocessor):
         }
         # the hosted NIM path is different from the local NIM path
         url = self._base_url
-        if self._mode == "nvidia":
+        if self._is_hosted:
             url += "/retrieval/nvidia/reranking"
         else:
             url += "/ranking"
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/pyproject.toml b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/pyproject.toml
index 3e836c160c66fc4274b6fc4714eda9beebcc602f..348f03535142ecf2ffd7b1a17df6c89a9ebf0893 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/pyproject.toml
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/pyproject.toml
@@ -30,7 +30,7 @@ license = "MIT"
 name = "llama-index-postprocessor-nvidia-rerank"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.1.1"
+version = "0.1.2"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/conftest.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/conftest.py
index 1521e3f97aa874d64c1b492652ecd1e21ecc5ea2..6f0b5959ba0ad417fc9aa5746af57a7962097afe 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/conftest.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/conftest.py
@@ -1,12 +1,25 @@
 import pytest
 import os
 
-from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
+from llama_index.postprocessor.nvidia_rerank import NVIDIARerank as Interface
 from llama_index.postprocessor.nvidia_rerank.base import DEFAULT_MODEL
 from typing import Generator
-from contextlib import contextmanager
+
+
+# This fixture is used to mask the NVIDIA_API_KEY environment variable and restore it
+# after the test. It also returns the value of the NVIDIA_API_KEY environment variable
+# before it was masked so that it can be used in the test.
+@pytest.fixture()
+def masked_env_var() -> Generator[str, None, None]:
+    var = "NVIDIA_API_KEY"
+    try:
+        if val := os.environ.get(var, None):
+            del os.environ[var]
+        yield val
+    finally:
+        if val:
+            os.environ[var] = val
 
 
 def pytest_collection_modifyitems(config, items):
@@ -54,23 +67,10 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
     if model := metafunc.config.getoption("--model-id"):
         models = [model]
     elif metafunc.config.getoption("--all-models"):
-        models = [
-            model.id for model in NVIDIARerank().mode(**mode).available_models
-        ]
+        models = [model.id for model in Interface(**mode).available_models]
     metafunc.parametrize("model", models, ids=models)
 
 
 @pytest.fixture()
 def mode(request: pytest.FixtureRequest) -> dict:
     return get_mode(request.config)
-
-
-@contextmanager
-def no_env_var(var: str) -> Generator[None, None, None]:
-    try:
-        if val := os.environ.get(var, None):
-            del os.environ[var]
-        yield
-    finally:
-        if val:
-            os.environ[var] = val
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_api_key.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_api_key.py
index 21ce8398c46936b6f165711165a6653de1058379..86c0a55db2430ccdc4dd4ba5e40078de6010e2e3 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_api_key.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_api_key.py
@@ -2,71 +2,73 @@ import os
 
 import pytest
 
-from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
+from llama_index.postprocessor.nvidia_rerank import NVIDIARerank as Interface
 from llama_index.core.schema import NodeWithScore, Document
 from typing import Any
 
-from .conftest import no_env_var
-
 
 def get_api_key(instance: Any) -> str:
     return instance._api_key
 
 
-def test_create_without_api_key() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        NVIDIARerank()
+def test_create_default_url_without_api_key(masked_env_var: str) -> None:
+    with pytest.warns(UserWarning):
+        Interface()
+
+
+def test_create_unknown_url_without_api_key(masked_env_var: str) -> None:
+    Interface(base_url="https://test_url/v1")
 
 
 @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
-def test_create_with_api_key(param: str) -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        instance = NVIDIARerank(**{param: "just testing no failure"})
-        assert get_api_key(instance) == "just testing no failure"
+def test_create_with_api_key(param: str, masked_env_var: str) -> None:
+    instance = Interface(**{param: "just testing no failure"})
+    assert get_api_key(instance) == "just testing no failure"
 
 
-def test_api_key_priority() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
+def test_api_key_priority(masked_env_var: str) -> None:
+    try:
         os.environ["NVIDIA_API_KEY"] = "ENV"
-        assert get_api_key(NVIDIARerank()) == "ENV"
-        assert get_api_key(NVIDIARerank(nvidia_api_key="PARAM")) == "PARAM"
-        assert get_api_key(NVIDIARerank(api_key="PARAM")) == "PARAM"
-        assert get_api_key(NVIDIARerank(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH"
+        assert get_api_key(Interface()) == "ENV"
+        assert get_api_key(Interface(nvidia_api_key="PARAM")) == "PARAM"
+        assert get_api_key(Interface(api_key="PARAM")) == "PARAM"
+        assert get_api_key(Interface(api_key="LOW", nvidia_api_key="HIGH")) == "HIGH"
+    finally:
+        # we must clean up environ or it may impact other tests
+        del os.environ["NVIDIA_API_KEY"]
 
 
 @pytest.mark.integration()
-def test_missing_api_key_error() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        client = NVIDIARerank()
-        with pytest.raises(Exception) as exc_info:
-            client.postprocess_nodes(
-                [NodeWithScore(node=Document(text="Hello, world!"))],
-                query_str="Hello, world!",
-            )
-        message = str(exc_info.value)
-        assert "401" in message
+def test_missing_api_key_error(masked_env_var: str) -> None:
+    with pytest.warns(UserWarning):
+        client = Interface()
+    with pytest.raises(Exception) as exc_info:
+        client.postprocess_nodes(
+            [NodeWithScore(node=Document(text="Hello, world!"))],
+            query_str="Hello, world!",
+        )
+    message = str(exc_info.value)
+    assert "401" in message
 
 
 @pytest.mark.integration()
-def test_bogus_api_key_error() -> None:
-    with no_env_var("NVIDIA_API_KEY"):
-        client = NVIDIARerank(nvidia_api_key="BOGUS")
-        with pytest.raises(Exception) as exc_info:
-            client.postprocess_nodes(
-                [NodeWithScore(node=Document(text="Hello, world!"))],
-                query_str="Hello, world!",
-            )
-        message = str(exc_info.value)
-        assert "401" in message
+def test_bogus_api_key_error(masked_env_var: str) -> None:
+    client = Interface(nvidia_api_key="BOGUS")
+    with pytest.raises(Exception) as exc_info:
+        client.postprocess_nodes(
+            [NodeWithScore(node=Document(text="Hello, world!"))],
+            query_str="Hello, world!",
+        )
+    message = str(exc_info.value)
+    assert "401" in message
 
 
 @pytest.mark.integration()
 @pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
-def test_api_key(param: str, model: str, mode: dict) -> None:
-    api_key = os.environ.get("NVIDIA_API_KEY")
-    with no_env_var("NVIDIA_API_KEY"):
-        client = NVIDIARerank(**{"model": model, param: api_key}).mode(**mode)
-        assert client.postprocess_nodes(
-            [NodeWithScore(node=Document(text="Hello, world!"))],
-            query_str="Hello, world!",
-        )
+def test_api_key(model: str, mode: dict, param: str, masked_env_var: str) -> None:
+    client = Interface(model=model, **{**mode, **{param: masked_env_var}})
+    assert client.postprocess_nodes(
+        [NodeWithScore(node=Document(text="Hello, world!"))],
+        query_str="Hello, world!",
+    )
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_available_models.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_available_models.py
index 69752c566c39697a6bfe56ac8ff5047ec54fd127..698118baa6cc9de343520b4a97b8cceae8e65b8b 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_available_models.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_available_models.py
@@ -5,7 +5,7 @@ from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
 
 @pytest.mark.integration()
 def test_available_models(mode: dict) -> None:
-    models = NVIDIARerank().mode(**mode).available_models
+    models = NVIDIARerank(**mode).available_models
     assert models
     assert isinstance(models, list)
     assert all(isinstance(model.id, str) for model in models)
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_mode_switch.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_mode_switch.py
index 7801878e008421df413c9ee0665fab0f69a46869..eb2ef2334b8c79c3273462d5ebd2b2179ddb7f29 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_mode_switch.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_mode_switch.py
@@ -1,41 +1,80 @@
 import pytest
 
-from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
-from llama_index.postprocessor.nvidia_rerank.base import DEFAULT_BASE_URL
+from llama_index.postprocessor.nvidia_rerank import NVIDIARerank as Interface
+from llama_index.postprocessor.nvidia_rerank.base import KNOWN_URLS, BASE_URL
 
-from .conftest import no_env_var
 
-# we don't test this because we do not want to force users to have an API key
-# NVIDIARerank().mode("nim", base_url=...) must work without an API key
-# def test_mode_switch_nvidia_throws_without_key():
-#     emb = NVIDIARerank()
-#     with pytest.raises(ValueError):
-#         emb.mode("nvidia")
+def test_mode_switch_throws_without_key_deprecated(masked_env_var: str):
+    x = Interface()
+    with pytest.raises(ValueError):
+        with pytest.warns(DeprecationWarning):
+            x.mode("nvidia")
 
 
-def test_mode_switch_nvidia_with_key():
-    with no_env_var("NVIDIA_API_KEY"):
-        NVIDIARerank().mode("nvidia", api_key="test")
+def test_mode_switch_with_key_deprecated(masked_env_var: str):
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nvidia", api_key="test")
 
 
-def test_mode_switch_nim_throws_without_url():
-    instance = NVIDIARerank()
+def test_mode_switch_nim_throws_without_url_deprecated():
+    instance = Interface()
     with pytest.raises(ValueError):
-        instance.mode("nim")
+        with pytest.warns(DeprecationWarning):
+            instance.mode("nim")
 
 
-def test_mode_switch_nim_with_url():
-    NVIDIARerank().mode("nim", base_url="http://host/test/v1")
+def test_mode_switch_nim_with_url_deprecated():
+    with pytest.warns(DeprecationWarning):
+        Interface().mode("nim", base_url="http://test/v1")
 
 
-def test_mode_switch_param_setting():
-    instance0 = NVIDIARerank(model="dummy")
+def test_mode_switch_param_setting_deprecated():
+    instance = Interface(model="dummy")
 
-    isntance1 = instance0.mode("nim", base_url="https://test_url/v1/")
-    assert isntance1.model == "dummy"
-    assert str(isntance1._base_url) == "https://test_url/v1/"
+    with pytest.warns(DeprecationWarning):
+        instance1 = instance.mode("nim", base_url="https://test_url/v1/")
+    assert instance1.model == "dummy"
+    assert str(instance1._base_url) == "https://test_url/v1/"
 
-    instance2 = isntance1.mode("nvidia", api_key="test", model="dummy-2")
+    with pytest.warns(DeprecationWarning):
+        instance2 = instance1.mode("nvidia", api_key="test", model="dummy-2")
     assert instance2.model == "dummy-2"
-    assert str(instance2._base_url) == DEFAULT_BASE_URL
+    assert str(instance2._base_url) == BASE_URL
     assert instance2._api_key == "test"
+
+
+UNKNOWN_URLS = [
+    "https://test_url/v1",
+    "https://test_url/v1/",
+    "https://test_url/.../v1",
+    "http://test_url/v1",
+    "http://test_url/v1/",
+    "http://test_url/.../v1/",
+]
+
+
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+def test_mode_switch_unknown_base_url_without_key(masked_env_var: str, base_url: str):
+    Interface(base_url=base_url)
+
+
+@pytest.mark.parametrize("base_url", UNKNOWN_URLS)
+@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"])
+def test_mode_switch_unknown_base_url_with_key(
+    masked_env_var: str, param: str, base_url: str
+):
+    Interface(base_url=base_url, **{param: "test"})
+
+
+@pytest.mark.parametrize("base_url", KNOWN_URLS)
+def test_mode_switch_known_base_url_without_key(masked_env_var: str, base_url: str):
+    with pytest.warns(UserWarning):
+        Interface(base_url=base_url)
+
+
+@pytest.mark.parametrize("base_url", KNOWN_URLS)
+@pytest.mark.parametrize("param", ["nvidia_api_key", "api_key"]) +def test_mode_switch_known_base_url_with_key( + masked_env_var: str, base_url: str, param: str +): + Interface(base_url=base_url, **{param: "test"}) diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_postprocessor_nvidia_rerank.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_postprocessor_nvidia_rerank.py index 3cdd6a33e2af387210425e0a2ecfe1e6ffb9998d..6ef77b64876e8c6a2a3ef701b468382799d8c6ba 100644 --- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_postprocessor_nvidia_rerank.py +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/tests/test_postprocessor_nvidia_rerank.py @@ -34,13 +34,9 @@ def nodes(documents: List[Document]) -> List[NodeWithScore]: @pytest.mark.integration() def test_basic(model: str, mode: dict) -> None: text = "Testing leads to failure, and failure leads to understanding." - result = ( - NVIDIARerank(model=model) - .mode(**mode) - .postprocess_nodes( - [NodeWithScore(node=Document(text=text))], - query_str=text, - ) + result = NVIDIARerank(model=model, **mode).postprocess_nodes( + [NodeWithScore(node=Document(text=text))], + query_str=text, ) assert result assert isinstance(result, list) @@ -55,13 +51,9 @@ def test_basic(model: str, mode: dict) -> None: def test_accuracy(model: str, mode: dict) -> None: texts = ["first", "last"] query = "last" - result = ( - NVIDIARerank(model=model) - .mode(**mode) - .postprocess_nodes( - [NodeWithScore(node=Document(text=text)) for text in texts], - query_str=query, - ) + result = NVIDIARerank(model=model, **mode).postprocess_nodes( + [NodeWithScore(node=Document(text=text)) for text in texts], + query_str=query, ) assert result assert isinstance(result, list) @@ -74,7 +66,7 @@ def test_accuracy(model: str, mode: dict) -> None: @pytest.mark.integration() def test_direct_empty_docs(query: str, model: str, mode: dict) -> None: - ranker = NVIDIARerank(model=model).mode(**mode) + ranker = NVIDIARerank(model=model, **mode) result_docs = ranker.postprocess_nodes(nodes=[], query_str=query) assert len(result_docs) == 0 @@ -85,7 +77,7 @@ def test_direct_top_n_negative( ) -> None: orig = NVIDIARerank.Config.validate_assignment NVIDIARerank.Config.validate_assignment = False - ranker = NVIDIARerank(model=model).mode(**mode) + ranker = NVIDIARerank(model=model, **mode) ranker.top_n = -100 NVIDIARerank.Config.validate_assignment = orig result = ranker.postprocess_nodes(nodes=nodes, query_str=query) @@ -96,7 +88,7 @@ def test_direct_top_n_negative( def test_direct_top_n_zero( query: str, nodes: List[NodeWithScore], model: str, mode: dict ) -> None: - ranker = NVIDIARerank(model=model).mode(**mode) + ranker = NVIDIARerank(model=model, **mode) ranker.top_n = 0 result = ranker.postprocess_nodes(nodes=nodes, query_str=query) assert len(result) == 0 @@ -106,7 +98,7 @@ def test_direct_top_n_zero( def test_direct_top_n_one( query: str, nodes: List[NodeWithScore], model: str, mode: dict ) -> None: - ranker = NVIDIARerank(model=model).mode(**mode) + ranker = NVIDIARerank(model=model, **mode) ranker.top_n = 1 result = ranker.postprocess_nodes(nodes=nodes, query_str=query) assert len(result) == 1 @@ -116,7 +108,7 @@ def test_direct_top_n_one( def test_direct_top_n_equal_len_docs( query: str, nodes: List[NodeWithScore], model: str, mode: dict ) -> None: - ranker = NVIDIARerank(model=model).mode(**mode) + ranker = 
     ranker.top_n = len(nodes)
     result = ranker.postprocess_nodes(nodes=nodes, query_str=query)
     assert len(result) == len(nodes)
@@ -126,7 +118,7 @@ def test_direct_top_n_equal_len_docs(
 def test_direct_top_n_greater_len_docs(
     query: str, nodes: List[NodeWithScore], model: str, mode: dict
 ) -> None:
-    ranker = NVIDIARerank(model=model).mode(**mode)
+    ranker = NVIDIARerank(model=model, **mode)
     ranker.top_n = len(nodes) * 2
     result = ranker.postprocess_nodes(nodes=nodes, query_str=query)
     assert len(result) == len(nodes)
@@ -134,13 +126,13 @@ def test_direct_top_n_greater_len_docs(
 @pytest.mark.parametrize("batch_size", [-10, 0])
 def test_invalid_max_batch_size(model: str, mode: dict, batch_size: int) -> None:
-    ranker = NVIDIARerank(model=model).mode(**mode)
+    ranker = NVIDIARerank(model=model, **mode)
     with pytest.raises(ValueError):
         ranker.max_batch_size = batch_size
 
 
 def test_invalid_top_n(model: str, mode: dict) -> None:
-    ranker = NVIDIARerank(model=model).mode(**mode)
+    ranker = NVIDIARerank(model=model, **mode)
     with pytest.raises(ValueError):
         ranker.top_n = -10
 
@@ -167,7 +159,7 @@ def test_rerank_batching(
 ) -> None:
     assert len(nodes) > batch_size, "test requires more nodes"
 
-    ranker = NVIDIARerank(model=model).mode(**mode)
+    ranker = NVIDIARerank(model=model, **mode)
     ranker.top_n = top_n
     ranker.max_batch_size = batch_size
     result = ranker.postprocess_nodes(nodes=nodes, query_str=query)
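
Taken together, the three connectors now share one constructor-based configuration pattern in place of the deprecated mode() switch. A minimal usage sketch, assuming the three packages above are installed; "https://localhost:9999/v1" is a placeholder on-premises endpoint, not a real deployment:

    from llama_index.llms.nvidia import NVIDIA
    from llama_index.embeddings.nvidia import NVIDIAEmbedding
    from llama_index.postprocessor.nvidia_rerank import NVIDIARerank

    # Hosted NIM (default): an API key is expected, preferably via the
    # NVIDIA_API_KEY environment variable; omitting it warns today and
    # becomes an error in 0.2.0.
    llm = NVIDIA()

    # On-premises NIM: pass base_url at construction instead of calling
    # the deprecated .mode("nim", base_url=...); no API key is required
    # for an unknown (non-hosted) URL.
    llm = NVIDIA(base_url="https://localhost:9999/v1")
    embedder = NVIDIAEmbedding(base_url="https://localhost:9999/v1")
    reranker = NVIDIARerank(base_url="https://localhost:9999/v1")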