From 7c656548313598f7ce7e8ce586de8244328a77b0 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee <matt@cs.wisc.edu>
Date: Wed, 8 May 2024 11:33:54 -0400
Subject: [PATCH] set default max_tokens to 1024 (#13371)

If unset, the service decides and picks a value small enough that
users report the model as broken or inaccurate.
---
 .../llama-index-llms-nvidia/llama_index/llms/nvidia/base.py | 2 ++
 .../llms/llama-index-llms-nvidia/pyproject.toml             | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
index 173e92d921..61d28b47d2 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/llama_index/llms/nvidia/base.py
@@ -32,6 +32,7 @@ class NVIDIA(OpenAILike):
         model: str = DEFAULT_MODEL,
         nvidia_api_key: Optional[str] = None,
         api_key: Optional[str] = None,
+        max_tokens: Optional[int] = 1024,
         **kwargs: Any,
     ) -> None:
         api_key = get_from_param_or_env(
@@ -45,6 +46,7 @@ class NVIDIA(OpenAILike):
             model=model,
             api_key=api_key,
             api_base=BASE_URL,
+            max_tokens=max_tokens,
             is_chat_model=True,
             default_headers={"User-Agent": "llama-index-llms-nvidia"},
             **kwargs,
diff --git a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
index a90fb7fcf0..e6a493855f 100644
--- a/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-nvidia/pyproject.toml
@@ -30,7 +30,7 @@ license = "MIT"
 name = "llama-index-llms-nvidia"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.1.0"
+version = "0.1.1"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
--
GitLab
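
Note on the change (not part of the patch): with this default in place, constructing the NVIDIA LLM without an explicit max_tokens should send 1024 to the service, and since it is an ordinary constructor argument, callers can still override it. A minimal usage sketch, assuming the published llama-index-llms-nvidia package and an NVIDIA_API_KEY set in the environment:

    from llama_index.llms.nvidia import NVIDIA

    # After this patch, omitting max_tokens sends 1024 to the service
    # instead of letting it choose its own (often very small) completion cap.
    llm = NVIDIA()

    # The default is a plain keyword argument, so it can still be raised
    # or lowered per instance when longer or shorter completions are needed.
    long_llm = NVIDIA(max_tokens=2048)

    print(llm.complete("Summarize retrieval-augmented generation in one sentence."))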