diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4bbc357023698ecc3097d898f4e173371ffc761..e97f0e924de1e5affefee4c90f4368da962dd87b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # ChangeLog
 
+## Unreleased
+
+### Bug Fixes / Nits
+
+- Improved default timeouts/retries for OpenAI (#8819)
+- Add back key validation for OpenAI (#8819)
+- Disable automatic LLM/Embedding model downloads, give informative error (#8819)
+
 ## [0.8.67] - 2023-11-10
 
 ### New Features
diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md
index 1d736a10e0ea94765d7bd439bde9daef9c0f242d..2652cd7acbc19361cfceb11e81b184af3591129d 100644
--- a/docs/getting_started/installation.md
+++ b/docs/getting_started/installation.md
@@ -24,11 +24,11 @@ need additional environment keys + tokens setup depending on the LLM provider.
 
 ## Local Model Setup
 
-If you don't wish to use OpenAI, the environment will automatically fallback to using `LlamaCPP` and `llama2-chat-13B` for text generation and `BAAI/bge-small-en` for retrieval and embeddings. These models will all run locally.
+If you don't wish to use OpenAI, consider setting up a local LLM and embedding model in the service context.
 
-In order to use `LlamaCPP`, follow the installation guide [here](/examples/llm/llama_2_llama_cpp.ipynb). You'll need to install the `llama-cpp-python` package, preferably compiled to support your GPU. This will use around 11.5GB of memory across the CPU and GPU.
+A full guide to using and configuring LLMs is available [here](/module_guides/models/llms.md).
 
-In order to use the local embeddings, simply run `pip install sentence-transformers`. The local embedding model uses about 500MB of memory.
+A full guide to using and configuring embedding models is available [here](/module_guides/models/embeddings.md).
 
 ## Installation from Source
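For reference, the service-context setup the rewritten installation docs point to looks roughly like the sketch below. This is against the 0.8.x-era API; the `"local"` shortcuts resolve to library-chosen default models that vary by version, so treat the exact models (and the download they may trigger) as assumptions.

```python
# A minimal sketch of opting out of OpenAI via the service context.
from llama_index import ServiceContext, set_global_service_context

service_context = ServiceContext.from_defaults(
    llm="local",          # resolves to a local LlamaCPP-backed LLM
    embed_model="local",  # resolves to a local HuggingFace embedding model
)
set_global_service_context(service_context)
```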
diff --git a/llama_index/embeddings/openai.py b/llama_index/embeddings/openai.py
index 4c6ef9ced6a8001db180aaaca392a8f7e36822aa..7f1654b464a40a6d71d6660427f549c86e3501de 100644
--- a/llama_index/embeddings/openai.py
+++ b/llama_index/embeddings/openai.py
@@ -233,6 +233,7 @@ class OpenAIEmbedding(BaseEmbedding):
     max_retries: int = Field(
         default=10, description="Maximum number of retries.", gte=0
     )
+    timeout: float = Field(default=60.0, description="Timeout for each request.", gte=0)
 
     _query_engine: OpenAIEmbeddingModeModel = PrivateAttr()
     _text_engine: OpenAIEmbeddingModeModel = PrivateAttr()
@@ -249,6 +250,7 @@ class OpenAIEmbedding(BaseEmbedding):
         api_base: Optional[str] = None,
         api_version: Optional[str] = None,
         max_retries: int = 10,
+        timeout: float = 60.0,
         callback_manager: Optional[CallbackManager] = None,
         **kwargs: Any,
     ) -> None:
@@ -272,6 +274,7 @@ class OpenAIEmbedding(BaseEmbedding):
             api_base=api_base,
             api_version=api_version,
             max_retries=max_retries,
+            timeout=timeout,
             **kwargs,
         )
 
@@ -292,6 +295,7 @@ class OpenAIEmbedding(BaseEmbedding):
             "api_key": self.api_key,
             "base_url": self.api_base,
             "max_retries": self.max_retries,
+            "timeout": self.timeout,
         }
 
     def _get_query_embedding(self, query: str) -> List[float]:
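As a usage note, the new `timeout` field rides alongside the existing `max_retries` and is forwarded to the underlying OpenAI client through `_get_credential_kwargs()`. A minimal sketch of overriding both (the values are arbitrary, not recommendations):

```python
# Sketch: tuning the per-request timeout together with the retry count.
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding(
    timeout=30.0,    # per-request timeout in seconds; the new default is 60.0
    max_retries=10,  # the embedding client's default retry count
)
```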
" "If you intended to use OpenAI, please check your OPENAI_API_KEY.\n" "Original error:\n" f"{e!s}" + "\nConsider using embed_model='local'.\n" + "Visit our documentation for more embedding options: " + "https://docs.llamaindex.ai/en/stable/module_guides/models/" + "embeddings.html#modules" "\n******" ) diff --git a/llama_index/llms/azure_openai.py b/llama_index/llms/azure_openai.py index d7fc55ed10b822ed7bf2136835512c3219f3f077..444f0b2c7147c0e03b026338450baf98335794e5 100644 --- a/llama_index/llms/azure_openai.py +++ b/llama_index/llms/azure_openai.py @@ -59,7 +59,8 @@ class AzureOpenAI(OpenAI): temperature: float = 0.1, max_tokens: Optional[int] = None, additional_kwargs: Optional[Dict[str, Any]] = None, - max_retries: int = 10, + max_retries: int = 3, + timeout: float = 60.0, api_key: Optional[str] = None, api_version: Optional[str] = None, # azure specific @@ -94,6 +95,7 @@ class AzureOpenAI(OpenAI): max_tokens=max_tokens, additional_kwargs=additional_kwargs, max_retries=max_retries, + timeout=timeout, api_key=api_key, azure_endpoint=azure_endpoint, azure_deployment=azure_deployment, diff --git a/llama_index/llms/openai.py b/llama_index/llms/openai.py index aa5dc9e641a88fc6c3dc7cfdad65d0be2d125aab..d58a2f4a25708d3d3cecabe2174791c55ecc4d59 100644 --- a/llama_index/llms/openai.py +++ b/llama_index/llms/openai.py @@ -73,7 +73,16 @@ class OpenAI(LLM): additional_kwargs: Dict[str, Any] = Field( default_factory=dict, description="Additional kwargs for the OpenAI API." ) - max_retries: int = Field(description="The maximum number of API retries.") + max_retries: int = Field( + default=3, + description="The maximum number of API retries.", + gte=0, + ) + timeout: float = Field( + default=60.0, + description="The timeout, in seconds, for API requests.", + gte=0, + ) api_key: str = Field(default=None, description="The OpenAI API key.", exclude=True) api_base: str = Field(description="The base URL for OpenAI API.") @@ -88,7 +97,8 @@ class OpenAI(LLM): temperature: float = 0.1, max_tokens: Optional[int] = None, additional_kwargs: Optional[Dict[str, Any]] = None, - max_retries: int = 10, + max_retries: int = 3, + timeout: float = 60.0, api_key: Optional[str] = None, api_base: Optional[str] = None, api_version: Optional[str] = None, @@ -113,6 +123,7 @@ class OpenAI(LLM): api_key=api_key, api_version=api_version, api_base=api_base, + timeout=timeout, **kwargs, ) @@ -195,6 +206,7 @@ class OpenAI(LLM): "api_key": self.api_key, "base_url": self.api_base, "max_retries": self.max_retries, + "timeout": self.timeout, **kwargs, } diff --git a/llama_index/llms/openai_utils.py b/llama_index/llms/openai_utils.py index 12a891961e38b2a35b89ad08b6e891d936a7f9f3..fd594571843f13c138e8820adb55a8b63cf4a5ad 100644 --- a/llama_index/llms/openai_utils.py +++ b/llama_index/llms/openai_utils.py @@ -1,4 +1,5 @@ import logging +import os import time from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union @@ -361,3 +362,10 @@ def resolve_from_aliases(*args: Optional[str]) -> Optional[str]: if arg is not None: return arg return None + + +def validate_openai_api_key(api_key: Optional[str] = None) -> None: + openai_api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + + if not openai_api_key: + raise ValueError(MISSING_API_KEY_ERROR_MESSAGE) diff --git a/llama_index/llms/utils.py b/llama_index/llms/utils.py index 1b633e8c243b86f627689ea62d1f8ffe3f8c4263..c4101d101c376b0e1d950349346c94500d984429 100644 --- a/llama_index/llms/utils.py +++ b/llama_index/llms/utils.py @@ -8,6 +8,7 @@ from 
diff --git a/llama_index/llms/utils.py b/llama_index/llms/utils.py
index 1b633e8c243b86f627689ea62d1f8ffe3f8c4263..c4101d101c376b0e1d950349346c94500d984429 100644
--- a/llama_index/llms/utils.py
+++ b/llama_index/llms/utils.py
@@ -8,6 +8,7 @@
 from llama_index.llms.llama_cpp import LlamaCPP
 from llama_index.llms.llama_utils import completion_to_prompt, messages_to_prompt
 from llama_index.llms.mock import MockLLM
 from llama_index.llms.openai import OpenAI
+from llama_index.llms.openai_utils import validate_openai_api_key
 
 LLMType = Union[str, LLM, BaseLanguageModel]
@@ -18,9 +19,10 @@ def resolve_llm(llm: Optional[LLMType] = None) -> LLM:
         # return default OpenAI model. If it fails, return LlamaCPP
         try:
             llm = OpenAI()
+            validate_openai_api_key(llm.api_key)
         except ValueError as e:
             raise ValueError(
-                "******\n"
+                "\n******\n"
                 "Could not load OpenAI model. "
                 "If you intended to use OpenAI, please check your OPENAI_API_KEY.\n"
                 "Original error:\n"
diff --git a/llama_index/readers/file/base.py b/llama_index/readers/file/base.py
index feb78bef501bf8fccb39468c13ba0d2934ccd863..27278d1d2d764754b776f2e86fc49cda62ecff8f 100644
--- a/llama_index/readers/file/base.py
+++ b/llama_index/readers/file/base.py
@@ -231,9 +231,15 @@ class SimpleDirectoryReader(BaseReader):
                     self.file_extractor[file_suffix] = reader_cls()
                 reader = self.file_extractor[file_suffix]
 
+                # load data -- catch all errors except for ImportError
                 try:
                     docs = reader.load_data(input_file, extra_info=metadata)
+                except ImportError as e:
+                    # ensure that ImportError is raised so user knows
+                    # about missing dependencies
+                    raise ImportError(str(e))
                 except Exception as e:
+                    # otherwise, just skip the file and report the error
                     print(
                         f"Failed to load file {input_file} with error: {e}. Skipping...",
                         flush=True,
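To illustrate the reader change: `load_data` now re-raises `ImportError` (a missing optional dependency such as a file parser) while continuing to print and skip files that fail for any other reason. A small sketch, assuming a placeholder `./data` directory of mixed file types:

```python
# Sketch: missing extras now surface to the caller instead of being skipped.
from llama_index import SimpleDirectoryReader

reader = SimpleDirectoryReader("./data")  # "./data" is a placeholder path

try:
    documents = reader.load_data()
except ImportError as e:
    # With this patch, an uninstalled optional dependency is raised
    # immediately rather than silently dropping the file.
    print(f"Missing optional dependency: {e}")
```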