From d33b789de9635dcf19e02050c6a0487fcfeb30ad Mon Sep 17 00:00:00 2001
From: Logan <logan.markewich@live.com>
Date: Tue, 12 Mar 2024 12:28:56 -0600
Subject: [PATCH] add retries for openai LLM rate limit errors (#11867)

---
 .../llama_index/llms/openai/base.py             | 17 +++++++++++++++++
 .../llama_index/llms/openai/utils.py            |  2 +-
 .../llms/llama-index-llms-openai/pyproject.toml |  2 +-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
index 379a0f65ac..598e29bad6 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/base.py
@@ -46,6 +46,7 @@ from llama_index.core.base.llms.generic_utils import (
 from llama_index.core.llms.llm import LLM
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.openai.utils import (
+    create_retry_decorator,
     from_openai_message,
     is_chat_model,
     is_function_calling_model,
@@ -64,6 +65,14 @@ from openai.types.chat.chat_completion_chunk import (
 
 DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"
 
+llm_retry_decorator = create_retry_decorator(
+    max_retries=6,
+    random_exponential=True,
+    stop_after_delay_seconds=60,
+    min_seconds=1,
+    max_seconds=20,
+)
+
 
 @runtime_checkable
 class Tokenizer(Protocol):
@@ -290,6 +299,7 @@ class OpenAI(LLM):
             base_kwargs["max_tokens"] = self.max_tokens
         return {**base_kwargs, **self.additional_kwargs}
 
+    @llm_retry_decorator
     def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         client = self._get_client()
         message_dicts = to_openai_message_dicts(messages)
@@ -353,6 +363,7 @@ class OpenAI(LLM):
                     t.id += tc_delta.id or ""
         return tool_calls
 
+    @llm_retry_decorator
     def _stream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
@@ -405,6 +416,7 @@ class OpenAI(LLM):
 
         return gen()
 
+    @llm_retry_decorator
     def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -422,6 +434,7 @@ class OpenAI(LLM):
             additional_kwargs=self._get_response_token_counts(response),
         )
 
+    @llm_retry_decorator
     def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -530,6 +543,7 @@ class OpenAI(LLM):
             astream_complete_fn = self._astream_complete
         return await astream_complete_fn(prompt, **kwargs)
 
+    @llm_retry_decorator
     async def _achat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
@@ -547,6 +561,7 @@ class OpenAI(LLM):
             additional_kwargs=self._get_response_token_counts(response),
         )
 
+    @llm_retry_decorator
     async def _astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
@@ -610,6 +625,7 @@ class OpenAI(LLM):
 
         return gen()
 
+    @llm_retry_decorator
     async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         aclient = self._get_aclient()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -627,6 +643,7 @@ class OpenAI(LLM):
             additional_kwargs=self._get_response_token_counts(response),
         )
 
+    @llm_retry_decorator
     async def _astream_complete(
         self, prompt: str, **kwargs: Any
     ) -> CompletionResponseAsyncGen:
diff --git a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
index d3cd4cdc0a..2adeee0082 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/utils.py
@@ -135,7 +135,7 @@ def create_retry_decorator(
     random_exponential: bool = False,
     stop_after_delay_seconds: Optional[float] = None,
     min_seconds: float = 4,
-    max_seconds: float = 10,
+    max_seconds: float = 60,
 ) -> Callable[[Any], Any]:
     wait_strategy = (
         wait_random_exponential(min=min_seconds, max=max_seconds)
diff --git a/llama-index-integrations/llms/llama-index-llms-openai/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-openai/pyproject.toml
index 1b0333b9b8..3612f5ca70 100644
--- a/llama-index-integrations/llms/llama-index-llms-openai/pyproject.toml
+++ b/llama-index-integrations/llms/llama-index-llms-openai/pyproject.toml
@@ -29,7 +29,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openai"
 readme = "README.md"
-version = "0.1.7"
+version = "0.1.8"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
--
GitLab
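The diff shows only the signature and wait-strategy selection of create_retry_decorator; its stop condition and the set of retried exceptions are not visible here. As a rough, hypothetical sketch of what a tenacity-based implementation with this signature could look like (the openai.RateLimitError filter and the combined stop condition are assumptions inferred from the patch subject, not shown in the diff):

from typing import Any, Callable, Optional

import openai
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    stop_after_delay,
    wait_exponential,
    wait_random_exponential,
)


def create_retry_decorator(
    max_retries: int,
    random_exponential: bool = False,
    stop_after_delay_seconds: Optional[float] = None,
    min_seconds: float = 4,
    max_seconds: float = 60,
) -> Callable[[Any], Any]:
    # Wait strategy: jittered exponential backoff if requested, otherwise
    # plain exponential backoff bounded by min_seconds and max_seconds.
    wait_strategy = (
        wait_random_exponential(min=min_seconds, max=max_seconds)
        if random_exponential
        else wait_exponential(multiplier=1, min=min_seconds, max=max_seconds)
    )
    # Stop after max_retries attempts, or earlier if a total time budget
    # was given (assumption: the two budgets are combined with "or").
    stop_strategy = stop_after_attempt(max_retries)
    if stop_after_delay_seconds is not None:
        stop_strategy = stop_strategy | stop_after_delay(stop_after_delay_seconds)
    return retry(
        reraise=True,
        stop=stop_strategy,
        wait=wait_strategy,
        # Assumption: retry only rate-limit errors, per the patch subject.
        retry=retry_if_exception_type(openai.RateLimitError),
    )

With the values used in base.py (max_retries=6, random_exponential=True, stop_after_delay_seconds=60, min_seconds=1, max_seconds=20), a rate-limited _chat call would be retried up to six times with randomized 1 to 20 second waits, giving up after roughly a minute. Since tenacity's retry decorator also wraps coroutine functions transparently, the same llm_retry_decorator can sit on both the sync methods (_chat, _complete) and their async counterparts (_achat, _acomplete).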