diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4bbc357023698ecc3097d898f4e173371ffc761..e97f0e924de1e5affefee4c90f4368da962dd87b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # ChangeLog
 
+## Unreleased
+
+### Bug Fixes / Nits
+
+- Improved default timeouts/retries for OpenAI (#8819)
+- Add back key validation for OpenAI (#8819)
+- Disable automatic LLM/Embedding model downloads, give informative error (#8819)
+
 ## [0.8.67] - 2023-11-10
 
 ### New Features
diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md
index 1d736a10e0ea94765d7bd439bde9daef9c0f242d..2652cd7acbc19361cfceb11e81b184af3591129d 100644
--- a/docs/getting_started/installation.md
+++ b/docs/getting_started/installation.md
@@ -24,11 +24,11 @@ need additional environment keys + tokens setup depending on the LLM provider.
 
 ## Local Model Setup
 
-If you don't wish to use OpenAI, the environment will automatically fallback to using `LlamaCPP` and `llama2-chat-13B` for text generation and `BAAI/bge-small-en` for retrieval and embeddings. These models will all run locally.
+If you don't wish to use OpenAI, consider setting up a local LLM and embedding model in the service context.
 
-In order to use `LlamaCPP`, follow the installation guide [here](/examples/llm/llama_2_llama_cpp.ipynb). You'll need to install the `llama-cpp-python` package, preferably compiled to support your GPU. This will use around 11.5GB of memory across the CPU and GPU.
+A full guide to using and configuring LLMs is available [here](/module_guides/models/llms.md).
 
-In order to use the local embeddings, simply run `pip install sentence-transformers`. The local embedding model uses about 500MB of memory.
+A full guide to using and configuring embedding models is available [here](/module_guides/models/embeddings.md).
 
 ## Installation from Source
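For reference, the service-context setup the rewritten installation docs point to looks roughly like the sketch below. This is against the 0.8.x-era API; the `"local"` shortcuts resolve to library-chosen default models that vary by version, so treat the exact models (and the download they may trigger) as assumptions.

```python
# A minimal sketch of opting out of OpenAI via the service context.
from llama_index import ServiceContext, set_global_service_context

service_context = ServiceContext.from_defaults(
    llm="local",          # resolves to a local LlamaCPP-backed LLM
    embed_model="local",  # resolves to a local HuggingFace embedding model
)
set_global_service_context(service_context)
```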
diff --git a/llama_index/embeddings/openai.py b/llama_index/embeddings/openai.py
index 4c6ef9ced6a8001db180aaaca392a8f7e36822aa..7f1654b464a40a6d71d6660427f549c86e3501de 100644
--- a/llama_index/embeddings/openai.py
+++ b/llama_index/embeddings/openai.py
@@ -233,6 +233,7 @@ class OpenAIEmbedding(BaseEmbedding):
     max_retries: int = Field(
         default=10, description="Maximum number of retries.", gte=0
     )
+    timeout: float = Field(default=60.0, description="Timeout for each request.", gte=0)
 
     _query_engine: OpenAIEmbeddingModeModel = PrivateAttr()
     _text_engine: OpenAIEmbeddingModeModel = PrivateAttr()
@@ -249,6 +250,7 @@ class OpenAIEmbedding(BaseEmbedding):
         api_base: Optional[str] = None,
         api_version: Optional[str] = None,
         max_retries: int = 10,
+        timeout: float = 60.0,
         callback_manager: Optional[CallbackManager] = None,
         **kwargs: Any,
     ) -> None:
@@ -272,6 +274,7 @@ class OpenAIEmbedding(BaseEmbedding):
             api_base=api_base,
             api_version=api_version,
             max_retries=max_retries,
+            timeout=timeout,
             **kwargs,
         )
 
@@ -292,6 +295,7 @@ class OpenAIEmbedding(BaseEmbedding):
             "api_key": self.api_key,
             "base_url": self.api_base,
             "max_retries": self.max_retries,
+            "timeout": self.timeout,
         }
 
     def _get_query_embedding(self, query: str) -> List[float]:
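As a usage note, the new `timeout` field rides alongside the existing `max_retries` and is forwarded to the underlying OpenAI client through `_get_credential_kwargs()`. A minimal sketch of overriding both (the values are arbitrary, not recommendations):

```python
# Sketch: tuning the per-request timeout together with the retry count.
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding(
    timeout=30.0,    # per-request timeout in seconds; the new default is 60.0
    max_retries=10,  # the embedding client's default retry count
)
```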
" "If you intended to use OpenAI, please check your OPENAI_API_KEY.\n" "Original error:\n" f"{e!s}" + "\nConsider using embed_model='local'.\n" + "Visit our documentation for more embedding options: " + "https://docs.llamaindex.ai/en/stable/module_guides/models/" + "embeddings.html#modules" "\n******" ) diff --git a/llama_index/llms/azure_openai.py b/llama_index/llms/azure_openai.py index d7fc55ed10b822ed7bf2136835512c3219f3f077..444f0b2c7147c0e03b026338450baf98335794e5 100644 --- a/llama_index/llms/azure_openai.py +++ b/llama_index/llms/azure_openai.py @@ -59,7 +59,8 @@ class AzureOpenAI(OpenAI): temperature: float = 0.1, max_tokens: Optional[int] = None, additional_kwargs: Optional[Dict[str, Any]] = None, - max_retries: int = 10, + max_retries: int = 3, + timeout: float = 60.0, api_key: Optional[str] = None, api_version: Optional[str] = None, # azure specific @@ -94,6 +95,7 @@ class AzureOpenAI(OpenAI): max_tokens=max_tokens, additional_kwargs=additional_kwargs, max_retries=max_retries, + timeout=timeout, api_key=api_key, azure_endpoint=azure_endpoint, azure_deployment=azure_deployment, diff --git a/llama_index/llms/openai.py b/llama_index/llms/openai.py index aa5dc9e641a88fc6c3dc7cfdad65d0be2d125aab..d58a2f4a25708d3d3cecabe2174791c55ecc4d59 100644 --- a/llama_index/llms/openai.py +++ b/llama_index/llms/openai.py @@ -73,7 +73,16 @@ class OpenAI(LLM): additional_kwargs: Dict[str, Any] = Field( default_factory=dict, description="Additional kwargs for the OpenAI API." ) - max_retries: int = Field(description="The maximum number of API retries.") + max_retries: int = Field( + default=3, + description="The maximum number of API retries.", + gte=0, + ) + timeout: float = Field( + default=60.0, + description="The timeout, in seconds, for API requests.", + gte=0, + ) api_key: str = Field(default=None, description="The OpenAI API key.", exclude=True) api_base: str = Field(description="The base URL for OpenAI API.") @@ -88,7 +97,8 @@ class OpenAI(LLM): temperature: float = 0.1, max_tokens: Optional[int] = None, additional_kwargs: Optional[Dict[str, Any]] = None, - max_retries: int = 10, + max_retries: int = 3, + timeout: float = 60.0, api_key: Optional[str] = None, api_base: Optional[str] = None, api_version: Optional[str] = None, @@ -113,6 +123,7 @@ class OpenAI(LLM): api_key=api_key, api_version=api_version, api_base=api_base, + timeout=timeout, **kwargs, ) @@ -195,6 +206,7 @@ class OpenAI(LLM): "api_key": self.api_key, "base_url": self.api_base, "max_retries": self.max_retries, + "timeout": self.timeout, **kwargs, } diff --git a/llama_index/llms/openai_utils.py b/llama_index/llms/openai_utils.py index 12a891961e38b2a35b89ad08b6e891d936a7f9f3..fd594571843f13c138e8820adb55a8b63cf4a5ad 100644 --- a/llama_index/llms/openai_utils.py +++ b/llama_index/llms/openai_utils.py @@ -1,4 +1,5 @@ import logging +import os import time from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union @@ -361,3 +362,10 @@ def resolve_from_aliases(*args: Optional[str]) -> Optional[str]: if arg is not None: return arg return None + + +def validate_openai_api_key(api_key: Optional[str] = None) -> None: + openai_api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + + if not openai_api_key: + raise ValueError(MISSING_API_KEY_ERROR_MESSAGE) diff --git a/llama_index/llms/utils.py b/llama_index/llms/utils.py index 1b633e8c243b86f627689ea62d1f8ffe3f8c4263..c4101d101c376b0e1d950349346c94500d984429 100644 --- a/llama_index/llms/utils.py +++ b/llama_index/llms/utils.py @@ -8,6 +8,7 @@ from 
diff --git a/llama_index/llms/utils.py b/llama_index/llms/utils.py
index 1b633e8c243b86f627689ea62d1f8ffe3f8c4263..c4101d101c376b0e1d950349346c94500d984429 100644
--- a/llama_index/llms/utils.py
+++ b/llama_index/llms/utils.py
@@ -8,6 +8,7 @@
 from llama_index.llms.llama_cpp import LlamaCPP
 from llama_index.llms.llama_utils import completion_to_prompt, messages_to_prompt
 from llama_index.llms.mock import MockLLM
 from llama_index.llms.openai import OpenAI
+from llama_index.llms.openai_utils import validate_openai_api_key
 
 LLMType = Union[str, LLM, BaseLanguageModel]
@@ -18,9 +19,10 @@ def resolve_llm(llm: Optional[LLMType] = None) -> LLM:
         # return default OpenAI model. If it fails, return LlamaCPP
         try:
             llm = OpenAI()
+            validate_openai_api_key(llm.api_key)
         except ValueError as e:
             raise ValueError(
-                "******\n"
+                "\n******\n"
                 "Could not load OpenAI model. "
                 "If you intended to use OpenAI, please check your OPENAI_API_KEY.\n"
                 "Original error:\n"
diff --git a/llama_index/readers/file/base.py b/llama_index/readers/file/base.py
index feb78bef501bf8fccb39468c13ba0d2934ccd863..27278d1d2d764754b776f2e86fc49cda62ecff8f 100644
--- a/llama_index/readers/file/base.py
+++ b/llama_index/readers/file/base.py
@@ -231,9 +231,15 @@ class SimpleDirectoryReader(BaseReader):
                     self.file_extractor[file_suffix] = reader_cls()
                 reader = self.file_extractor[file_suffix]
 
+                # load data -- catch all errors except for ImportError
                 try:
                     docs = reader.load_data(input_file, extra_info=metadata)
+                except ImportError as e:
+                    # ensure that ImportError is raised so user knows
+                    # about missing dependencies
+                    raise ImportError(str(e))
                 except Exception as e:
+                    # otherwise, just skip the file and report the error
                     print(
                         f"Failed to load file {input_file} with error: {e}. Skipping...",
                         flush=True,
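To illustrate the reader change: `load_data` now re-raises `ImportError` (a missing optional dependency such as a file parser) while continuing to print and skip files that fail for any other reason. A small sketch, assuming a placeholder `./data` directory of mixed file types:

```python
# Sketch: missing extras now surface to the caller instead of being skipped.
from llama_index import SimpleDirectoryReader

reader = SimpleDirectoryReader("./data")  # "./data" is a placeholder path

try:
    documents = reader.load_data()
except ImportError as e:
    # With this patch, an uninstalled optional dependency is raised
    # immediately rather than silently dropping the file.
    print(f"Missing optional dependency: {e}")
```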