diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml
index a9f0017858a17f1bb109ebac7b25c85d7258c6a4..bebd6d67a2166a9527ad321b2b18266a58246f6f 100644
--- a/.github/workflows/publish_release.yml
+++ b/.github/workflows/publish_release.yml
@@ -7,6 +7,10 @@ on:
   workflow_dispatch:
 
+env:
+  POETRY_VERSION: "1.6.1"
+  PYTHON_VERSION: "3.9"
+
 jobs:
   build-n-publish:
     name: Build and publish to PyPI
 
@@ -14,6 +18,23 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
+      - name: Set up python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: ${{ env.POETRY_VERSION }}
+      - name: Install deps
+        shell: bash
+        run: poetry install
+      - name: Cache tiktoken and nltk files
+        shell: bash
+        run: python -c "from llama_index import get_tokenizer; get_tokenizer()"
+      - name: Clean up zip files
+        shell: bash
+        run: rm -rf llama_index/_static/nltk_cache/corpora/stopwords.zip llama_index/_static/nltk_cache/tokenizers/punkt.zip
       - name: Build and publish to pypi
         uses: JRubics/poetry-publish@v1.17
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fd0ace912792f84c5998a4475ee30b322406b8e1..e3ae90f4736c9b94735c1bb3e33732b287049de9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,24 +10,30 @@ repos:
       - id: check-merge-conflict
       - id: check-symlinks
       - id: check-toml
+        exclude: llama_index/_static
       - id: check-yaml
+        exclude: llama_index/_static
      - id: detect-private-key
       - id: end-of-file-fixer
+        exclude: llama_index/_static
       - id: mixed-line-ending
+        exclude: llama_index/_static
       - id: trailing-whitespace
+        exclude: llama_index/_static
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     rev: v0.1.5
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
+        exclude: llama_index/_static
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
       - id: black-jupyter
         name: black-src
         alias: black
-        exclude: docs/
+        exclude: ^(docs/|llama_index/_static)
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
@@ -51,11 +57,13 @@
     rev: v3.0.3
     hooks:
       - id: prettier
+        exclude: llama_index/_static
   - repo: https://github.com/codespell-project/codespell
     rev: v2.2.6
     hooks:
       - id: codespell
         additional_dependencies: [tomli]
+        exclude: llama_index/_static
   - repo: https://github.com/srstevenson/nb-clean
     rev: 3.1.0
     hooks:
@@ -65,4 +73,4 @@
     rev: v0.23.1
     hooks:
       - id: toml-sort-fix
-        exclude: poetry.lock
+        exclude: ^(poetry.lock|llama_index/_static)
diff --git a/llama_index/_static/nltk_cache/.gitignore b/llama_index/_static/nltk_cache/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..046c31c1546f89ae46ab263f05e2ff11dc927442
--- /dev/null
+++ b/llama_index/_static/nltk_cache/.gitignore
@@ -0,0 +1,2 @@
+# Include this file
+!.gitignore
diff --git a/llama_index/_static/tiktoken_cache/.gitignore b/llama_index/_static/tiktoken_cache/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..046c31c1546f89ae46ab263f05e2ff11dc927442
--- /dev/null
+++ b/llama_index/_static/tiktoken_cache/.gitignore
@@ -0,0 +1,2 @@
+# Include this file
+!.gitignore
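The two cache-related workflow steps above work by side effect: per the `llama_index/utils.py` changes later in this diff, constructing `GlobalsHelper` downloads the NLTK stopwords and punkt data into `llama_index/_static/nltk_cache`, so a bare import of `llama_index` (which creates the module-level `globals_helper` instance) should populate the NLTK cache, while calling `get_tokenizer()` fetches the tiktoken BPE file into `llama_index/_static/tiktoken_cache`. A minimal local sketch of that warm-up, with assertion paths inferred from this diff rather than taken from the workflow itself:

# warm_caches.py -- hypothetical local equivalent of the "Cache tiktoken and
# nltk files" CI step; run from the repo root before `poetry build`.
import os

# Importing triggers GlobalsHelper(), which downloads stopwords/punkt into
# llama_index/_static/nltk_cache; calling get_tokenizer() populates
# llama_index/_static/tiktoken_cache (assuming TIKTOKEN_CACHE_DIR is unset).
from llama_index import get_tokenizer

get_tokenizer()

static_dir = os.path.join("llama_index", "_static")
assert os.listdir(os.path.join(static_dir, "nltk_cache")), "nltk cache is empty"
assert os.listdir(os.path.join(static_dir, "tiktoken_cache")), "tiktoken cache is empty"

The follow-up `rm -rf` step then keeps the extracted NLTK data but drops the redundant download archives, so the zips are not shipped in the published package.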
diff --git a/llama_index/finetuning/cross_encoders/dataset_gen.py b/llama_index/finetuning/cross_encoders/dataset_gen.py
index a594c221ab8ca540fb906e780b9a4c0236d58c03..4fe35f555b9e20a33347feac551fff6c0b0f5890 100644
--- a/llama_index/finetuning/cross_encoders/dataset_gen.py
+++ b/llama_index/finetuning/cross_encoders/dataset_gen.py
@@ -3,10 +3,9 @@ import re
 from dataclasses import dataclass
 from typing import List, Optional
 
-import tiktoken
 from tqdm.auto import tqdm
 
-from llama_index import VectorStoreIndex
+from llama_index import VectorStoreIndex, get_tokenizer
 from llama_index.llms import ChatMessage, OpenAI
 from llama_index.llms.llm import LLM
 from llama_index.node_parser import TokenTextSplitter
@@ -46,7 +45,7 @@ def generate_synthetic_queries_over_documents(
         chunk_size=max_chunk_length,
         chunk_overlap=0,
         backup_separators=["\n"],
-        tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
+        tokenizer=get_tokenizer(),
     )
 
     llm = llm or OpenAI(model="gpt-3.5-turbo-16k", temperature=0.3)
@@ -123,7 +122,7 @@ def generate_ce_fine_tuning_dataset(
         chunk_size=max_chunk_length,
         chunk_overlap=0,
         backup_separators=["\n"],
-        tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
+        tokenizer=get_tokenizer(),
     )
 
     # Use logit bias in case of OpenAI for the tokens for Yes and No
diff --git a/llama_index/indices/keyword_table/utils.py b/llama_index/indices/keyword_table/utils.py
index e3d05648620cd32ae89fedb9a053c639d6df01a7..d6ec7363c1095150da81e8ede349a03f5576a34a 100644
--- a/llama_index/indices/keyword_table/utils.py
+++ b/llama_index/indices/keyword_table/utils.py
@@ -29,8 +29,6 @@ def rake_extract_keywords(
     """Extract keywords with RAKE."""
     try:
         import nltk
-
-        nltk.download("punkt")
     except ImportError:
         raise ImportError("Please install nltk: `pip install nltk`")
     try:
@@ -38,7 +36,10 @@ def rake_extract_keywords(
     except ImportError:
         raise ImportError("Please install rake_nltk: `pip install rake_nltk`")
 
-    r = Rake()
+    r = Rake(
+        sentence_tokenizer=nltk.tokenize.sent_tokenize,
+        word_tokenizer=nltk.tokenize.wordpunct_tokenize,
+    )
     r.extract_keywords_from_text(text_chunk)
     keywords = r.get_ranked_phrases()[:max_keywords]
     if expand_with_subtokens:
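Both `TokenTextSplitter` call sites above trade the inline `tiktoken.encoding_for_model("gpt-3.5-turbo").encode` for the shared `get_tokenizer()` helper, which builds the same encoder once, caches its BPE file under `_static/tiktoken_cache`, and registers it globally. A rough equivalence sketch, assuming `tiktoken` is installed (variable names are illustrative only):

from functools import partial

import tiktoken

from llama_index import get_tokenizer

text = "Generate synthetic queries over these documents."

# What each call site previously constructed inline:
old_tokenize = tiktoken.encoding_for_model("gpt-3.5-turbo").encode

# What get_tokenizer() now returns (per llama_index/utils.py in this diff);
# allowed_special="all" only changes how special tokens are treated.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
new_tokenize = partial(enc.encode, allowed_special="all")

# Identical token ids for ordinary text.
assert old_tokenize(text) == new_tokenize(text) == get_tokenizer()(text)

The `Rake(...)` change in `rake_extract_keywords` serves the same goal: `wordpunct_tokenize` is regex-based and needs no downloaded model, and `sent_tokenize` resolves punkt from the bundled cache, replacing the removed ad-hoc `nltk.download("punkt")` call.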
diff --git a/llama_index/memory/chat_memory_buffer.py b/llama_index/memory/chat_memory_buffer.py
index baa22c299dab88d4f7e2f7c53730c4df59c789dd..8592394ac465ee70d3701df86f4bbf09727b9b6c 100644
--- a/llama_index/memory/chat_memory_buffer.py
+++ b/llama_index/memory/chat_memory_buffer.py
@@ -1,10 +1,10 @@
-from typing import Any, Callable, Dict, List, Optional, cast
+from typing import Any, Callable, Dict, List, Optional
 
 from llama_index.bridge.pydantic import Field, root_validator
 from llama_index.llms.llm import LLM
 from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.memory.types import BaseMemory
-from llama_index.utils import GlobalsHelper
+from llama_index.utils import get_tokenizer
 
 DEFUALT_TOKEN_LIMIT_RATIO = 0.75
 DEFAULT_TOKEN_LIMIT = 3000
@@ -16,7 +16,7 @@ class ChatMemoryBuffer(BaseMemory):
     token_limit: int
     tokenizer_fn: Callable[[str], List] = Field(
         # NOTE: mypy does not handle the typing here well, hence the cast
-        default_factory=cast(Callable[[], Any], GlobalsHelper().tokenizer),
+        default_factory=get_tokenizer,
         exclude=True,
     )
     chat_history: List[ChatMessage] = Field(default_factory=list)
@@ -42,7 +42,7 @@ class ChatMemoryBuffer(BaseMemory):
         # Validate tokenizer -- this avoids errors when loading from json/dict
         tokenizer_fn = values.get("tokenizer_fn", None)
         if tokenizer_fn is None:
-            values["tokenizer_fn"] = GlobalsHelper().tokenizer
+            values["tokenizer_fn"] = get_tokenizer()
 
         return values
 
@@ -63,7 +63,7 @@ class ChatMemoryBuffer(BaseMemory):
 
         return cls(
             token_limit=token_limit,
-            tokenizer_fn=tokenizer_fn or GlobalsHelper().tokenizer,
+            tokenizer_fn=tokenizer_fn or get_tokenizer(),
             chat_history=chat_history or [],
         )
 
diff --git a/llama_index/node_parser/text/utils.py b/llama_index/node_parser/text/utils.py
index 1f581c43c369757af2ffcf5eceecebc28a1408cb..67465770e4598dd599a59be74d1d814d3e8ded13 100644
--- a/llama_index/node_parser/text/utils.py
+++ b/llama_index/node_parser/text/utils.py
@@ -35,31 +35,8 @@ def split_by_char() -> Callable[[str], List[str]]:
 
 
 def split_by_sentence_tokenizer() -> Callable[[str], List[str]]:
-    import os
-
     import nltk
 
-    from llama_index.utils import get_cache_dir
-
-    cache_dir = get_cache_dir()
-    nltk_data_dir = os.environ.get("NLTK_DATA", cache_dir)
-
-    # update nltk path for nltk so that it finds the data
-    if nltk_data_dir not in nltk.data.path:
-        nltk.data.path.append(nltk_data_dir)
-
-    try:
-        nltk.data.find("tokenizers/punkt")
-    except LookupError:
-        try:
-            nltk.download("punkt", download_dir=nltk_data_dir)
-        except FileExistsError:
-            logger.info(
-                "Tried to re-download NLTK files but already exists. "
-                "This could happen in multi-theaded deployments, "
-                "should be benign"
-            )
-
     tokenizer = nltk.tokenize.PunktSentenceTokenizer()
 
     # get the spans and then return the sentences
diff --git a/llama_index/postprocessor/optimizer.py b/llama_index/postprocessor/optimizer.py
index b5b80fe4961751ceb424d83f888036a94e6335fa..c811e76413e3fc4f0a8ced172d136d92df80ee82 100644
--- a/llama_index/postprocessor/optimizer.py
+++ b/llama_index/postprocessor/optimizer.py
@@ -66,24 +66,8 @@ class SentenceEmbeddingOptimizer(BaseNodePostprocessor):
         self._embed_model = embed_model or OpenAIEmbedding()
 
         if tokenizer_fn is None:
-            import os
-
             import nltk.data
 
-            from llama_index.utils import get_cache_dir
-
-            cache_dir = get_cache_dir()
-            nltk_data_dir = os.environ.get("NLTK_DATA", cache_dir)
-
-            # update nltk path for nltk so that it finds the data
-            if nltk_data_dir not in nltk.data.path:
-                nltk.data.path.append(nltk_data_dir)
-
-            try:
-                nltk.data.find("tokenizers/punkt")
-            except LookupError:
-                nltk.download("punkt", download_dir=nltk_data_dir)
-
             tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")
             tokenizer_fn = tokenizer.tokenize
         self._tokenizer_fn = tokenizer_fn
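With the `default_factory` switched to `get_tokenizer`, a `ChatMemoryBuffer` built without an explicit `tokenizer_fn` counts tokens with the shared tiktoken-backed callable instead of the deprecated `GlobalsHelper().tokenizer`. A small usage sketch (the message text and limit are arbitrary):

from llama_index.llms import ChatMessage, MessageRole
from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer

# No tokenizer_fn given, so the Field default_factory (get_tokenizer) applies;
# the same fallback guards the root_validator and from_defaults paths above.
buffer = ChatMemoryBuffer.from_defaults(token_limit=64)
buffer.put(ChatMessage(role=MessageRole.USER, content="hello world"))

# History is trimmed against token_limit using that tokenizer.
print(buffer.get())

The node parser and optimizer hunks delete their per-call download logic for the same reason: punkt is already on `nltk.data.path` by the time these functions run, because `GlobalsHelper` set it up at import time.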
diff --git a/llama_index/utils.py b/llama_index/utils.py
index 32a0e582c8e74fc272add381abf771cbc01a789e..585f5be9d491845379f6cbeecba00b819d441621 100644
--- a/llama_index/utils.py
+++ b/llama_index/utils.py
@@ -25,7 +25,6 @@ from typing import (
     Set,
     Type,
     Union,
-    cast,
     runtime_checkable,
 )
 
@@ -38,24 +37,34 @@ class GlobalsHelper:
 
     """
 
-    _tokenizer: Optional[Callable[[str], List]] = None
     _stopwords: Optional[List[str]] = None
+    _nltk_data_dir: Optional[str] = None
+
+    def __init__(self) -> None:
+        """Initialize NLTK stopwords and punkt."""
+        import nltk
+
+        self._nltk_data_dir = os.environ.get(
+            "NLTK_DATA",
+            os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                "_static/nltk_cache",
+            ),
+        )
 
-    @property
-    def tokenizer(self) -> Callable[[str], List]:
-        """Get tokenizer. TODO: Deprecated."""
-        if self._tokenizer is None:
-            tiktoken_import_err = (
-                "`tiktoken` package not found, please run `pip install tiktoken`"
-            )
-            try:
-                import tiktoken
-            except ImportError:
-                raise ImportError(tiktoken_import_err)
-            enc = tiktoken.get_encoding("gpt2")
-            self._tokenizer = cast(Callable[[str], List], enc.encode)
-            self._tokenizer = partial(self._tokenizer, allowed_special="all")
-        return self._tokenizer  # type: ignore
+        if self._nltk_data_dir not in nltk.data.path:
+            nltk.data.path.append(self._nltk_data_dir)
+
+        # ensure access to data is there
+        try:
+            nltk.data.find("corpora/stopwords", paths=[self._nltk_data_dir])
+        except LookupError:
+            nltk.download("stopwords", download_dir=self._nltk_data_dir)
+
+        try:
+            nltk.data.find("tokenizers/punkt", paths=[self._nltk_data_dir])
+        except LookupError:
+            nltk.download("punkt", download_dir=self._nltk_data_dir)
 
     @property
     def stopwords(self) -> List[str]:
@@ -69,19 +78,10 @@ class GlobalsHelper:
                 "`nltk` package not found, please run `pip install nltk`"
             )
 
-            from llama_index.utils import get_cache_dir
-
-            cache_dir = get_cache_dir()
-            nltk_data_dir = os.environ.get("NLTK_DATA", cache_dir)
-
-            # update nltk path for nltk so that it finds the data
-            if nltk_data_dir not in nltk.data.path:
-                nltk.data.path.append(nltk_data_dir)
-
             try:
-                nltk.data.find("corpora/stopwords")
+                nltk.data.find("corpora/stopwords", paths=[self._nltk_data_dir])
             except LookupError:
-                nltk.download("stopwords", download_dir=nltk_data_dir)
+                nltk.download("stopwords", download_dir=self._nltk_data_dir)
 
             self._stopwords = stopwords.words("english")
         return self._stopwords
@@ -116,10 +116,23 @@ def get_tokenizer() -> Callable[[str], List]:
             import tiktoken
         except ImportError:
             raise ImportError(tiktoken_import_err)
+
+        # set tokenizer cache temporarily
+        should_revert = False
+        if "TIKTOKEN_CACHE_DIR" not in os.environ:
+            should_revert = True
+            os.environ["TIKTOKEN_CACHE_DIR"] = os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                "_static/tiktoken_cache",
+            )
+
         enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
         tokenizer = partial(enc.encode, allowed_special="all")
         set_global_tokenizer(tokenizer)
 
+        if should_revert:
+            del os.environ["TIKTOKEN_CACHE_DIR"]
+
         assert llama_index.global_tokenizer is not None
         return llama_index.global_tokenizer
 
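`get_tokenizer` only redirects tiktoken to the bundled cache when the caller has not set `TIKTOKEN_CACHE_DIR`, and it deletes the variable again afterwards so the default never leaks into user code. The guard in isolation, as a sketch with a placeholder loader (note the hunk above has no try/finally, so an exception while loading would leave the variable set):

import os

def load_with_default_cache(cache_dir: str) -> None:
    """Mirror get_tokenizer()'s temporary TIKTOKEN_CACHE_DIR handling."""
    should_revert = False
    if "TIKTOKEN_CACHE_DIR" not in os.environ:
        should_revert = True
        os.environ["TIKTOKEN_CACHE_DIR"] = cache_dir
    try:
        ...  # load the encoding here; tiktoken reads TIKTOKEN_CACHE_DIR
    finally:
        # Revert only if we set the variable; a user-provided value stays.
        if should_revert:
            del os.environ["TIKTOKEN_CACHE_DIR"]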
diff --git a/pyproject.toml b/pyproject.toml
index 4eee9431aec03b84582907c24952ac786613e671..1c0ea8965fd7c176a749fb2e4884fd8b8f9412d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,12 +8,12 @@ check-hidden = true
 ignore-words-list = "astroid,gallary,momento,narl,ot,rouge"
 # Feel free to un-skip examples, and experimental, you will just need to
 # work through many typos (--write-changes and --interactive will help)
-skip = "./examples,./experimental,*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
+skip = "./llama_index/_static,./examples,./experimental,*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
 
 [tool.mypy]
 disallow_untyped_defs = true
 # Remove venv skip when integrated with pre-commit
-exclude = ["build", "examples", "notebooks", "venv"]
+exclude = ["_static", "build", "examples", "notebooks", "venv"]
 ignore_missing_imports = true
 python_version = "3.8"
 
@@ -27,6 +27,7 @@ classifiers = [
 description = "Interface between LLMs and your data"
 documentation = "https://docs.llamaindex.ai/en/stable/"
 homepage = "https://llamaindex.ai"
+include = ["llama_index/_static"]
 keywords = ["LLM", "NLP", "RAG", "data", "devtools", "index", "retrieval"]
 license = "MIT"
 maintainers = [
@@ -147,6 +148,7 @@ llamaindex-cli = 'llama_index.command_line.command_line:main'
 
 [tool.ruff]
 exclude = [
+    "_static",
     "examples",
     "notebooks",
 ]
diff --git a/tests/memory/test_chat_memory_buffer.py b/tests/memory/test_chat_memory_buffer.py
index 7709f665c58878be077a1eb9fd4a294c1f28f90a..5e08180b8bf4876904b930090770b4dc8511ad01 100644
--- a/tests/memory/test_chat_memory_buffer.py
+++ b/tests/memory/test_chat_memory_buffer.py
@@ -3,9 +3,9 @@ import pickle
 import pytest
 from llama_index.llms import ChatMessage, MessageRole
 from llama_index.memory.chat_memory_buffer import ChatMemoryBuffer
-from llama_index.utils import GlobalsHelper
+from llama_index.utils import get_tokenizer
 
-tokenizer = GlobalsHelper().tokenizer
+tokenizer = get_tokenizer()
 USER_CHAT_MESSAGE = ChatMessage(role=MessageRole.USER, content="first message")
 USER_CHAT_MESSAGE_TOKENS = len(tokenizer(str(USER_CHAT_MESSAGE.content)))
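The new `include = ["llama_index/_static"]` entry is what ships the warmed caches inside the sdist and wheel, so `get_tokenizer()` and the NLTK lookups resolve offline after a plain `pip install`. A hypothetical post-build sanity check, not part of this PR:

# check_wheel.py -- hypothetical sanity check; run as
#   python check_wheel.py dist/llama_index-<version>-py3-none-any.whl
import sys
import zipfile

with zipfile.ZipFile(sys.argv[1]) as wheel:
    names = wheel.namelist()

assert any("llama_index/_static/tiktoken_cache/" in name for name in names)
assert any("llama_index/_static/nltk_cache/" in name for name in names)
print("bundled caches present")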