diff --git a/s2s_pipeline.py b/s2s_pipeline.py
index 093bfb8c97ccbe4729b00ea8ca8fdbc6244f5559..675ef132ed629cfe4e04fddd0f488360565aba20 100644
--- a/s2s_pipeline.py
+++ b/s2s_pipeline.py
@@ -46,6 +46,10 @@ try:
     nltk.data.find("tokenizers/punkt_tab")
 except (LookupError, OSError):
     nltk.download("punkt_tab")
+try:  # POS-tagger data is stored under "taggers/", not "tokenizers/"
+    nltk.data.find("taggers/averaged_perceptron_tagger_eng")
+except (LookupError, OSError):
+    nltk.download("averaged_perceptron_tagger_eng")
 
 # caching allows ~50% compilation time reduction
 # see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma