From fd1507dc67bda5791b3424ce6efcc1458c5c7378 Mon Sep 17 00:00:00 2001
From: Andres Marafioti <andimarafioti@gmail.com>
Date: Thu, 22 Aug 2024 18:31:15 +0200
Subject: [PATCH] download tokenizer if missing

---
 s2s_pipeline.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/s2s_pipeline.py b/s2s_pipeline.py
index 093bfb8..675ef13 100644
--- a/s2s_pipeline.py
+++ b/s2s_pipeline.py
@@ -46,6 +46,10 @@ try:
     nltk.data.find("tokenizers/punkt_tab")
 except (LookupError, OSError):
     nltk.download("punkt_tab")
+try:
+    nltk.data.find("tokenizers/averaged_perceptron_tagger_eng")
+except (LookupError, OSError):
+    nltk.download("averaged_perceptron_tagger_eng")
 
 # caching allows ~50% compilation time reduction
 # see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma
-- 
GitLab