From fd1507dc67bda5791b3424ce6efcc1458c5c7378 Mon Sep 17 00:00:00 2001
From: Andres Marafioti <andimarafioti@gmail.com>
Date: Thu, 22 Aug 2024 18:31:15 +0200
Subject: [PATCH] download tokenizer if missing

---
 s2s_pipeline.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/s2s_pipeline.py b/s2s_pipeline.py
index 093bfb8..675ef13 100644
--- a/s2s_pipeline.py
+++ b/s2s_pipeline.py
@@ -46,6 +46,10 @@ try:
     nltk.data.find("tokenizers/punkt_tab")
 except (LookupError, OSError):
     nltk.download("punkt_tab")
+try:  # the POS tagger package is stored under taggers/, not tokenizers/
+    nltk.data.find("taggers/averaged_perceptron_tagger_eng")
+except (LookupError, OSError):
+    nltk.download("averaged_perceptron_tagger_eng")
 
 # caching allows ~50% compilation time reduction
 # see https://docs.google.com/document/d/1y5CRfMLdwEoF1nTk9q8qEu1mgMUuUtvhklPKJ2emLU8/edit#heading=h.o2asbxsrp1ma
-- 
GitLab