diff --git a/SwissArmyTransformer/tokenization/__init__.py b/SwissArmyTransformer/tokenization/__init__.py
index 64f9e3610a2f912b560372ed100d2f3afd3951f8..b54c6072c82cab8e45fff74e0cc1303fedc48427 100644
--- a/SwissArmyTransformer/tokenization/__init__.py
+++ b/SwissArmyTransformer/tokenization/__init__.py
@@ -15,6 +15,7 @@ import torch
 
 from SwissArmyTransformer.training.utils import print_rank_0
 
+
 def _export_vocab_size_to_args(args, original_num_tokens):
     tokenizer = get_tokenizer(args)
     num_tokens = original_num_tokens
@@ -32,6 +33,7 @@ def _export_vocab_size_to_args(args, original_num_tokens):
     print_rank_0("prepare tokenizer done")
     return tokenizer
 
+
 def get_tokenizer(args=None, outer_tokenizer=None):
     '''
         If you're using outer_tokenizer, call `get_tokenizer(args, outer_tokenizer)`
@@ -53,7 +55,7 @@ def get_tokenizer(args=None, outer_tokenizer=None):
             )
         elif args.tokenizer_type.startswith('glm_'):
             kwargs = {"add_block_symbols": True, "add_task_mask": args.task_mask,
-              "add_decoder_mask": False}
+                      "add_decoder_mask": False}
             if args.tokenizer_type == "glm_GPT2BPETokenizer":
                 from .glm import GPT2BPETokenizer
                 get_tokenizer.tokenizer = GPT2BPETokenizer(args.tokenizer_model_type, **kwargs)
diff --git a/SwissArmyTransformer/tokenization/glm/sp_tokenizer.py b/SwissArmyTransformer/tokenization/glm/sp_tokenizer.py
index b044d917d4e579b995e70be398a3389ea0fd93bd..a76903497aeb1cd2eba6bb5eca1124ae58f462c4 100644
--- a/SwissArmyTransformer/tokenization/glm/sp_tokenizer.py
+++ b/SwissArmyTransformer/tokenization/glm/sp_tokenizer.py
@@ -3,6 +3,11 @@ from https://github.com/openai/gpt-2/, changed for chinese
 """
 import json
 import os
 import sentencepiece as spm
 
 """
@@ -22,129 +27,72 @@ python setup.py install
 PRETRAINED_MODEL_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)),
      'embed_assets', 'chinese_sentencepiece/cog-pretrain.model')
 
-def get_pairs(word):
-    pairs = set()
-    prev_char = word[0]
-    for char in word[1:]:
-        pairs.add((prev_char, char))
-        prev_char = char
-    return pairs
-
-
-class Encoder:
-    def __init__(self, encoder, bpe_merges):
-        self.encoder = encoder
-        self.decoder = {v: k for k, v in self.encoder.items()}
-        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
-        self.cache = {}
-        self.max_len = 0
-
-    def bpe(self, token):
-        if token in self.cache:
-            return self.cache[token]
-        word = tuple(token)
-        pairs = get_pairs(word)
-        if not pairs:
-            return token
-
-        while True:
-            bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf')))
-            if bigram not in self.bpe_ranks:
-                break
-            first, second = bigram
-            new_word = []
-            i = 0
-            while i < len(word):
-                try:
-                    j = word.index(first, i)
-                    new_word.extend(word[i:j])
-                    i = j
-                except:
-                    new_word.extend(word[i:])
-                    break
-
-                if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
-                    new_word.append(first + second)
-                    i += 2
-                else:
-                    new_word.append(word[i])
-                    i += 1
-            new_word = tuple(new_word)
-            word = new_word
-            if len(word) == 1:
-                break
-            else:
-                pairs = get_pairs(word)
-        word = ' '.join(word)
-        self.cache[token] = word
-        return word
-
-    def encode(self, text):
-        return [self.encoder.get(token, 1) for token in self.tokenize(text)]
-
-    def decode(self, tokens):
-        text = ''.join([self.decoder[token] for token in tokens])
-        return text
-
-    def tokenize(self, text):
-        bpe_tokens = []
-        bpe_tokens.extend(bpe_token for bpe_token in self.bpe(text).split(' '))
-        return bpe_tokens
-
-    def convert_tokens_to_ids(self, tokens):
-        return [self.encoder.get(token, 1) for token in tokens]
-
-
-class Encoder_SP:
-    def __init__(self, model_path):
+class SentencePieceTokenizer:
+    """Trains and uses sentencepiece for text tokenization"""
+
+    def __init__(self, model_path=None, **kwargs):
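+        # model_path should point at a trained sentencepiece .model file;
+        # load_spm_model() appends the '.model' suffix if the bare path does not exist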
+        self.spm_model = model_path
+        self._tokens = []
+        self._vocab = {}
+        self.sp, self.vocab_size = None, 0
+        self.load_spm_model()
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None, *inputs, **kwargs):
+        if pretrained_model_name_or_path in ['glm-large', 'glm-10b']:
+            return cls(model_path=PRETRAINED_MODEL_FILE)
+        else:
+            return cls(model_path=pretrained_model_name_or_path)
+
+    def __len__(self):
+        return self.num_text_tokens
+
+    def load_spm_model(self):
+        """load sentencepiece model and parse vocab"""
+        if not os.path.exists(self.spm_model) and not self.spm_model.endswith('.model'):
+            self.spm_model = self.spm_model + '.model'
         self.sp = spm.SentencePieceProcessor()
-        self.sp.Load(model_path)
+        self.sp.Load(self.spm_model)
+        self.vocab_size = self.num_text_tokens = len(self.sp)
+        self._tokens = [self.IdToToken(t) for t in range(self.vocab_size)]
+        self._vocab = {t: i for i, t in enumerate(self._tokens)}
+
+    @property
+    def tokens(self):
+        return self._tokens
+
+    @property
+    def vocab(self):
+        return self._vocab
+
+    @staticmethod
+    def exists(model_path):
+        if model_path is None:
+            return False
+        # check if path exists
+        dne = not os.path.exists(model_path)
+        # check if path.model exists
+        if dne and not model_path.endswith('.model'):
+            dne = not os.path.exists(model_path + '.model')
+        return not dne
 
     def encode(self, text):
-        """
-        text="...."
-        """
-        return self.sp.EncodeAsIds(text)
-
-    def decode(self, tokens):
-        """
-        tokens=[x1,x2,...]
-        """
-        text = [int(token) for token in tokens]
-        # print(text)
-        return self.sp.DecodeIds(text)
-
-    def tokenize(self, text):
-        return self.sp.EncodeAsPieces(text)
-
-    def convert_tokens_to_ids(self, tokens):
-        return [self.sp.PieceToId(token) for token in tokens]
-
-    def convert_token_to_id(self, token):
-        return self.sp.PieceToId(token)
+        """convert text to sentencepiece Ids"""
+        tokens = self.sp.EncodeAsIds(text)
+        return tokens
 
-    def convert_id_to_token(self, idx):
-        return self.sp.IdToPiece(idx)
+    def IdToToken(self, Id):
+        """convert Id to sentencpiece token"""
+        return self.sp.IdToPiece(Id)
 
+    def TokenToId(self, token):
+        """convert sentencpiece token to Id"""
+        return self.sp.PieceToId(token)
 
-def get_encoder(encoder_file, bpe_file):
-    # 以下是为了同一个函数入兼容sentencepiece
-    filepath, filename = os.path.split(encoder_file)
-    shotname, extension = os.path.splitext(filename)
-
-    if (".model" == extension) and (bpe_file == ""):
-        return Encoder_SP(encoder_file)
-    else:
-        with open(encoder_file, 'r', encoding="utf-8") as f:
-            encoder = json.load(f)
-        with open(bpe_file, 'r', encoding="utf-8") as f:
-            bpe_data = f.read()
-        bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
-        return Encoder(
-            encoder=encoder,
-            bpe_merges=bpe_merges,
-        )
+    def decode(self, Ids):
+        """converts ids to a text string"""
+        return self.sp.DecodeIds(Ids)
 
 
 def from_pretrained():
-    return get_encoder(PRETRAINED_MODEL_FILE, "")
+    return SentencePieceTokenizer(model_path=PRETRAINED_MODEL_FILE)
\ No newline at end of file
diff --git a/SwissArmyTransformer/tokenization/glm/tokenization.py b/SwissArmyTransformer/tokenization/glm/tokenization.py
index 1ff0170e089f372cf8c22aa2ee2818902c30bf70..674815ae00180827f8ff64fbe76e344fa669f6bb 100644
--- a/SwissArmyTransformer/tokenization/glm/tokenization.py
+++ b/SwissArmyTransformer/tokenization/glm/tokenization.py
@@ -20,15 +20,9 @@ import csv
 import torch
 import itertools
 
-import nltk
-from nltk import tokenize as nltk_tokenize
-import sentencepiece as spm
 
 from .tokenization_gpt2 import GPT2Tokenizer
-from . import sp_tokenizer
-import regex as re
-
-
+from .sp_tokenizer import SentencePieceTokenizer
+
 
 class Tokenization(object):
     """
@@ -36,7 +30,7 @@ class Tokenization(object):
     text. Can hold tokenization as Ids or tokens.
 
     It also holds command tokens (pad, unk, etc.) for the tokenization.
-    This allows functions to pad/operate on tokenizations without having
+    This allows functions to pad/operate on a tokenization without having
     access to the full tokenizer, just the tokenization.
 
     Several standard array operations are implemented (insert, append, extend).
@@ -73,8 +67,11 @@ class Tokenization(object):
     def __len__(self):
         return len(self.tokenization)
 
+    def __str__(self):
+        return f"Tokenization = {self.tokenization}, Text = {self.text}"
+
     def insert(self, idx, other):
-        if isinstance(other, (CommandToken, TypeToken)):
+        if isinstance(other, CommandToken):
             self.tokenization.insert(idx, other.Id)
             if idx == 0:
                 self.text = other.token + self.text
@@ -88,7 +85,7 @@ class Tokenization(object):
             self.tokenization = self.tokenization[:idx] + other.tokenization + self.tokenization[idx:]
 
     def append(self, other):
-        if isinstance(other, (CommandToken, TypeToken)):
+        if isinstance(other, CommandToken):
             self.tokenization.append(other.Id)
             self.text += other.token
             self.original_text += other.token
@@ -101,11 +98,11 @@ class Tokenization(object):
         return self
 
     def extend(self, other):
-        if isinstance(other, (CommandToken, TypeToken)):
+        if isinstance(other, CommandToken):
             self.tokenization.append(other.Id)
             self.text += other.token
             self.original_text += other.token
-        elif isinstance(other, list) and isinstance(other[0], (CommandToken, TypeToken)):
+        elif isinstance(other, list) and isinstance(other[0], CommandToken):
             self.tokenization.extend([o.Id for o in other])
             self.text += [o.token for o in other]
             self.original_text += [o.token for o in other]
@@ -136,57 +133,10 @@ class CommandToken(object):
         self.lstrip = lstrip
         self.rstrip = rstrip
 
-    def __str__(self):
+    def __repr__(self):
         return str(COMMAND_TUPLE(self.name, self.token, self.Id))
 
 
-DEFAULT_COMMAND_TOKENS = [
-    ('pad', 0),
-    ('eos', 1),
-    ('bos', 2),
-    ('unk', 3),
-    ('sep', 4),
-    ('L2R', 5),
-    ('ENC', 6),
-    ('MASK', 7),
-]
-DEFAULT_COMMAND_TOKENS = prep_command_tokens(DEFAULT_COMMAND_TOKENS)
-
-"""define some default type tokens for bert training"""
-
-TYPE_TUPLE = namedtuple('TypeToken', ('name', 'token', 'Id'))
-
-
-def prep_type_tokens(tokenlist, token_format=token_format):
-    return [TypeToken(tok[0], token_format.format(tok[0]), tok[1]) for tok in tokenlist]
-
-
-class TypeToken(object):
-    def __init__(self, name, token, Id):
-        self.name = name
-        self.token = token
-        self.Id = Id
-
-    def __str__(self):
-        return str(TYPE_TUPLE(self.name, self.token, self.Id))
-
-
-DEFAULT_TYPE_TOKENS = [
-    ('function', 0),
-    ('command', 1),
-    ('str0', 2),
-    ('str1', 3),
-    ('str2', 4),
-    ('embedding0', 5),
-    ('embedding1', 6),
-    ('embedding2', 7),
-    ('arg0', 8),
-    ('arg1', 9),
-    ('arg2', 10),
-]
-DEFAULT_TYPE_TOKENS = prep_type_tokens(DEFAULT_TYPE_TOKENS)
-
-
 class Tokenizer(object):
     """
     Tokenizer object that handles text tokenization, command tokens, and type tokens.
@@ -198,47 +148,42 @@ class Tokenizer(object):
     Token types are stored in a separate mapping of size `len(type_tokens)`.
     """
 
-    def __init__(self, text_tokenizer, command_tokens=None, type_tokens=None):
+    def __init__(self, text_tokenizer, command_tokens=None):
         # set text tokenizer
         self.text_tokenizer = text_tokenizer
         if not hasattr(self, 'num_text_tokens'):
             self.num_text_tokens = len(self.text_tokenizer)
-
-        # set command tokens
-        if command_tokens is None:
-            command_tokens = DEFAULT_COMMAND_TOKENS
         self._command_tokens = command_tokens
-        self.command_name_map = {tok.name: tok for tok in self._command_tokens}
-        self.command_token_map = {tok.token: tok for tok in self._command_tokens}
-        self.command_id_map = {tok.Id: tok for tok in self._command_tokens}
-        if not hasattr(self, 'num_command_tokens'):
-            self.num_command_tokens = len(self._command_tokens)
-        if not hasattr(self, 'num_tokens'):
-            self.num_tokens = self.num_command_tokens + self.num_text_tokens
-
-        # set type tokens
-        if type_tokens is None:
-            type_tokens = DEFAULT_TYPE_TOKENS
-        self.type_tokens = type_tokens
-        self.type_name_map = {tok.name: tok for tok in self.type_tokens}
-        self.type_token_map = {tok.token: tok for tok in self.type_tokens}
-        self.type_id_map = {tok.Id: tok for tok in self.type_tokens}
-        if not hasattr(self, 'num_type_tokens'):
-            self.num_type_tokens = len(self.type_tokens)
+        self.command_name_map = {tok.name: tok for tok in self.command_tokens}
+        self.command_token_map = {tok.token: tok for tok in self.command_tokens}
+        self.command_id_map = {tok.Id: tok for tok in self.command_tokens}
 
         # parse tokens and vocabs from tokenizer
-        self._tokens = list(self.command_token_map.keys()) + list(self.text_tokenizer.tokens)
-        self._vocab = {t: Id for Id, t in self.command_id_map.items()}
-        self._vocab.update({t: Id + self.num_command_tokens for t, Id in self.text_tokenizer.vocab.items()})
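+        # command token ids may lie beyond the text vocab, so pad the token list
+        # with placeholders up to the largest id, then fill in the command tokens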
+        max_token_id = max(len(self.text_tokenizer.tokens) - 1, max(self.command_id_map.keys()))
+        self._tokens = [self.text_tokenizer.tokens[i] if i < len(self.text_tokenizer.tokens) else f'[UNUSED{i}]'
+                        for i in range(max_token_id + 1)]
+        for idx, token in self.command_id_map.items():
+            self._tokens[idx] = token.token
+        self._vocab = {t.token: Id for Id, t in self.command_id_map.items()}
+        self._vocab.update(self.text_tokenizer.vocab)
+
+        if not hasattr(self, 'num_command_tokens'):
+            self.num_command_tokens = len(self.command_tokens)
+        if not hasattr(self, 'num_tokens'):
+            self.num_tokens = len(self.tokens)
 
         self._text_tokens = list(self.text_tokenizer.tokens)
-        self._text_token_vocab = {t: Id + self.num_command_tokens for t, Id in self.text_tokenizer.vocab.items()}
+        self._text_token_vocab = {t: Id for t, Id in self.text_tokenizer.vocab.items()}
 
         self._command_token_tokens = list(self.command_token_map.keys())
         self._command_token_vocab = {t: Id for Id, t in self.command_id_map.items()}
 
-        self._token_types = list(self.type_token_map.keys())
-        self._token_type_vocab = {t: Id for Id, t in self.type_id_map.items()}
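+        # when True, DecodeIds joins decoded segments with single spaces;
+        # ChineseSPTokenizer turns this off for glm-large / glm-10b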
+        self.spaces_between_special_tokens = True
+
+    @property
+    def command_tokens(self):
+        return self._command_tokens
 
     def __call__(self, text, process_fn=None):
         """run preprocessing and encode text as Ids"""
@@ -252,10 +197,6 @@ class Tokenizer(object):
         """get command token corresponding to `name`"""
         return self.command_name_map[name]
 
-    def get_type(self, name):
-        """get type token corresponding to `name`"""
-        return self.type_name_map[name]
-
     @property
     def tokens(self):
         """list (or iterable) of all tokens for tokenizer"""
@@ -266,21 +207,6 @@ class Tokenizer(object):
         """dictionary mapping tokens to ids for tokenizer"""
         return self._vocab
 
-    @property
-    def token_types(self):
-        """list (or iterable) of all token types for tokenizer"""
-        return self._token_types
-
-    @property
-    def token_type_vocab(self):
-        """dictionary mapping token types to ids for tokenizer"""
-        return self._token_type_vocab
-
-    @property
-    def command_tokens(self):
-        """list (or iterable) of all command tokens for tokenizer"""
-        return self._command_token_tokens
-
     @property
     def command_token_vocab(self):
         """dictionary mapping command tokens to ids for tokenizer"""
@@ -370,725 +296,179 @@ class Tokenizer(object):
     def _encode(self, text):
         raise NotImplementedError
 
+    def _decode(self, ids):
+        raise NotImplementedError
+
+    @staticmethod
+    def clean_up_tokenization(out_string: str) -> str:
+        return out_string
+
     def EncodeAsTokens(self, text, process_fn=None):
         """
         encode text as tokens using text tokenizer
         """
-        tokenization = self.text_tokenizer.EncodeAsTokens(text, process_fn=process_fn)
-        tokenization.set_command_tokens(self._command_tokens)
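+        # derive the token-level view from the id-level encoding and map back via IdToToken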
+        tokenization = self.EncodeAsIds(text, process_fn=process_fn)
+        tokenization.tokenization = [self.IdToToken(idx) for idx in tokenization.tokenization]
         return tokenization
 
-    def IdToToken(self, Id, type_token=False):
-        """convert Id to token accounting for command and type tokens"""
-        if isinstance(Id, (TypeToken, CommandToken)):
+    def IdToToken(self, Id):
+        """convert Id to token accounting for command tokens"""
+        if isinstance(Id, CommandToken):
             return Id.token
-        if type_token:
-            return self.type_id_map[Id].token
-        if Id < self.num_command_tokens:
-            return self.command_id_map[Id].token
-        return self.text_tokenizer.IdToToken(Id - self.num_command_tokens)
-
-    def TokenToId(self, token, type_token=False):
-        """convert token to Id accounting for command and type tokens"""
-        if isinstance(token, (TypeToken, CommandToken)):
+        return self.tokens[Id]
+
+    def TokenToId(self, token):
+        """convert token to Id accounting for command tokens"""
+        if isinstance(token, CommandToken):
             return token.Id
-        if type_token:
-            return self.type_token_map[token].Id
-        if token in self.command_token_map:
-            return self.command_token_map[token].Id
-        return self.text_tokenizer.TokenToId(token) + self.num_command_tokens
+        return self.vocab[token]
 
-    def DecodeIds(self, Ids, type_token=False):
+    def DecodeIds(self, Ids):
         """
-        convert Ids to tokens accounting for command and type tokens, tokens
+        convert Ids to tokens, accounting for command tokens; the tokens
         are joined and returned as a string.
         """
-        if type_token:
-            return ' '.join(Id.token if isinstance(Id, TypeToken) else self.type_id_map[Id].token for Id in Ids)
         rtn_strs = []
         current_str = []
         if isinstance(Ids, Tokenization):
             Ids = Ids.tokenization
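+        # split the id stream at command tokens: text spans go through _decode,
+        # command tokens are emitted as their literal token strings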
         for Id in Ids:
             if isinstance(Id, CommandToken):
-                rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
+                rtn_strs.append(self._decode(current_str))
                 current_str = []
                 rtn_strs.append(Id.token)
-            elif Id < self.num_command_tokens:
-                rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
+            elif Id in self.command_id_map:
+                rtn_strs.append(self._decode(current_str))
                 current_str = []
                 rtn_strs.append(self.command_id_map[Id].token)
             else:
-                current_str.append(Id - self.num_command_tokens)
-        if current_str != []:
-            rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
-        return ' '.join(rtn_strs)
+                current_str.append(Id)
+        if current_str:
+            rtn_strs.append(self._decode(current_str))
+        if self.spaces_between_special_tokens:
+            output = ' '.join(rtn_strs)
+        else:
+            output = "".join(rtn_strs)
+        output = self.clean_up_tokenization(output)
+        return output
 
-    def DecodeTokens(self, Tokens, type_token=False):
+    def DecodeTokens(self, Tokens):
         """
-        convert tokens to a string accounting for command and type tokens.
+        convert tokens to a string accounting for command tokens.
         """
-        if type_token:
-            return ' '.join(t.token if isinstance(t, TypeToken) else t for t in Tokens)
-        rtn_strs = []
-        current_str = []
-        if isinstance(Tokens, Tokenization):
-            Tokens = Tokens.tokenization
-        for t in Tokens:
-            if isinstance(t, CommandToken):
-                rtn_strs.append(self.text_tokenizer.DecodeTokens(current_str))
-                current_str = []
-                rtn_strs.append(t.token)
-            elif t in self.command_token_map:
-                rtn_strs.append(self.text_tokenizer.DecodeTokens(current_str))
-                current_str = []
-                rtn_strs.append(t)
-            else:
-                current_str.append(t)
-        if current_str != []:
-            rtn_strs.append(self.text_tokenizer.DecodeTokens(current_str))
-        return ' '.join(rtn_strs)
-
-
-class TextTokenizer(object):
-    """
-    Interface for text tokenizer
-    """
-
-    def __init__(self):
-        if not hasattr(self, 'num_text_tokens'):
-            self.num_text_tokens = 0
-        if not hasattr(self, 'num_tokens'):
-            self.num_tokens = self.num_text_tokens
-
-    def __call__(self, text, process_fn=None):
-        return self.EncodeAsIds(text, process_fn)
-
-    def __len__(self):
-        return self.num_text_tokens
-
-    @property
-    def tokens(self):
-        """list (or iterable) of text tokens for text tokenizer"""
-        raise NotImplementedError('TextTokenizer tokens property not implemented')
-
-    @property
-    def vocab(self):
-        """dictionary mapping tokens to ids"""
-        raise NotImplementedError('TextTokenizer vocab property not implemented')
-
-    @staticmethod
-    def exists(model_path):
-        """check if the filepath for a text tokenizer exists"""
-        raise NotImplementedError('TextTokenizer exists method not implemented')
-
-    def Train(self, corpus):
-        """train a tokenizer on a data corpus and save model for future use"""
-        raise NotImplementedError('TextTokenizer Train not implemented')
-
-    def EncodeAsIds(self, text, process_fn=None):
-        """
-        Preprocess text and encode as ids. Return a tokenization object with
-        original text, processed text, and id tokenization.
-        """
-        raise NotImplementedError('TextTokenizer EncodeAsIds not implemented')
-
-    def EncodeAsTokens(self, text, process_fn=None):
-        """
-        Preprocess text and encode as tokens. Return a tokenization object with
-        original text, processed text, and token tokenization.
-        """
-        raise NotImplementedError('TextTokenizer EncodeAsTokens not implemented')
-
-    def IdToToken(self, Id):
-        """Convert an Id to Token. Reverse lookup of self.vocab"""
-        raise NotImplementedError('TextTokenizer IdToToken not implemented')
-
-    def TokenToId(self, token):
-        """Convert a Token to Id. Lookup of self.vocab"""
-        raise NotImplementedError('TextTokenizer TokenToId not implemented')
-
-    def DecodeIds(self, Ids):
-        """Convert a list or tokenization object of Ids to a text string"""
-        raise NotImplementedError('TextTokenizer DecodeIds not implemented')
-
-    def DecodeTokens(self, Tokens):
-        """Convert a list or tokenization object of tokens to a text string"""
-        raise NotImplementedError('TextTokenizer DecodeTokens not implemented')
-
-
-class CharacterLevelTokenizer(TextTokenizer):
-    """
-    Text tokenizer for ASCII-256 Character Level Tokenization.
-    """
-
-    def __init__(self, **kwargs):
-        self.num_text_tokens = 256
-        super(CharacterLevelTokenizer, self).__init__()
-        self._tokens = [self.IdToToken(Id) for Id in range(self.num_text_tokens)]
-        self._vocab = {t: i for i, t in enumerate(self._tokens)}
-
-    def __len__(self):
-        return 256
-
-    @staticmethod
-    def exists(model_path):
-        return True
-
-    def Train(self, corpus):
-        pass
-
-    @property
-    def tokens(self):
-        return self._tokens
-
-    @property
-    def vocab(self):
-        return self._vocab
-
-    def EncodeAsIds(self, text, process_fn=None):
-        """convert text to ascii 256 Ids"""
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-            processed_text = str(processed_text)
-        tokens = [self.TokenToId(c) for c in processed_text]
-        return Tokenization(tokens, processed_text, text)
-
-    def EncodeAsTokens(self, text, process_fn=None):
-        """convert text to ascii 256 characters"""
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-        processed_text = str(processed_text)
-        tokens = [c for c in processed_text]
-        return Tokenization(tokens, processed_text, text, asIds=False)
-
-    def IdToToken(self, Id):
-        """ascii index to character"""
-        return chr(Id)
-
-    def TokenToId(self, token):
-        """ascii character to index"""
-        return ord(token)
-
-    def DecodeIds(self, Ids):
-        """converts ascii ids to tokens before joining them into text"""
-        if isinstance(Ids, Tokenization):
-            Ids = Ids.tokenization
-        return ''.join([self.IdToToken(tok) for tok in Ids])
-
-    def DecodeTokens(self, Tokens):
-        """just concatenates ascii tokens into text"""
-        if isinstance(Tokens, Tokenization):
-            Tokens = Tokens.tokenization
-        return ''.join(Tokens)
-
-
-MAX_SENTENCEPIECE_SENTENCES = 100000000
-
-
-def get_corpus_freq(dataset, filepath, filetype='tsv'):
-    """
-    Take corpus, split it into sentences, and extract word frequencies.
-    Write frequencies to `filepath` as a tsv. Only write the first
-    MAX_SENTENCEPIECE_SENTENCES most common words to the file.
-    """
-    nltk.download('punkt', download_dir="./nltk")
-    if filetype == 'tsv':
-        delimiter = '\t'
-    else:
-        delimiter = ','
-
-    print("compute corpus frequency\n", flush=True)
-
-    total_sentence_count = 0
-    maxlen = 0
-    freqs = {}
-    for entry in dataset:
-        if isinstance(entry, dict):
-            entry = entry['text']
-        lines = entry.strip().split('\n')
-        for line in lines:
-            sentences = nltk_tokenize.sent_tokenize(line)
-            total_sentence_count += len(sentences)
-            for sentence in sentences:
-                maxlen = max(len(line), maxlen)
-                for word in sentence.split():
-                    if word not in freqs:
-                        freqs[word] = 0
-                    freqs[word] += 1
-
-    print("length of freqs before truncating " + str(len(freqs)), flush=True)
-    print("file path for freq " + str(filepath), flush=True)
-
-    freqs_sorted = {}
-    counter = 0
-    for word, count in sorted(freqs.items(), key=lambda x: x[1], reverse=True):
-        if counter >= MAX_SENTENCEPIECE_SENTENCES:
-            break
-        counter += 1
-        freqs_sorted[word] = count
-
-    print("length of freqs after trancating " + str(len(freqs_sorted)), flush=True)
-
-    with open(filepath, 'w') as f:
-        writer = csv.writer(f, delimiter=delimiter)
-        for k, v in freqs_sorted.items():
-            writer.writerow([str(k), str(v)])
-
-    return total_sentence_count, maxlen
-
-
-class SentencePieceTokenizer(TextTokenizer):
-    """Trains and uses sentencepiece for text tokenization"""
-
-    def __init__(self, model_type='bpe', vocab_size=None, corpus=None, model_path=None, character_coverage=1.0,
-                 **kwargs):
-        self.character_coverage = character_coverage
-        self.model_type = model_type.lower()
-        self.spm_model = model_path
-        self.num_text_tokens = vocab_size
-        make_train = not SentencePieceTokenizer.exists(self.spm_model)
-        if make_train:
-            assert corpus is not None and self.num_text_tokens is not None
-            self.Train(corpus, self.num_text_tokens)
-        self._tokens = []
-        self._vocab = {}
-        self.load_spm_model()
-        super(SentencePieceTokenizer, self).__init__()
-
-    def __len__(self):
-        return self.num_text_tokens
-
-    @property
-    def tokens(self):
-        return self._tokens
-
-    @property
-    def vocab(self):
-        return self._vocab
-
-    @staticmethod
-    def exists(model_path):
-        if model_path is None:
-            return False
-        # check if path exists
-        dne = not os.path.exists(model_path)
-        # check if path.model exists
-        if dne and not model_path.endswith('.model'):
-            dne = not os.path.exists(model_path + '.model')
-        return not dne
-
-    def load_spm_model(self):
-        """load sentencepiece model and parse vocab"""
-        if not os.path.exists(self.spm_model) and not self.spm_model.endswith('.model'):
-            self.spm_model = self.spm_model + '.model'
-        self.sp = spm.SentencePieceProcessor()
-        self.sp.Load(self.spm_model)
-        self.vocab_size = self.num_text_tokens = len(self.sp)
-        self._tokens = [self.IdToToken(t) for t in range(self.vocab_size)]
-        self._vocab = {t: i for i, t in enumerate(self._tokens)}
-
-    def Train(self, corpus, num_text_tokens):
-        """train sentencepiece model on corpus using word frequencies"""
-        self.num_text_tokens = num_text_tokens
-        use_model_path = self.spm_model
-        random_hash = str(random.randint(0, 2147483647))
-        if use_model_path is None:
-            use_model_path = random_hash
-        if use_model_path.endswith('.model'):
-            use_model_path = use_model_path[:use_model_path.rfind('.model')]
-        input_path = use_model_path + '.tsv.' + random_hash
-        line_count, maxlenline = get_corpus_freq(corpus, input_path)
-        line_count = min(line_count, MAX_SENTENCEPIECE_SENTENCES)
-        print('line count used as input_sentence_size ', line_count, flush=True)
-        print('training sentencepiece model', flush=True)
-        train_string = '--input={file_path} --model_prefix={model_prefix} --vocab_size={vocab_size}' \
-                       + ' --model_type={model_type} --character_coverage={character_coverage} ' \
-                       + '--input_sentence_size={input_sentence_size} ' \
-                       + '--input_format=tsv'
-        train_string = train_string.format(file_path=input_path, model_prefix=use_model_path,
-                                           vocab_size=num_text_tokens,
-                                           model_type=self.model_type, character_coverage=self.character_coverage,
-                                           input_sentence_size=int(line_count))  # , #)#,
-        print("calling spm.SentencePieceTrainer.Train(%s)" % (train_string), flush=True)
-        spm.SentencePieceTrainer.Train(train_string)
-        os.remove(input_path)
-        self.spm_model = use_model_path + '.model'
-        print('sentencepiece model written to ' + self.spm_model, flush=True)
-
-    def EncodeAsIds(self, text, process_fn=None):
-        """convert text to sentencepiece Ids"""
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-        tokens = self.sp.EncodeAsIds(processed_text)
-        return Tokenization(tokens, processed_text, text)
-
-    def EncodeAsTokens(self, text, process_fn=None):
-        """convert text to sentencepiece tokens"""
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-        tokens = self.sp.EncodeAsTokens(processed_text)
-        return Tokenization(tokens, processed_text, text, asIds=False)
-
-    def IdToToken(self, Id):
-        """convert Id to sentencpiece token"""
-        return self.sp.IdToPiece(Id)
-
-    def TokenToId(self, token):
-        """convert sentencpiece token to Id"""
-        return self.sp.PieceToId(token)
-
-    def DecodeIds(self, Ids):
-        """converts ids to a text string"""
-        if isinstance(Ids, Tokenization):
-            Ids = Ids.tokenization
-        return self.sp.DecodeIds(Ids)
-
-    def DecodeTokens(self, Tokens):
-        """converts sentencepiece tokens to a text string"""
-        if isinstance(Tokens, Tokenization):
-            Tokens = Tokens.tokenization
-        return self.sp.DecodeTokens(Tokens)
+        if isinstance(Tokens, Tokenization):
+            Tokens = Tokens.tokenization
+        Ids = [self.TokenToId(token) for token in Tokens]
+        return self.DecodeIds(Ids)
 
 
 class GPT2BPETokenizer(Tokenizer):
     def __init__(self, model_type_or_path, cache_dir=None, add_block_symbols=False, add_task_mask=False,
                  add_decoder_mask=False, **kwargs):
-        self.text_tokenizer = GPT2Tokenizer.from_pretrained(model_type_or_path,
-                                                            cache_dir=cache_dir)
+        text_tokenizer = GPT2Tokenizer.from_pretrained(model_type_or_path,
+                                                       cache_dir=cache_dir)
 
         # disable max len warnings by increasing max len
-        self.text_tokenizer.max_len = int(1e12)
-        self.num_tokens = len(self.text_tokenizer.encoder)
-        self.num_type_tokens = 2
+        text_tokenizer.max_len = int(1e12)
+        num_tokens = len(text_tokenizer.encoder)
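+        # extra GLM symbols below take ids past the end of the base BPE vocab;
+        # the Tokenizer base class pads its token list up to the largest command id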
         if model_type_or_path.startswith('roberta'):
-            self.num_command_tokens = 6
-            self.num_text_tokens = self.num_tokens - 3
-            self._command_tokens = [
-                CommandToken('pad', '<|endoftext|>', self.text_tokenizer.encoder['</s>']),
-                CommandToken('eos', '<|endoftext|>', self.text_tokenizer.encoder['</s>']),
-                CommandToken('sep', '[SEP]', self.text_tokenizer.encoder['<pad>']),
-                CommandToken('ENC', '[CLS]', self.text_tokenizer.encoder['<s>']),
-                CommandToken('MASK', '[MASK]', self.text_tokenizer.encoder['<mask>'], lstrip=True),
-                CommandToken('unk', '[UNK]', self.text_tokenizer.encoder['<unk>'])
+            command_tokens = [
+                CommandToken('pad', '<|endoftext|>', text_tokenizer.encoder['</s>']),
+                CommandToken('eos', '<|endoftext|>', text_tokenizer.encoder['</s>']),
+                CommandToken('sep', '[SEP]', text_tokenizer.encoder['<pad>']),
+                CommandToken('ENC', '[CLS]', text_tokenizer.encoder['<s>']),
+                CommandToken('MASK', '[MASK]', text_tokenizer.encoder['<mask>'], lstrip=True),
+                CommandToken('unk', '[UNK]', text_tokenizer.encoder['<unk>'])
             ]
             if add_block_symbols:
-                self._command_tokens.extend([
-                    CommandToken('sop', '<|startofpiece|>', self.num_tokens),
-                    CommandToken('eop', '<|endofpiece|>', self.num_tokens + 1)
+                command_tokens.extend([
+                    CommandToken('sop', '<|startofpiece|>', num_tokens),
+                    CommandToken('eop', '<|endofpiece|>', num_tokens + 1)
                 ])
-                self.num_tokens += 2
-                self.num_command_tokens += 2
+                num_tokens += 2
         else:
-            self.num_command_tokens = 2
-            self.num_text_tokens = self.num_tokens - 1
-            self._command_tokens = [
-                CommandToken('pad', '<|endoftext|>', self.text_tokenizer.encoder['<|endoftext|>']),
-                CommandToken('eos', '<|endoftext|>', self.text_tokenizer.encoder['<|endoftext|>'])
+            command_tokens = [
+                CommandToken('pad', '<|endoftext|>', text_tokenizer.encoder['<|endoftext|>']),
+                CommandToken('eos', '<|endoftext|>', text_tokenizer.encoder['<|endoftext|>'])
             ]
             if add_block_symbols:
-                self._command_tokens.extend([
-                    CommandToken('sop', '<|startofpiece|>', self.num_tokens),
-                    CommandToken('eop', '<|endofpiece|>', self.num_tokens + 1),
-                    CommandToken('ENC', '[CLS]', self.num_tokens + 2),
-                    CommandToken('MASK', '[MASK]', self.num_tokens + 3, lstrip=True),
-                    CommandToken('sep', '[SEP]', self.num_tokens + 4),
-                    CommandToken('unk', '[UNK]', self.num_tokens + 5)
+                command_tokens.extend([
+                    CommandToken('sop', '<|startofpiece|>', num_tokens),
+                    CommandToken('eop', '<|endofpiece|>', num_tokens + 1),
+                    CommandToken('ENC', '[CLS]', num_tokens + 2),
+                    CommandToken('MASK', '[MASK]', num_tokens + 3, lstrip=True),
+                    CommandToken('sep', '[SEP]', num_tokens + 4),
+                    CommandToken('unk', '[UNK]', num_tokens + 5)
                 ])
-                self.num_tokens += 6
-                self.num_command_tokens += 6
+                num_tokens += 6
         if add_block_symbols:
             if add_task_mask:
-                self._command_tokens.extend([
-                    CommandToken('gMASK', '[gMASK]', self.num_tokens, lstrip=True),
-                    CommandToken('sMASK', '[sMASK]', self.num_tokens + 1, lstrip=True)
+                command_tokens.extend([
+                    CommandToken('gMASK', '[gMASK]', num_tokens, lstrip=True),
+                    CommandToken('sMASK', '[sMASK]', num_tokens + 1, lstrip=True)
                 ])
-                self.num_tokens += 2
-                self.num_command_tokens += 2
+                num_tokens += 2
             if add_decoder_mask:
-                self._command_tokens.extend([
-                    CommandToken('dBLOCK', '[dBLOCK]', self.num_tokens)
+                command_tokens.extend([
+                    CommandToken('dBLOCK', '[dBLOCK]', num_tokens)
                 ])
-                self.num_tokens += 1
-                self.num_command_tokens += 1
-        self.command_name_map = {tok.name: tok for tok in self._command_tokens}
-        self.command_token_map = {tok.token: tok for tok in self._command_tokens}
-        self.command_id_map = {tok.Id: tok for tok in self._command_tokens}
-
-        self.type_tokens = [
-            TypeToken('str0', '<str0>', 0),
-            TypeToken('str1', '<str1>', 1),
-        ]
-        self.type_name_map = {tok.name: tok for tok in self.type_tokens}
-        self.type_token_map = {tok.token: tok for tok in self.type_tokens}
-        self.type_id_map = {tok.Id: tok for tok in self.type_tokens}
-
-        self._tokens = list(self.text_tokenizer.encoder.keys())
-        self._vocab = {k: v for k, v in self.text_tokenizer.encoder.items()}
-
-        self._text_tokens = list(self._tokens)
-        self._text_token_vocab = {k: v for k, v in self.text_tokenizer.encoder.items()}
-
-        self._command_token_tokens = list(self.command_token_map.keys())
-        self._command_token_vocab = {t: Id for Id, t in self.command_id_map.items()}
-
-        self._token_types = list(self.type_token_map.keys())
-        self._token_type_vocab = {t: Id for Id, t in self.type_id_map.items()}
-
-        for idx, tok in self.command_id_map.items():
-            self.text_tokenizer.decoder[idx] = tok.token
-
-    def EncodeAsIds(self, text, process_fn=None):
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-
-        def split_on_token(tok_extended: CommandToken, text):
-            result = []
-            tok = tok_extended.token
-            split_text = text.split(tok)
-            for i, sub_text in enumerate(split_text):
-                # CommandToken can control whitespace stripping around them.
-                # We use them for GPT2 and Roberta to have different behavior depending on the special token
-                # Cf. https://github.com/huggingface/transformers/pull/2778
-                # and https://github.com/huggingface/transformers/issues/3788
-                # Strip white spaces on the right
-                if tok_extended.rstrip and i > 0:
-                    # A bit counter-intuitive but we strip the left of the string
-                    # since tok_extended.rstrip means the special token is eating all white spaces on its right
-                    sub_text = sub_text.lstrip()
-                # Strip white spaces on the left
-                if tok_extended.lstrip and i < len(split_text) - 1:
-                    sub_text = sub_text.rstrip()  # Opposite here
-
-                if i == 0 and not sub_text:
-                    result.append(tok)
-                elif i == len(split_text) - 1:
-                    if sub_text:
-                        result.append(sub_text)
-                    else:
-                        pass
-                else:
-                    if sub_text:
-                        result.append(sub_text)
-                    result.append(tok)
-            return result
-
-        def split_on_tokens(tok_list, text):
-            if not text.strip():
-                return []
-            if not tok_list:
-                return self.text_tokenizer.encode(text)
-
-            tokenized_text = []
-            text_list = [text]
-            for tok in tok_list:
-                tokenized_text = []
-                for sub_text in text_list:
-                    if sub_text not in self._command_token_tokens:
-                        tokenized_text.extend(split_on_token(tok, sub_text))
-                    else:
-                        tokenized_text.append(sub_text)
-                text_list = tokenized_text
-
-            return list(
-                itertools.chain.from_iterable(
-                    (
-                        self.text_tokenizer.encode(token) if token not in self._command_token_tokens else [
-                            self.command_token_map[token].Id] for token in tokenized_text
-                    )
-                )
-            )
-
-        no_split_tokens = self._command_tokens
-        Ids = split_on_tokens(no_split_tokens, processed_text)
-        tokenization = Tokenization(Ids, processed_text, text)
-        tokenization.set_command_tokens(self._command_tokens)
-        return tokenization
+                num_tokens += 1
+        super().__init__(text_tokenizer, command_tokens=command_tokens)
 
     def _encode(self, text):
         return self.text_tokenizer.encode(text)
 
-    def EncodeAsTokens(self, text, process_fn=None):
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-        tokens = []
-        for token in re.findall(self.text_tokenizer.pat, processed_text):
-            token = ''.join(self.text_tokenizer.bye_encoder[b] for b in token.encode('utf-8'))
-            tokens.extend(bpe_token for bpe_token in self.text_tokenizer.bpe(token).split(' '))
-        tokenization = Tokenization(tokens, processed_text, text, asIds=False)
-        tokenization.set_command_tokens(self._command_tokens)
-        return tokenization
-
-    def DecodeAsTokens(self, Ids):
-        return [self.IdToToken(x) for x in Ids]
-
-    def IdToToken(self, Id, type_token=False):
-        if isinstance(Id, (TypeToken, CommandToken)):
-            return Id.token
-        if type_token:
-            return self.type_id_map[Id].token
-        if Id in self.command_id_map:
-            return self.command_id_map[Id].token
-        return self.text_tokenizer.decoder[Id]
-
-    def TokenToId(self, token, type_token=False):
-        if isinstance(token, (TypeToken, CommandToken)):
-            return token.Id
-        if type_token:
-            return self.type_token_map[token].Id
-        return self.text_tokenizer.encoder[token]
-
-    def DecodeIds(self, Ids, type_token=False):
-        if type_token:
-            return ' '.join(Id.token if isinstance(Id, TypeToken) else self.type_id_map[Id].token for Id in Ids)
-        if isinstance(Ids, Tokenization):
-            Ids = Ids.tokenization
-        return self.text_tokenizer.decode(Ids)
-
-    def DecodeTokens(self, Tokens, type_token=False):
-        if type_token:
-            return ' '.join(t.token if isinstance(t, TypeToken) else t for t in Tokens)
-        if isinstance(Tokens, Tokenization):
-            Tokens = Tokens.tokenization
-        return self.text_tokenizer.decode([self.TokenToId(tok) for tok in Tokens])
+    def _decode(self, ids):
+        return self.text_tokenizer.decode(ids)
 
 
 class ChineseSPTokenizer(Tokenizer):
     def __init__(self, model_type_or_path, add_block_symbols=False, add_task_mask=False, add_decoder_mask=False,
                  **kwargs):
-        self.text_tokenizer = sp_tokenizer.from_pretrained()
-
-        self.num_command_tokens = 0
-        self.num_text_tokens = self.text_tokenizer.sp.vocab_size()
-        self.num_tokens = self.num_text_tokens
-        self.num_type_tokens = 2
-
-        self._command_tokens = [
-            CommandToken('pad', '<|endoftext|>', self.num_text_tokens),
-            CommandToken('eos', '<|endoftext|>', self.num_text_tokens),
-            CommandToken('sep', '[SEP]', self.num_text_tokens + 1),
-            CommandToken('ENC', '[CLS]', self.num_text_tokens + 2),
-            CommandToken('MASK', '[MASK]', self.num_text_tokens + 3, lstrip=True),
-            CommandToken('unk', '[UNK]', self.num_text_tokens + 4)
+        text_tokenizer = SentencePieceTokenizer.from_pretrained(model_type_or_path)
+        num_tokens = len(text_tokenizer.tokens)
+
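+        # command tokens take ids directly after the sentencepiece vocab;
+        # pad and eos intentionally share an id, hence only 5 new ids below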
+        command_tokens = [
+            CommandToken('pad', '<|endoftext|>', num_tokens),
+            CommandToken('eos', '<|endoftext|>', num_tokens),
+            CommandToken('sep', '[SEP]', num_tokens + 1),
+            CommandToken('ENC', '[CLS]', num_tokens + 2),
+            CommandToken('MASK', '[MASK]', num_tokens + 3, lstrip=True),
+            CommandToken('unk', '[UNK]', num_tokens + 4)
         ]
-        self.num_tokens += 5
-        self.num_command_tokens += 6
+        num_tokens += 5
         if add_block_symbols:
-            self._command_tokens.extend([
-                CommandToken('sop', '<|startofpiece|>', self.num_tokens + 1),
-                CommandToken('eop', '<|endofpiece|>', self.num_tokens + 2)
+            command_tokens.extend([
+                CommandToken('sop', '<|startofpiece|>', num_tokens + 1),
+                CommandToken('eop', '<|endofpiece|>', num_tokens + 2)
             ])
             if model_type_or_path == 'glm-large':
-                self.num_tokens += 3
+                num_tokens += 3
             else:
-                self.num_tokens += 2
-            self.num_command_tokens += 2
+                num_tokens += 2
             if add_task_mask:
                 if model_type_or_path == 'glm-large':
-                    self._command_tokens.extend([
-                        CommandToken('sMASK', '[sMASK]', self.num_tokens, lstrip=True),
-                        CommandToken('gMASK', '[gMASK]', self.num_tokens + 1, lstrip=True)
+                    command_tokens.extend([
+                        CommandToken('sMASK', '[sMASK]', num_tokens, lstrip=True),
+                        CommandToken('gMASK', '[gMASK]', num_tokens + 1, lstrip=True)
                     ])
                 else:
-                    self._command_tokens.extend([
-                        CommandToken('gMASK', '[gMASK]', self.num_tokens, lstrip=True),
-                        CommandToken('sMASK', '[sMASK]', self.num_tokens + 1, lstrip=True)
+                    command_tokens.extend([
+                        CommandToken('gMASK', '[gMASK]', num_tokens, lstrip=True),
+                        CommandToken('sMASK', '[sMASK]', num_tokens + 1, lstrip=True)
                     ])
-                self.num_tokens += 2
-                self.num_command_tokens += 2
+                num_tokens += 2
             if add_decoder_mask:
-                self._command_tokens.extend([
-                    CommandToken('dBLOCK', '[dBLOCK]', self.num_tokens)
+                command_tokens.extend([
+                    CommandToken('dBLOCK', '[dBLOCK]', num_tokens)
                 ])
-                self.num_tokens += 1
-                self.num_command_tokens += 1
-        self.command_name_map = {tok.name: tok for tok in self._command_tokens}
-        self.command_token_map = {tok.token: tok for tok in self._command_tokens}
-        self.command_id_map = {tok.Id: tok for tok in self._command_tokens}
-        self.type_tokens = [
-            TypeToken('str0', '<str0>', 0),
-            TypeToken('str1', '<str1>', 1),
-        ]
-        self.type_name_map = {tok.name: tok for tok in self.type_tokens}
-        self.type_token_map = {tok.token: tok for tok in self.type_tokens}
-        self.type_id_map = {tok.Id: tok for tok in self.type_tokens}
-
-        # self._tokens = list(self.text_tokenizer.encoder.keys())
-        # self._vocab = {k:v for k,v in self.text_tokenizer.encoder.items()}
-        #
-        # self._text_tokens = list(self._tokens)
-        # self._text_token_vocab = {k:v for k,v in self.text_tokenizer.encoder.items()}
-
-        self._command_token_tokens = list(self.command_token_map.keys())
-        self._command_token_vocab = {t: Id for Id, t in self.command_id_map.items()}
-
-        self._token_types = list(self.type_token_map.keys())
-        self._token_type_vocab = {t: Id for Id, t in self.type_id_map.items()}
+                num_tokens += 1
+        super().__init__(text_tokenizer, command_tokens=command_tokens)
+        if model_type_or_path in ['glm-large', 'glm-10b']:
+            self.spaces_between_special_tokens = False
 
     def _encode(self, text):
         ids = self.text_tokenizer.encode(text)
         return ids
 
-    def EncodeAsTokens(self, text, process_fn=None):
-        processed_text = text
-        if process_fn is not None:
-            processed_text = process_fn(processed_text)
-        tokens = self.text_tokenizer.tokenize(processed_text)
-        tokenization = Tokenization(tokens, processed_text, text, asIds=False)
-        tokenization.set_command_tokens(self._command_tokens)
-        return tokenization
-        # return Tokenization(tokens, processed_text, text, asIds=False)
-
-    def IdToToken(self, Id, type_token=False):
-        if isinstance(Id, (TypeToken, CommandToken)):
-            return Id.token
-        if type_token:
-            return self.type_id_map[Id].token
-        if Id in self.command_id_map:
-            return self.command_id_map[Id].token
-        elif Id in self.type_id_map:
-            return self.type_id_map[Id].token
-        else:
-            return self.text_tokenizer.convert_id_to_token(int(Id))
-
-    def TokenToId(self, token, type_token=False):
-        if isinstance(token, (TypeToken, CommandToken)):
-            return token.Id
-        if type_token:
-            return self.type_token_map[token].Id
-        return self.text_tokenizer.convert_token_to_id(token)
-
-    def DecodeIds(self, Ids, type_token=False):
-        if type_token:
-            return ' '.join(Id.token if isinstance(Id, TypeToken) else self.type_id_map[Id].token for Id in Ids)
-        if isinstance(Ids, Tokenization):
-            Ids = Ids.tokenization
-        Ids = list(map(int, Ids))
-        pieces = []
-        last = 0
-        for i, token_id in enumerate(Ids):
-            if token_id in self.command_id_map:
-                pieces.append(Ids[last: i])
-                pieces.append(token_id)
-                last = i + 1
-        pieces.append(Ids[last:])
-        text = ""
-        for piece in pieces:
-            if isinstance(piece, int):
-                text += self.command_id_map[piece].token
-            elif piece:
-                text += self.text_tokenizer.decode(piece)
-        return text
-
-    def DecodeTokens(self, Tokens, type_token=False):
-        if type_token:
-            return ' '.join(t.token if isinstance(t, TypeToken) else t for t in Tokens)
-        if isinstance(Tokens, Tokenization):
-            Tokens = Tokens.tokenization
-        return self.text_tokenizer.decode([self.TokenToId(tok) for tok in Tokens])
+    def _decode(self, ids):
+        text = self.text_tokenizer.decode(ids)
+        return text
\ No newline at end of file
diff --git a/SwissArmyTransformer/tokenization/glm/tokenization_gpt2.py b/SwissArmyTransformer/tokenization/glm/tokenization_gpt2.py
index 46b2e6ca607a682f00e2095644d67c871efd4794..c263729a8dd719d2ee84d60a6ccd73027dc74d25 100644
--- a/SwissArmyTransformer/tokenization/glm/tokenization_gpt2.py
+++ b/SwissArmyTransformer/tokenization/glm/tokenization_gpt2.py
@@ -168,6 +168,14 @@ class GPT2Tokenizer(object):
         self.special_tokens_decoder = {}
         self.set_special_tokens(special_tokens)
 
+    @property
+    def tokens(self):
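+        # `decoder` maps int ids to token strings; exposing it as `tokens` matches
+        # the interface the shared Tokenizer base class expects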
+        return self.decoder
+
+    @property
+    def vocab(self):
+        return self.encoder
+
     def __len__(self):
         return len(self.encoder) + len(self.special_tokens)
 
@@ -309,4 +317,4 @@ class GPT2Tokenizer(object):
                 writer.write(token + u'\n')
                 index += 1
 
-        return vocab_file, merge_file, special_tokens_file
+        return vocab_file, merge_file, special_tokens_file
\ No newline at end of file
diff --git a/SwissArmyTransformer/training/deepspeed_training.py b/SwissArmyTransformer/training/deepspeed_training.py
index bc8ea682180fd6c8ef134d89377ceb6c8efb3d39..b3dfc225b9fba4dd5a86263275d9bbaade29dc98 100644
--- a/SwissArmyTransformer/training/deepspeed_training.py
+++ b/SwissArmyTransformer/training/deepspeed_training.py
@@ -204,9 +204,9 @@ def get_params_for_weight_decay_optimization(module):
                  if p is not None and n == 'bias' and p.requires_grad])
 
     if len(weight_decay_params['params']) == 0:
-        return tuple(no_weight_decay_params)
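+        # wrap the single param-group dict in a one-element tuple;
+        # tuple(dict) would iterate over the dict's keys instead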
+        return (no_weight_decay_params,)
     elif len(no_weight_decay_params['params']) == 0:
-        return tuple(weight_decay_params)
+        return (weight_decay_params,)
 
     return weight_decay_params, no_weight_decay_params