diff --git a/SwissArmyTransformer/tokenization/base_tokenizer.py b/SwissArmyTransformer/tokenization/base_tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8264e389cf648da61ba262024aaa51fa1fc922ba
--- /dev/null
+++ b/SwissArmyTransformer/tokenization/base_tokenizer.py
@@ -0,0 +1,53 @@
+import os
+from .utils import *
+
+class BaseTokenizer:
+    """Base tokenizer for SAT.
+
+    Encode/decode methods raise NotImplementedError until overridden.
+    """
+
+    def __init__(self, **kwargs):
+        """Accept arbitrary keyword arguments and ignore them."""
+
+    def __repr__(self):
+        """Return the fixed description string of the base class."""
+        return "Base Tokenizer for SAT"
+
+    def __len__(self):
+        """Return the token count reported by ``num_tokens``."""
+        return self.num_tokens
+
+    def __call__(self, text, **kwargs):
+        """Encode ``text`` as ids by delegating to ``EncodeAsIds``."""
+        return self.EncodeAsIds(text, **kwargs)
+
+    @property
+    def num_tokens(self):
+        """Total number of tokens; the base class reports 0."""
+        return 0
+
+    @property
+    def command_tokens(self):
+        """Command tokens of the tokenizer; the base class returns None."""
+        return None
+
+    def from_pretrained(self, **kwargs):
+        """Hook for loading pretrained tokenizer params; a no-op here."""
+        return None
+
+    def EncodeAsIds(self, text, **kwargs):
+        """Encode text to ids; not implemented in the base class."""
+        raise NotImplementedError()
+
+    def EncodeAsTokens(self, text, **kwargs):
+        """Encode text to tokens; not implemented in the base class."""
+        raise NotImplementedError()
+
+    def DecodeIds(self, ids, **kwargs):
+        """Decode ids to their original form; not implemented here."""
+        raise NotImplementedError()
+
+    def DecodeTokens(self, tokens, **kwargs):
+        """Decode tokens to their original form; not implemented here."""
+        raise NotImplementedError()
\ No newline at end of file
diff --git a/SwissArmyTransformer/tokenization/utils.py b/SwissArmyTransformer/tokenization/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..085b5ed41fff9bf184d10509104722c6d4d32a56
--- /dev/null
+++ b/SwissArmyTransformer/tokenization/utils.py
@@ -0,0 +1,15 @@
+import torch
+import numpy as np
+import tensorflow as tf
+
+def _is_list(x):  # True when x is a list instance (subclasses included)
+    return isinstance(x, list)  # NOTE: underscore name, so `import *` skips it
+
+def _is_numpy(x):  # True when x is a numpy.ndarray instance
+    return isinstance(x, np.ndarray)  # `np` is the module-level numpy import
+
+def _is_torch(x):  # True when x is a torch.Tensor instance
+    return isinstance(x, torch.Tensor)  # uses the module-level torch import
+
+def _is_tensorflow(x):  # True when x is a tf.Tensor instance
+    return isinstance(x, tf.Tensor)  # needs the module-level tensorflow import
\ No newline at end of file