Skip to content
Snippets Groups Projects
Commit 21ce004a authored by minkowski0125
Browse files

add base tokenizer

parent 5de0e2c2
No related branches found
No related tags found
No related merge requests found
import os
from .utils import *
class BaseTokenizer:
    """Base tokenizer interface.

    This class only defines the shape of the API. The encode/decode methods
    raise ``NotImplementedError`` and ``num_tokens`` is ``0`` until a subclass
    overrides them. Calling an instance is a shortcut for ``EncodeAsIds``.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments; the base class ignores them."""
        pass

    def __call__(self, text, **kwargs):
        """Run preprocessing and encode ``text`` as ids.

        Delegates to ``EncodeAsIds``, forwarding ``kwargs`` unchanged.
        """
        return self.EncodeAsIds(text, **kwargs)

    def __len__(self):
        """Return the total number of tokens (the ``num_tokens`` property)."""
        return self.num_tokens

    def __repr__(self):
        """Return a short description of the tokenizer."""
        return "Base Tokenizer for SAT"

    @property
    def command_tokens(self):
        """Command tokens of the tokenizer; ``None`` in the base class."""
        return None

    @property
    def num_tokens(self):
        """Total number of tokens; ``0`` in the base class."""
        return 0

    def from_pretrained(self, **kwargs):
        """Load tokenizer params from pretrained.

        The base implementation does nothing and returns ``None``.
        """
        pass

    def _not_implemented(self, method_name):
        """Build the error raised by the abstract encode/decode methods.

        Naming the concrete class and the missing method makes it clear which
        override is absent when a subclass is incomplete.
        """
        return NotImplementedError(
            "%s does not implement %s" % (type(self).__name__, method_name)
        )

    def EncodeAsIds(self, text, **kwargs):
        """Encode ``text`` to ids. Must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise self._not_implemented("EncodeAsIds")

    def EncodeAsTokens(self, text, **kwargs):
        """Encode ``text`` to tokens. Must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise self._not_implemented("EncodeAsTokens")

    def DecodeIds(self, ids, **kwargs):
        """Decode ``ids`` to their original form. Must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise self._not_implemented("DecodeIds")

    def DecodeTokens(self, tokens, **kwargs):
        """Decode ``tokens`` to their original form. Must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise self._not_implemented("DecodeTokens")
\ No newline at end of file
import torch
import numpy as np
import tensorflow as tf
def _is_list(x) -> bool:
    """Return True when ``x`` is a ``list`` (or an instance of a subclass)."""
    return isinstance(x, list)
def _is_numpy(x) -> bool:
    """Return True when ``x`` is a ``numpy.ndarray``."""
    return isinstance(x, np.ndarray)
def _is_torch(x) -> bool:
    """Return True when ``x`` is a ``torch.Tensor``."""
    return isinstance(x, torch.Tensor)
def _is_tensorflow(x) -> bool:
    """Return True when ``x`` is a ``tf.Tensor``."""
    return isinstance(x, tf.Tensor)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment