From 113ea18bf1c8ddf6491c330db4da0d3b4427d9ad Mon Sep 17 00:00:00 2001
From: Matthias Reso <13337103+mreso@users.noreply.github.com>
Date: Fri, 12 Apr 2024 10:46:15 -0700
Subject: [PATCH] Replace LlamaTokenizer with AutoTokenizer

---
 recipes/inference/local_inference/inference.py | 10 +++++-----
 src/llama_recipes/finetuning.py                |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/recipes/inference/local_inference/inference.py b/recipes/inference/local_inference/inference.py
index 4f83c8f2..194adcf0 100644
--- a/recipes/inference/local_inference/inference.py
+++ b/recipes/inference/local_inference/inference.py
@@ -10,7 +10,7 @@ import time
 
 import gradio as gr
 import torch
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 
 from llama_recipes.inference.safety_utils import get_safety_checker, AgentType
 from llama_recipes.inference.model_utils import load_model, load_peft_model
@@ -69,17 +69,17 @@ def main(
     else:
         torch.cuda.manual_seed(seed)
     torch.manual_seed(seed)
-    
+
     model = load_model(model_name, quantization, use_fast_kernels)
     if peft_model:
         model = load_peft_model(model, peft_model)
 
     model.eval()
-    
-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     tokenizer.pad_token = tokenizer.eos_token
-    
+
     batch = tokenizer(user_prompt, padding='max_length', truncation=True, max_length=max_padding_length, return_tensors="pt")
     if is_xpu_available():
         batch = {k: v.to("xpu") for k, v in batch.items()}
diff --git a/src/llama_recipes/finetuning.py b/src/llama_recipes/finetuning.py
index d2768577..f7b3a2ca 100644
--- a/src/llama_recipes/finetuning.py
+++ b/src/llama_recipes/finetuning.py
@@ -18,8 +18,8 @@ from torch.distributed.fsdp import (
 from torch.distributed.fsdp.fully_sharded_data_parallel import CPUOffload
 from torch.optim.lr_scheduler import StepLR
 from transformers import (
+    AutoTokenizer,
     LlamaForCausalLM,
-    LlamaTokenizer,
     LlamaConfig,
 )
 from transformers.models.llama.modeling_llama import LlamaDecoderLayer
@@ -137,7 +137,7 @@ def main(**kwargs):
     )
 
     # Load the tokenizer and add special tokens
-    tokenizer = LlamaTokenizer.from_pretrained(train_config.model_name)
+    tokenizer = AutoTokenizer.from_pretrained(train_config.model_name)
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
     print_model_size(model, train_config, rank if train_config.enable_fsdp else 0)
-- 
GitLab
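
Note (not part of the patch): below is a minimal standalone sketch of the usage pattern this change enables, assuming an example Llama checkpoint id that the diff itself does not reference. AutoTokenizer.from_pretrained resolves the concrete tokenizer class from the checkpoint's tokenizer configuration, so the hard-coded LlamaTokenizer import is no longer needed; the pad-token assignment mirrors the lines touched in inference.py and finetuning.py.

# Sketch only; "meta-llama/Llama-2-7b-hf" is an assumed example checkpoint
# (gated on the Hugging Face Hub), not something this patch refers to.
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"

# AutoTokenizer inspects the checkpoint's tokenizer files and instantiates the
# matching tokenizer class, so no Llama-specific import is required.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# As in the patch: Llama checkpoints define no pad token, so reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token

batch = tokenizer(
    "Hello, world",
    padding="max_length",
    truncation=True,
    max_length=64,
    return_tensors="pt",
)
print(batch["input_ids"].shape)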