Unverified commit d6ae2031, authored by celestinoalan, committed by GitHub

Fix fine-tuning training loss accumulation (#725)

parent b9ec61a3
@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                         batch[key] = batch[key].to('cuda:0')
                 with autocast():
                     loss = model(**batch).loss
+                total_loss += loss.detach().float()
                 loss = loss / gradient_accumulation_steps
                 if train_config.save_metrics:
                     train_step_loss.append(loss.detach().float().item())
                     train_step_perplexity.append(float(torch.exp(loss.detach().float())))
-                total_loss += loss.detach().float()
                 if train_config.use_fp16:
                     # if fp16 is enabled, use gradient scaler to handle gradient update
                     scaler.scale(loss).backward()
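
For context: the epoch-level training loss is presumably computed later in train_utils.py by averaging total_loss over the dataloader, so total_loss needs to accumulate the raw per-batch loss. If it instead accumulates the loss after the division by gradient_accumulation_steps, the reported training loss is understated by that factor. Below is a minimal sketch of the pattern this commit establishes; the model, data, and names are toy placeholders, not the repository's actual train() function.

# Minimal sketch (hypothetical toy model and data, not the repo's train_utils.py)
# of the accumulation order: total_loss sums the *unscaled* per-batch loss for
# reporting, while only the loss passed to backward() is divided by
# gradient_accumulation_steps.
import torch

torch.manual_seed(0)
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
data = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(6)]

gradient_accumulation_steps = 3
total_loss = 0.0

for step, (x, y) in enumerate(data):
    loss = torch.nn.functional.mse_loss(model(x), y)

    # Accumulate the raw loss for metrics BEFORE scaling it for backward.
    total_loss += loss.detach().float()

    # Scale only the loss that produces gradients, so the accumulated gradient
    # matches training with an effectively larger batch.
    (loss / gradient_accumulation_steps).backward()

    if (step + 1) % gradient_accumulation_steps == 0 or step == len(data) - 1:
        optimizer.step()
        optimizer.zero_grad()

# Epoch-level metric: an average per-batch loss that no longer shrinks as
# gradient_accumulation_steps grows.
train_epoch_loss = total_loss / len(data)
print(f"train_epoch_loss = {float(train_epoch_loss):.4f}")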