From 707af7ea24f20b7b541168ba86c230d6b28886bf Mon Sep 17 00:00:00 2001 From: Hamid Shojanazeri <hamid.nazeri2010@gmail.com> Date: Tue, 25 Jul 2023 06:26:37 +0000 Subject: [PATCH] adding cuda:0 for non-fsdp situations --- utils/train_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/train_utils.py b/utils/train_utils.py index 7366e3f3..8a68f0c1 100644 --- a/utils/train_utils.py +++ b/utils/train_utils.py @@ -84,7 +84,7 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche if train_config.enable_fsdp: batch[key] = batch[key].to(local_rank) else: - batch[key] = batch[key].to('cuda') + batch[key] = batch[key].to('cuda:0') outputs = model(**batch) loss = outputs.loss loss = loss / gradient_accumulation_steps -- GitLab