diff --git a/utils/train_utils.py b/utils/train_utils.py index 8a68f0c18b7e151d7c2b3b13de5627a87a8aa901..e41f503ea1ba7140f84b202a4ecd72c88ee0285b 100644 --- a/utils/train_utils.py +++ b/utils/train_utils.py @@ -199,7 +199,7 @@ def evaluation(model,train_config, eval_dataloader, local_rank, tokenizer): if train_config.enable_fsdp: batch[key] = batch[key].to(local_rank) else: - batch[key] = batch[key].to('cuda') + batch[key] = batch[key].to('cuda:0') # Ensure no gradients are computed for this scope to save memory with torch.no_grad(): # Forward pass and compute loss