Skip to content

Commit 6cbcd02

Browse files
committed
fix: regression in torch -> always initialize gloo backend as well
1 parent f2c6cd1 commit 6cbcd02

1 file changed

Lines changed: 5 additions & 5 deletions

File tree

dmlcloud/core/distributed.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -538,15 +538,15 @@ def init(kind='auto'):
538538
raise ValueError(f"Invalid kind: {kind}. Must be one of 'auto', 'dummy', 'slurm', 'mpi', 'env'")
539539

540540
if kind == 'auto':
541-
_init_process_group_auto()
541+
_init_process_group_auto(backend='cpu:gloo,cuda:nccl')
542542
elif kind == 'dummy':
543-
_init_process_group_dummy()
543+
_init_process_group_dummy(backend='cpu:gloo,cuda:nccl')
544544
elif kind == 'slurm':
545-
_init_process_group_slurm()
545+
_init_process_group_slurm(backend='cpu:gloo,cuda:nccl')
546546
elif kind == 'mpi':
547-
_init_process_group_MPI()
547+
_init_process_group_MPI(backend='cpu:gloo,cuda:nccl')
548548
elif kind == 'env':
549-
_init_process_group_env()
549+
_init_process_group_env(backend='cpu:gloo,cuda:nccl')
550550

551551
atexit.register(deinitialize_torch_distributed, fail_silently=True)
552552

0 commit comments

Comments
 (0)