@@ -493,7 +493,7 @@ def main(opt):
         assert not opt.sync_bn, '--sync-bn known training issue, see https://github.com/ultralytics/yolov5/issues/3998'
         torch.cuda.set_device(LOCAL_RANK)
         device = torch.device('cuda', LOCAL_RANK)
-        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=60))
+        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
 
     # Train
     if not opt.evolve:
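For context, a minimal sketch of how this initialization is exercised under a `torchrun` launch (the toy model, file name and launch command are assumptions for illustration, not part of this PR). With the explicit `timeout=timedelta(seconds=60)` removed, `init_process_group` falls back to PyTorch's default process-group timeout of 30 minutes.

```python
# Minimal DDP sketch (illustrative only, not YOLOv5's train.py):
# run with `torchrun --nproc_per_node=2 ddp_sketch.py`.
import os

import torch
import torch.distributed as dist

LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # set by torchrun


def main():
    if LOCAL_RANK != -1:
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        # No explicit timeout: the default process-group timeout (30 min) applies.
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

        model = torch.nn.Linear(10, 1).to(device)  # toy model standing in for YOLOv5
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[LOCAL_RANK])

        x = torch.randn(4, 10, device=device)
        model(x).sum().backward()  # gradients are all-reduced across ranks here

        dist.destroy_process_group()


if __name__ == '__main__':
    main()
```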
@@ -35,10 +35,10 @@ def torch_distributed_zero_first(local_rank: int):
     Decorator to make all processes in distributed training wait for each local_master to do something.
     """
     if local_rank not in [-1, 0]:
-        dist.barrier()
+        dist.barrier(device_ids=[local_rank])
     yield
     if local_rank == 0:
-        dist.barrier()
+        dist.barrier(device_ids=[0])
 
 
 def init_torch_seeds(seed=0):
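A hedged usage sketch of the patched context manager (assuming a `torchrun` launch, a process group initialized as in the first hunk, and YOLOv5's `utils/torch_utils.py` on the import path; `build_label_cache` is a hypothetical stand-in for the one-time work this manager guards). Passing `device_ids` pins each NCCL barrier call to the GPU that rank is using.

```python
# Usage sketch (assumptions: torchrun launch, process group already initialized,
# YOLOv5's utils/torch_utils.py importable; build_label_cache is hypothetical).
import os
import time

from utils.torch_utils import torch_distributed_zero_first

LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))


def build_label_cache():
    # Stand-in for scanning a dataset and writing a cache file; it must finish
    # on rank 0 before the other ranks try to read the cache.
    time.sleep(1)


def create_dataset():
    with torch_distributed_zero_first(LOCAL_RANK):
        # Ranks other than 0 block in dist.barrier(device_ids=[local_rank]) on
        # entry; rank 0 runs the body first and releases them at its exit
        # barrier (dist.barrier(device_ids=[0])), after which they run it too.
        build_label_cache()
```

The entry barrier (ranks != 0) and the exit barrier (rank 0) complete as a single collective, so rank 0 always finishes the body before any other rank starts it.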