|
|
@@ -333,7 +333,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary |
|
|
|
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) |
|
|
|
pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % ( |
|
|
|
f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) |
|
|
|
callbacks.on_train_batch_end(ni, model, imgs, targets, paths, plots) |
|
|
|
callbacks.on_train_batch_end(ni, model, imgs, targets, paths, plots, opt.sync_bn) |
|
|
|
# end batch ------------------------------------------------------------------------------------------------ |
|
|
|
|
|
|
|
# Scheduler |
|
|
@@ -499,7 +499,6 @@ def main(opt): |
|
|
|
assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count' |
|
|
|
assert not opt.image_weights, '--image-weights argument is not compatible with DDP training' |
|
|
|
assert not opt.evolve, '--evolve argument is not compatible with DDP training' |
|
|
|
assert not opt.sync_bn, '--sync-bn known training issue, see https://github.com/ultralytics/yolov5/issues/3998' |
|
|
|
torch.cuda.set_device(LOCAL_RANK) |
|
|
|
device = torch.device('cuda', LOCAL_RANK) |
|
|
|
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") |