|
|
@@ -252,9 +252,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary |
|
|
|
|
|
|
|
# DDP mode |
|
|
|
if cuda and RANK != -1: |
|
|
|
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, |
|
|
|
# nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 |
|
|
|
find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules())) |
|
|
|
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) |
|
|
|
|
|
|
|
# Model parameters |
|
|
|
hyp['box'] *= 3. / nl # scale to layers |