torch.cuda.amp bug fix (#2750)

PR https://github.com/ultralytics/yolov5/pull/2725 introduced a very specific bug that only affects multi-GPU training. Apparently the cause was using the torch.cuda.amp.autocast decorator on the autoShape forward method. In this PR I've implemented amp more traditionally, wrapping the model calls inside forward in `with amp.autocast(...)` context managers (enabled only when the model is not on the CPU), and the bug is resolved.
2021-04-09 18:19:49 +02:00 · 2021-04-09 18:19:49 +02:00 · b5de52c4cd
parent fca5e2a48f
commit b5de52c4cd
1 changed files with 13 additions and 11 deletions
--- a/models/common.py
+++ b/models/common.py
@ -10,6 +10,7 @@ import requests
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.cuda import amp

 from utils.datasets import letterbox
 from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh
@ -237,7 +238,6 @@ class autoShape(nn.Module):
        return self

    @torch.no_grad()
-    @torch.cuda.amp.autocast(torch.cuda.is_available())
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/samples/zidane.jpg'
@ -251,6 +251,7 @@ class autoShape(nn.Module):
        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
+            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
@ -278,6 +279,7 @@ class autoShape(nn.Module):
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_synchronized())

+        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())