def warmup(self, imgsz=(1, 3, 640, 640), half=False):
    # Warmup model by running inference once on an all-zeros dummy image.
    #
    # Arguments:
    #   imgsz: shape of the dummy input tensor, unpacked into torch.zeros().
    #          The callers in this patch pass (1, 3, *imgsz) from detect.py
    #          and (1, 3, imgsz, imgsz) from val.py.
    #   half:  build the dummy input as torch.half when True, torch.float otherwise.
    #
    # Returns None. The result of self.forward() is discarded; the call is made
    # only for its side effect of running the model once before real inference.
    if not (self.pt or self.engine or self.onnx):  # warmup types
        return

    device = self.device
    # Only warmup GPU models: skip anything that is not a torch.device, and skip CPU.
    if not isinstance(device, torch.device) or device.type == 'cpu':
        return

    # Allocate directly with the final dtype on the target device instead of
    # creating a float32 tensor on the host, copying it over and casting it.
    dtype = torch.half if half else torch.float
    im = torch.zeros(*imgsz, dtype=dtype, device=device)  # input image
    self.forward(im)  # warmup