* Fix float zeros format
* 255 to integer
* modify Dataloader
 else:
     img = torch.from_numpy(img).to(device)
     img = img.half() if half else img.float()  # uint8 to fp16/32
-    img /= 255.0  # 0 - 255 to 0.0 - 1.0
+    img /= 255  # 0 - 255 to 0.0 - 1.0
     if len(img.shape) == 3:
         img = img[None]  # expand for batch dim
     t2 = time_sync()
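A note on why the integer literal is safe here: by the time of the division, `img` is already fp16/fp32 (see the `half()`/`float()` cast above), and PyTorch promotes the Python int `255` to the tensor's dtype, so the in-place division is exact true division. Had `img` still been uint8, the in-place `/=` would raise, which is why the cast precedes it. A standalone sketch, not repo code:

```python
import torch

# img is float before the division, as above; dividing a float tensor by the
# Python int 255 is exact true division, identical to dividing by 255.0.
img = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8).float()
assert torch.equal(img / 255, img / 255.0)
```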
 model.train()  # CoreML exports should be placed in model.train() mode
 ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
-ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])])
+ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
 ct_model.save(f)
 LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
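`1 / 255` is Python 3 true division, so the CoreML `ImageType` scale factor is numerically unchanged:

```python
# Python 3 true division: the int literal changes nothing (~0.0039216).
assert 1 / 255 == 1 / 255.0
```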
 x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
 x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
 x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
+x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
 t.append(time_sync())

 with amp.autocast(enabled=p.device.type != 'cpu'):

 def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
     super().__init__()
     d = pred[0].device  # device
-    gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1., 1.], device=d) for im in imgs]  # normalizations
+    gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
     self.imgs = imgs  # list of images as numpy arrays
     self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
     self.names = names  # class names
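One subtlety worth noting: with the trailing `1., 1.` replaced by ints, every entry is a Python int, so `torch.tensor` now infers int64 for `gn`. Dividing float boxes by an int64 tensor still performs true division in PyTorch, so the normalized coordinates are unchanged. A quick check with an assumed 640x480 image:

```python
import torch

# gn is now int64, but float / int64 still yields float32 in PyTorch.
gn = torch.tensor([640, 480, 640, 480, 1, 1])
xyxy = torch.tensor([320.0, 240.0, 640.0, 480.0, 0.9, 0.0])
print((xyxy / gn).dtype)  # torch.float32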
 self.weight = weight  # apply weights boolean
 self.iter = range(n - 1)  # iter object
 if weight:
-    self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
+    self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

 def forward(self, x):
     y = x[0]  # no weight
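Note this one keeps the float literal (rewritten as `1.0`, not demoted to `1`): `torch.arange` infers its dtype from the arguments, and an integer-typed tensor could not serve as a trainable `nn.Parameter`. Quick check:

```python
import torch

print(torch.arange(1.0, 4).dtype)  # torch.float32, can require grad
print(torch.arange(1, 4).dtype)    # torch.int64, cannot be trained
```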
 self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

 def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-    # inputs = inputs / 255.  # normalize 0-255 to 0-1
+    # inputs = inputs / 255  # normalize 0-255 to 0-1
     return self.conv(tf.concat([inputs[:, ::2, ::2, :],
                                 inputs[:, 1::2, ::2, :],
                                 inputs[:, ::2, 1::2, :],
                                 inputs[:, 1::2, 1::2, :]], 3))
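For reference, the slicing implements Focus-style space-to-depth: each 2x2 spatial block moves into the channel dimension, halving height and width. The same idea in PyTorch BCHW layout (illustrative sketch, not repo code):

```python
import torch

# Space-to-depth: four strided views of the image, concatenated on channels.
x = torch.arange(2 * 3 * 4 * 4, dtype=torch.float32).reshape(2, 3, 4, 4)
y = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
               x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
print(y.shape)  # torch.Size([2, 12, 2, 2]) -- 4x channels, half h and w
```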
 if not self.training:  # inference
     y = tf.sigmoid(x[i])
-    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
     # Normalize xywh to 0-1 to reduce calibration error
     xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
 for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
     input = np.transpose(img, [1, 2, 0])
     input = np.expand_dims(input, axis=0).astype(np.float32)
-    input /= 255.0
+    input /= 255
     yield [input]
     if n >= ncalib:
         break
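This generator feeds TFLite int8 calibration, so it must yield float32 batches preprocessed exactly like inference inputs. A self-contained sketch with random stand-in images (names and sizes assumed, not from this repo):

```python
import numpy as np

def representative_dataset(ncalib=100):
    # Yield float32 NHWC batches normalized to 0-1, mirroring the dataloader;
    # random images stand in for real calibration frames.
    for _ in range(ncalib):
        img = np.random.randint(0, 256, (1, 320, 320, 3)).astype(np.float32)
        img /= 255
        yield [img]
```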
 y = x[i].sigmoid()
 if self.inplace:
-    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
     y = torch.cat((xy, wh, y[..., 4:]), -1)
 z.append(y.view(bs, -1, self.no))
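The decode here is xy = (2·σ(t) − 0.5 + grid)·stride and wh = (2·σ(t))²·anchor; the integer literals change nothing because int scalars promote to the tensor's dtype. A quick equivalence check with toy values:

```python
import torch

y = torch.rand(1, 4)  # toy sigmoid outputs
grid, stride, anchor = torch.zeros(1, 2), 8, torch.tensor([10.0, 13.0])
xy = (y[..., 0:2] * 2 - 0.5 + grid) * stride
wh = (y[..., 2:4] * 2) ** 2 * anchor
assert torch.equal(xy, (y[..., 0:2] * 2. - 0.5 + grid) * stride)  # identical
```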
 # Model parameters
 nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-hyp['box'] *= 3. / nl  # scale to layers
-hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
-hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
+hyp['box'] *= 3 / nl  # scale to layers
+hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
+hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
 hyp['label_smoothing'] = opt.label_smoothing
 model.nc = nc  # attach number of classes to model
 model.hyp = hyp  # attach hyperparameters to model

 optimizer.zero_grad()
 for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
     ni = i + nb * epoch  # number integrated batches (since train start)
-    imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+    imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

     # Warmup
     if ni <= nw:
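The hyp scaling at the top of this hunk adjusts loss gains tuned for 3 detection layers, 80 classes and 640-pixel images. A worked example with assumed values (nl=4, nc=20, imgsz=1280; base gains illustrative):

```python
hyp = {'box': 0.05, 'cls': 0.5, 'obj': 1.0}  # assumed base gains
nl, nc, imgsz = 4, 20, 1280                  # assumed model/config
hyp['box'] *= 3 / nl                         # 0.05 * 0.75    = 0.0375
hyp['cls'] *= nc / 80 * 3 / nl               # 0.5 * 0.1875   = 0.09375
hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl    # 1.0 * 4 * 0.75 = 3.0
```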
 @staticmethod
 def forward(x):
     # return x * F.hardsigmoid(x)  # for torchscript and CoreML
-    return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+    return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for torchscript, CoreML and ONNX

 # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
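This export-friendly Hardswish is x·clip(x + 3, 0, 6)/6; on a recent PyTorch it matches the built-in within floating-point tolerance:

```python
import torch
import torch.nn.functional as F

x = torch.linspace(-5, 5, steps=101)
manual = x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0
assert torch.allclose(manual, F.hardswish(x))  # built-in since torch 1.6
```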
 def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                        border=(0, 0)):
-    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]
     height = im.shape[0] + border[0] * 2  # shape(h,w,c)
 def metric(k):  # compute metric
     r = wh[:, None] / k[None]
-    x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+    x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
     best = x.max(1)[0]  # best_x
-    aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
-    bpr = (best > 1. / thr).float().mean()  # best possible recall
+    aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
+    bpr = (best > 1 / thr).float().mean()  # best possible recall
     return bpr, aat

 anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1)  # current anchors

 """
 from scipy.cluster.vq import kmeans

-thr = 1. / thr
+thr = 1 / thr
 prefix = colorstr('autoanchor: ')

 def metric(k, wh):  # compute metrics
     r = wh[:, None] / k[None]
-    x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+    x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
     # x = wh_iou(wh, torch.tensor(k))  # iou metric
     return x, x.max(1)[0]  # x, best_x
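The ratio metric scores how well each anchor fits a label: for width/height ratios r, min(r, 1/r) is 1 for a perfect fit and shrinks as the worse dimension diverges; a label is recallable if its best anchor exceeds 1/thr. Worked example with thr = 4 (a common anchor_t value):

```python
import torch

wh = torch.tensor([[30.0, 40.0]])                 # one label, w x h
k = torch.tensor([[10.0, 13.0], [33.0, 23.0]])    # two candidate anchors
r = wh[:, None] / k[None]
x = torch.min(r, 1 / r).min(2)[0]  # worst-dimension fit per anchor
print(x)            # tensor([[0.3250, 0.5750]])
print(x.max(1)[0])  # tensor([0.5750]) > 1/4, so this label is recallable
```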
 n = len(shapes) // 4
 img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

-ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
-wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
-s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
+ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
+wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
+s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
 for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
     i *= 4
     if random.random() < 0.5:
-        im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
+        im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[
             0].type(img[i].type())
         l = label[i]
     else:
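For context, `collate_fn4` folds four dataloader samples into one: with probability 0.5 a single image is 2x-upscaled to fill the tile, otherwise four images are stitched together. The upscale branch in isolation (sketch with an assumed 320-pixel input):

```python
import torch
import torch.nn.functional as F

# One image is bilinearly resized to stand in for a full 4-image mosaic tile.
img = torch.randint(0, 256, (3, 320, 320), dtype=torch.uint8)
im = F.interpolate(img.unsqueeze(0).float(), scale_factor=2.0,
                   mode='bilinear', align_corners=False)[0].type(img.type())
print(im.shape, im.dtype)  # torch.Size([3, 640, 640]) torch.uint8
```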
 im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
 im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
-im /= 255.0  # 0 - 255 to 0.0 - 1.0
+im /= 255  # 0 - 255 to 0.0 - 1.0
 ims.append(im)

 pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
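The crop preprocessing above converts each detection into the second-stage classifier's expected input: BGR HWC uint8 to RGB CHW float32 in 0-1. End to end on a dummy crop (sketch):

```python
import numpy as np

im = np.random.randint(0, 256, (416, 416, 3), dtype=np.uint8)  # dummy BGR crop
im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
im = np.ascontiguousarray(im, dtype=np.float32)
im /= 255
print(im.shape, im.dtype)  # (3, 416, 416) float32
```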
 BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

 det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
-self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
+self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
 self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
 self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
 for k in 'na', 'nc', 'nl', 'anchors':

 ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

 # Regression
-pxy = ps[:, :2].sigmoid() * 2. - 0.5
+pxy = ps[:, :2].sigmoid() * 2 - 0.5
 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
 pbox = torch.cat((pxy, pwh), 1)  # predicted box
 iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)

 if nt:
     # Matches
     r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
-    j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
+    j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
     # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
     t = t[j]  # filter

     # Offsets
     gxy = t[:, 2:4]  # grid xy
     gxi = gain[[2, 3]] - gxy  # inverse
-    j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-    l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+    j, k = ((gxy % 1 < g) & (gxy > 1)).T
+    l, m = ((gxi % 1 < g) & (gxi > 1)).T
     j = torch.stack((torch.ones_like(j), j, k, l, m))
     t = t.repeat((5, 1, 1))[j]
     offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
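The boolean masks select up to two neighbour cells per target: a center within g = 0.5 of a cell boundary also claims the adjacent cell, with the inverse coordinates handling the far side. Worked example with an assumed 80x80 grid:

```python
import torch

# A target at grid coordinates (2.3, 5.8): x is in the left half of its cell
# (claims the left neighbour); y is in the bottom half (claimed via gxi).
g = 0.5  # offset threshold, as in build_targets
gxy = torch.tensor([[2.3, 5.8]])
grid_wh = torch.tensor([80.0, 80.0])  # stands in for gain[[2, 3]]
gxi = grid_wh - gxy  # inverse coordinates
j, k = ((gxy % 1 < g) & (gxy > 1)).T
l, m = ((gxi % 1 < g) & (gxi > 1)).T
print(j.item(), k.item(), l.item(), m.item())  # True False False True
```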
 if isinstance(targets, torch.Tensor):
     targets = targets.cpu().numpy()
 if np.max(images[0]) <= 1:
-    images *= 255.0  # de-normalise (optional)
+    images *= 255  # de-normalise (optional)
 bs, _, h, w = images.shape  # batch size, _, height, width
 bs = min(bs, max_subplots)  # limit plot images
 ns = np.ceil(bs ** 0.5)  # number of subplots (square)
 for m in ops if isinstance(ops, list) else [ops]:
     m = m.to(device) if hasattr(m, 'to') else m  # device
     m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
-    tf, tb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
+    tf, tb, t = 0, 0, [0, 0, 0]  # dt forward, backward
     try:
         flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
     except:

 def sparsity(model):
     # Return global model sparsity
-    a, b = 0., 0.
+    a, b = 0, 0
     for p in model.parameters():
         a += p.numel()
         b += (p == 0).sum()

 for k, v in self.ema.state_dict().items():
     if v.dtype.is_floating_point:
         v *= d
-        v += (1. - d) * msd[k].detach()
+        v += (1 - d) * msd[k].detach()

 def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
     # Update EMA attributes
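The EMA update above is v = d·v + (1 − d)·model, so the shadow weights decay slowly toward the live weights; `(1. - d)` and `(1 - d)` agree exactly. Worked with toy numbers:

```python
import torch

d = 0.9999                 # EMA decay
v = torch.tensor([1.0])    # shadow (EMA) weight
msd = torch.tensor([2.0])  # live model weight
v *= d
v += (1 - d) * msd
print(v)  # tensor([1.0001])
```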
 t1 = time_sync()
 img = img.to(device, non_blocking=True)
 img = img.half() if half else img.float()  # uint8 to fp16/32
-img /= 255.0  # 0 - 255 to 0.0 - 1.0
+img /= 255  # 0 - 255 to 0.0 - 1.0
 targets = targets.to(device)
 nb, _, height, width = img.shape  # batch size, channels, height, width
 t2 = time_sync()